asedeno.scripts.mit.edu Git - linux.git/commitdiff
Merge ath-next from git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
author Kalle Valo <kvalo@codeaurora.org>
Tue, 27 Mar 2018 07:06:18 +0000 (10:06 +0300)
committer Kalle Valo <kvalo@codeaurora.org>
Tue, 27 Mar 2018 07:06:18 +0000 (10:06 +0300)
ath.git patches for 4.17. Major changes:

wil6210

* support multiple virtual interfaces

1910 files changed:
Documentation/ABI/testing/sysfs-ata
Documentation/ABI/testing/sysfs-block-device [new file with mode: 0644]
Documentation/ABI/testing/sysfs-class-scsi_host
Documentation/PCI/pci.txt
Documentation/accelerators/ocxl.rst
Documentation/bpf/bpf_devel_QA.txt
Documentation/devicetree/bindings/auxdisplay/arm-charlcd.txt [moved from Documentation/devicetree/bindings/misc/arm-charlcd.txt with 100% similarity]
Documentation/devicetree/bindings/display/exynos/exynos_hdmi.txt
Documentation/devicetree/bindings/dma/mv-xor-v2.txt
Documentation/devicetree/bindings/eeprom/at24.txt
Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt
Documentation/devicetree/bindings/net/dsa/marvell.txt
Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/macb.txt
Documentation/devicetree/bindings/net/renesas,ravb.txt
Documentation/devicetree/bindings/net/sff,sfp.txt
Documentation/devicetree/bindings/power/wakeup-source.txt
Documentation/devicetree/bindings/thermal/imx-thermal.txt
Documentation/devicetree/bindings/usb/dwc2.txt
Documentation/devicetree/bindings/usb/renesas_usb3.txt
Documentation/devicetree/bindings/usb/renesas_usbhs.txt
Documentation/devicetree/bindings/usb/usb-xhci.txt
Documentation/ia64/serial.txt
Documentation/media/dmx.h.rst.exceptions
Documentation/media/uapi/dvb/dmx-qbuf.rst
Documentation/networking/ip-sysctl.txt
Documentation/networking/msg_zerocopy.rst
Documentation/networking/net_dim.txt [new file with mode: 0644]
Documentation/networking/packet_mmap.txt
Documentation/networking/segmentation-offloads.txt
Documentation/networking/tls.txt
Documentation/sphinx/kerneldoc.py
Documentation/sysctl/net.txt
Documentation/virtual/kvm/api.txt
Documentation/virtual/kvm/cpuid.txt
Documentation/virtual/kvm/msr.txt
Documentation/x86/intel_rdt_ui.txt
Documentation/x86/topology.txt
MAINTAINERS
Makefile
arch/alpha/include/asm/cmpxchg.h
arch/alpha/include/asm/xchg.h
arch/arc/Kconfig
arch/arc/boot/dts/axs101.dts
arch/arc/boot/dts/axs10x_mb.dtsi
arch/arc/boot/dts/haps_hs_idu.dts
arch/arc/boot/dts/nsim_700.dts
arch/arc/boot/dts/nsim_hs.dts
arch/arc/boot/dts/nsim_hs_idu.dts
arch/arc/boot/dts/nsimosci.dts
arch/arc/boot/dts/nsimosci_hs.dts
arch/arc/boot/dts/nsimosci_hs_idu.dts
arch/arc/include/asm/entry-arcv2.h
arch/arc/kernel/mcip.c
arch/arc/kernel/setup.c
arch/arc/kernel/smp.c
arch/arc/kernel/unwind.c
arch/arc/mm/cache.c
arch/arm/boot/dts/armada-370-rd.dts
arch/arm/boot/dts/bcm11351.dtsi
arch/arm/boot/dts/bcm21664.dtsi
arch/arm/boot/dts/bcm2835.dtsi
arch/arm/boot/dts/bcm2836.dtsi
arch/arm/boot/dts/bcm2837.dtsi
arch/arm/boot/dts/bcm283x.dtsi
arch/arm/boot/dts/bcm958625hr.dts
arch/arm/boot/dts/gemini-dlink-dns-313.dts
arch/arm/boot/dts/imx6dl-icore-rqs.dts
arch/arm/boot/dts/logicpd-som-lv.dtsi
arch/arm/boot/dts/logicpd-torpedo-som.dtsi
arch/arm/boot/dts/omap5-uevm.dts
arch/arm/boot/dts/rk3036.dtsi
arch/arm/boot/dts/rk322x.dtsi
arch/arm/boot/dts/rk3288-phycore-som.dtsi
arch/arm/boot/dts/zx296702.dtsi
arch/arm/configs/omap2plus_defconfig
arch/arm/kernel/time.c
arch/arm/kvm/hyp/Makefile
arch/arm/kvm/hyp/banked-sr.c
arch/arm/mach-clps711x/board-dt.c
arch/arm/mach-davinci/board-dm355-evm.c
arch/arm/mach-davinci/board-dm355-leopard.c
arch/arm/mach-davinci/board-dm365-evm.c
arch/arm/mach-mvebu/Kconfig
arch/arm/mach-omap1/clock.c
arch/arm/mach-omap2/omap-wakeupgen.c
arch/arm/mach-omap2/omap_hwmod.c
arch/arm/mach-omap2/pm.c
arch/arm/mach-omap2/timer.c
arch/arm/mach-orion5x/Kconfig
arch/arm/mach-orion5x/dns323-setup.c
arch/arm/mach-orion5x/tsx09-common.c
arch/arm/plat-orion/common.c
arch/arm64/boot/dts/amlogic/meson-axg.dtsi
arch/arm64/boot/dts/amlogic/meson-gx.dtsi
arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi
arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
arch/arm64/boot/dts/mediatek/mt8173.dtsi
arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
arch/arm64/boot/dts/qcom/msm8996.dtsi
arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
arch/arm64/boot/dts/rockchip/rk3328.dtsi
arch/arm64/boot/dts/rockchip/rk3368.dtsi
arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi
arch/arm64/boot/dts/rockchip/rk3399.dtsi
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/ptrace.c
arch/arm64/kvm/guest.c
arch/arm64/mm/mmu.c
arch/h8300/include/asm/byteorder.h
arch/ia64/include/asm/atomic.h
arch/ia64/kernel/err_inject.c
arch/ia64/scripts/unwcheck.py
arch/m68k/mac/config.c
arch/microblaze/Kconfig
arch/microblaze/Kconfig.platform
arch/microblaze/include/asm/setup.h
arch/microblaze/lib/fastcopy.S
arch/microblaze/mm/init.c
arch/mips/ath25/board.c
arch/mips/cavium-octeon/octeon-irq.c
arch/mips/kernel/smp-bmips.c
arch/mips/loongson64/Kconfig
arch/parisc/include/asm/cacheflush.h
arch/parisc/include/asm/processor.h
arch/parisc/kernel/cache.c
arch/parisc/kernel/head.S
arch/parisc/kernel/pacache.S
arch/parisc/kernel/smp.c
arch/parisc/kernel/time.c
arch/parisc/mm/init.c
arch/powerpc/boot/Makefile
arch/powerpc/include/asm/firmware.h
arch/powerpc/kernel/eeh_driver.c
arch/powerpc/kernel/prom_init.c
arch/powerpc/kvm/book3s_64_mmu_radix.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_xive.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/mm/drmem.c
arch/powerpc/net/bpf_jit_comp.c
arch/powerpc/net/bpf_jit_comp64.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/powernv/setup.c
arch/powerpc/platforms/pseries/setup.c
arch/riscv/include/asm/barrier.h
arch/s390/include/asm/mmu_context.h
arch/s390/kernel/entry.S
arch/s390/kernel/nospec-branch.c
arch/s390/kvm/intercept.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/s390/kvm/priv.c
arch/s390/kvm/vsie.c
arch/sh/boot/dts/Makefile
arch/sparc/mm/tlb.c
arch/x86/Kconfig
arch/x86/Makefile
arch/x86/boot/compressed/eboot.c
arch/x86/entry/calling.h
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64_compat.S
arch/x86/entry/syscalls/syscall_32.tbl
arch/x86/entry/vsyscall/vsyscall_64.c
arch/x86/events/intel/uncore_snbep.c
arch/x86/ia32/sys_ia32.c
arch/x86/include/asm/apm.h
arch/x86/include/asm/asm-prototypes.h
arch/x86/include/asm/bitops.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/efi.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/microcode.h
arch/x86/include/asm/mmu_context.h
arch/x86/include/asm/nospec-branch.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/percpu.h
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/pgtable_32.h
arch/x86/include/asm/pgtable_64.h
arch/x86/include/asm/pgtable_types.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/refcount.h
arch/x86/include/asm/rmwcc.h
arch/x86/include/asm/sections.h
arch/x86/include/asm/sys_ia32.h
arch/x86/include/asm/vmx.h
arch/x86/include/uapi/asm/hyperv.h
arch/x86/include/uapi/asm/kvm_para.h
arch/x86/include/uapi/asm/mce.h
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/vector.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/cpu/microcode/amd.c
arch/x86/kernel/cpu/microcode/core.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/head_64.S
arch/x86/kernel/ioport.c
arch/x86/kernel/kprobes/core.c
arch/x86/kernel/kvm.c
arch/x86/kernel/setup.c
arch/x86/kernel/setup_percpu.c
arch/x86/kernel/signal_compat.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/unwind_orc.c
arch/x86/kernel/vm86_32.c
arch/x86/kernel/vmlinux.lds.S
arch/x86/kvm/cpuid.c
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/lib/Makefile
arch/x86/lib/retpoline.S
arch/x86/mm/cpu_entry_area.c
arch/x86/mm/fault.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
arch/x86/mm/mem_encrypt_boot.S
arch/x86/mm/pgtable.c
arch/x86/mm/pti.c
arch/x86/net/bpf_jit_comp.c
arch/x86/oprofile/nmi_int.c
arch/x86/platform/intel-mid/intel-mid.c
arch/x86/realmode/rm/trampoline_64.S
arch/x86/xen/enlighten_pv.c
arch/x86/xen/suspend.c
arch/xtensa/kernel/pci-dma.c
arch/xtensa/mm/init.c
block/blk-cgroup.c
block/blk-core.c
block/blk-mq.c
block/genhd.c
block/ioctl.c
block/kyber-iosched.c
block/mq-deadline.c
block/partition-generic.c
block/sed-opal.c
drivers/acpi/acpi_watchdog.c
drivers/acpi/battery.c
drivers/acpi/nfit/core.c
drivers/acpi/numa.c
drivers/ata/ahci.c
drivers/ata/libahci.c
drivers/ata/libahci_platform.c
drivers/ata/libata-core.c
drivers/ata/libata-eh.c
drivers/ata/libata-scsi.c
drivers/ata/sata_rcar.c
drivers/auxdisplay/img-ascii-lcd.c
drivers/auxdisplay/panel.c
drivers/bcma/Kconfig
drivers/bcma/driver_chipcommon_pmu.c
drivers/bcma/host_pci.c
drivers/block/amiflop.c
drivers/block/ataflop.c
drivers/block/brd.c
drivers/block/floppy.c
drivers/block/loop.c
drivers/block/nbd.c
drivers/block/pktcdvd.c
drivers/block/swim.c
drivers/block/xen-blkfront.c
drivers/block/z2ram.c
drivers/bluetooth/Kconfig
drivers/bluetooth/Makefile
drivers/bluetooth/btrsi.c [new file with mode: 0644]
drivers/bluetooth/btusb.c
drivers/bluetooth/hci_bcm.c
drivers/bus/ti-sysc.c
drivers/char/tpm/st33zp24/st33zp24.c
drivers/char/tpm/tpm-interface.c
drivers/char/tpm/tpm2-cmd.c
drivers/char/tpm/tpm_i2c_infineon.c
drivers/char/tpm/tpm_i2c_nuvoton.c
drivers/char/tpm/tpm_tis_core.c
drivers/clk/bcm/clk-bcm2835.c
drivers/clk/clk-aspeed.c
drivers/clk/clk.c
drivers/clk/hisilicon/clk-hi3660-stub.c
drivers/clk/imx/clk-imx51-imx53.c
drivers/clk/qcom/apcs-msm8916.c
drivers/clk/sunxi-ng/ccu-sun6i-a31.c
drivers/clk/ti/clk-33xx.c
drivers/clk/ti/clk-43xx.c
drivers/clk/ti/clkctrl.c
drivers/clocksource/Kconfig
drivers/clocksource/arc_timer.c
drivers/clocksource/fsl_ftm_timer.c
drivers/clocksource/mips-gic-timer.c
drivers/clocksource/timer-sun5i.c
drivers/cpufreq/Kconfig.arm
drivers/cpufreq/s3c24xx-cpufreq.c
drivers/cpufreq/scpi-cpufreq.c
drivers/crypto/ccp/psp-dev.c
drivers/dax/super.c
drivers/dma/mv_xor_v2.c
drivers/dma/sh/rcar-dmac.c
drivers/edac/sb_edac.c
drivers/firmware/dcdbas.c
drivers/firmware/efi/libstub/tpm.c
drivers/gpio/gpio-rcar.c
drivers/gpio/gpiolib-of.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
drivers/gpu/drm/amd/amdgpu/atombios_encoders.h
drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
drivers/gpu/drm/amd/amdgpu/si.c
drivers/gpu/drm/amd/amdgpu/si_dpm.c
drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
drivers/gpu/drm/amd/display/dc/core/dc.c
drivers/gpu/drm/amd/display/dc/core/dc_link.c
drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
drivers/gpu/drm/amd/display/dc/core/dc_resource.c
drivers/gpu/drm/amd/display/dc/core/dc_stream.c
drivers/gpu/drm/amd/display/dc/dc.h
drivers/gpu/drm/amd/display/dc/dc_stream.h
drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
drivers/gpu/drm/amd/display/dc/dce/dce_opp.c
drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c
drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c
drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
drivers/gpu/drm/amd/display/include/signal_types.h
drivers/gpu/drm/amd/powerplay/amd_powerplay.c
drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
drivers/gpu/drm/ast/ast_tables.h
drivers/gpu/drm/drm_framebuffer.c
drivers/gpu/drm/i915/gvt/cmd_parser.c
drivers/gpu/drm/i915/gvt/mmio_context.c
drivers/gpu/drm/i915/gvt/scheduler.c
drivers/gpu/drm/i915/gvt/scheduler.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/i915_gem_request.c
drivers/gpu/drm/i915/i915_perf.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_sysfs.c
drivers/gpu/drm/i915/intel_audio.c
drivers/gpu/drm/i915/intel_ddi.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_hangcheck.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/imx/ipuv3-crtc.c
drivers/gpu/drm/imx/ipuv3-plane.c
drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
drivers/gpu/drm/nouveau/nouveau_backlight.c
drivers/gpu/drm/nouveau/nv50_display.c
drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/radeon_connectors.c
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_gem.c
drivers/gpu/drm/radeon/radeon_object.c
drivers/gpu/drm/radeon/radeon_pm.c
drivers/gpu/drm/scheduler/gpu_scheduler.c
drivers/gpu/drm/sun4i/sun4i_crtc.c
drivers/gpu/drm/sun4i/sun4i_dotclock.c
drivers/gpu/drm/sun4i/sun4i_drv.c
drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
drivers/gpu/drm/sun4i/sun4i_rgb.c
drivers/gpu/drm/sun4i/sun4i_tcon.c
drivers/gpu/drm/sun4i/sun4i_tcon.h
drivers/gpu/drm/tegra/dc.c
drivers/gpu/drm/tegra/drm.c
drivers/gpu/drm/tegra/dsi.c
drivers/gpu/drm/tegra/plane.c
drivers/gpu/drm/udl/udl_fb.c
drivers/gpu/drm/virtio/virtgpu_ioctl.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
drivers/gpu/ipu-v3/ipu-prg.c
drivers/i2c/busses/i2c-octeon-core.c
drivers/i2c/busses/i2c-octeon-core.h
drivers/ide/ide-probe.c
drivers/infiniband/core/addr.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/cq.c
drivers/infiniband/core/device.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/ucma.c
drivers/infiniband/hw/bnxt_re/bnxt_re.h
drivers/infiniband/hw/bnxt_re/ib_verbs.c
drivers/infiniband/hw/bnxt_re/ib_verbs.h
drivers/infiniband/hw/bnxt_re/main.c
drivers/infiniband/hw/bnxt_re/qplib_fp.c
drivers/infiniband/hw/bnxt_re/qplib_fp.h
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
drivers/infiniband/hw/bnxt_re/qplib_sp.c
drivers/infiniband/hw/bnxt_re/roce_hsi.h
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx5/Makefile
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/ib_rep.c [new file with mode: 0644]
drivers/infiniband/hw/mlx5/ib_rep.h [new file with mode: 0644]
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mlx5/srq.c
drivers/infiniband/hw/qedr/qedr_iw_cm.c
drivers/infiniband/hw/qedr/verbs.c
drivers/infiniband/sw/rdmavt/mr.c
drivers/input/keyboard/matrix_keypad.c
drivers/input/mouse/synaptics.c
drivers/input/touchscreen/mms114.c
drivers/iommu/intel-svm.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-imx-gpcv2.c
drivers/md/bcache/request.c
drivers/md/bcache/super.c
drivers/md/dm-bufio.c
drivers/md/dm-mpath.c
drivers/md/dm-raid.c
drivers/md/dm-table.c
drivers/md/dm.c
drivers/md/md-multipath.c
drivers/md/md.c
drivers/md/md.h
drivers/md/raid1.c
drivers/md/raid1.h
drivers/md/raid10.c
drivers/md/raid10.h
drivers/md/raid5-log.h
drivers/md/raid5-ppl.c
drivers/md/raid5.c
drivers/md/raid5.h
drivers/media/Kconfig
drivers/media/common/videobuf2/Kconfig
drivers/media/common/videobuf2/Makefile
drivers/media/common/videobuf2/vb2-trace.c [moved from drivers/media/v4l2-core/vb2-trace.c with 100% similarity]
drivers/media/dvb-core/Makefile
drivers/media/dvb-core/dmxdev.c
drivers/media/dvb-core/dvb_demux.c
drivers/media/dvb-core/dvb_net.c
drivers/media/dvb-core/dvb_vb2.c
drivers/media/dvb-frontends/m88ds3103.c
drivers/media/i2c/tvp5150.c
drivers/media/pci/ttpci/av7110.c
drivers/media/pci/ttpci/av7110_av.c
drivers/media/usb/au0828/Kconfig
drivers/media/usb/ttusb-dec/ttusb_dec.c
drivers/media/v4l2-core/Kconfig
drivers/media/v4l2-core/Makefile
drivers/memory/brcmstb_dpfe.c
drivers/misc/ocxl/file.c
drivers/mmc/core/block.c
drivers/mmc/core/card.h
drivers/mmc/core/mmc_ops.c
drivers/mmc/core/quirks.h
drivers/mmc/host/dw_mmc-exynos.c
drivers/mmc/host/dw_mmc-k3.c
drivers/mmc/host/dw_mmc-rockchip.c
drivers/mmc/host/dw_mmc-zx.c
drivers/mmc/host/dw_mmc.c
drivers/mmc/host/dw_mmc.h
drivers/mmc/host/sdhci-acpi.c
drivers/mmc/host/sdhci-pci-core.c
drivers/net/Kconfig
drivers/net/Space.c
drivers/net/bonding/bond_main.c
drivers/net/can/cc770/cc770.c
drivers/net/can/cc770/cc770.h
drivers/net/can/ifi_canfd/ifi_canfd.c
drivers/net/can/m_can/m_can.c
drivers/net/can/peak_canfd/peak_canfd.c
drivers/net/can/peak_canfd/peak_pciefd_main.c
drivers/net/dsa/Makefile
drivers/net/dsa/b53/b53_common.c
drivers/net/dsa/b53/b53_priv.h
drivers/net/dsa/dsa_loop.c
drivers/net/dsa/lan9303-core.c
drivers/net/dsa/microchip/ksz_common.c
drivers/net/dsa/mt7530.c
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/mv88e6xxx/chip.h
drivers/net/dsa/mv88e6xxx/global2.c
drivers/net/dsa/mv88e6xxx/global2.h
drivers/net/dsa/mv88e6xxx/serdes.c
drivers/net/dsa/mv88e6xxx/serdes.h
drivers/net/dsa/qca8k.c
drivers/net/ethernet/8390/Kconfig
drivers/net/ethernet/amd/amd8111e.c
drivers/net/ethernet/apple/macmace.c
drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
drivers/net/ethernet/aquantia/atlantic/aq_nic.c
drivers/net/ethernet/aquantia/atlantic/aq_nic.h
drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
drivers/net/ethernet/aquantia/atlantic/aq_ring.c
drivers/net/ethernet/aquantia/atlantic/aq_ring.h
drivers/net/ethernet/aquantia/atlantic/aq_vec.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
drivers/net/ethernet/aquantia/atlantic/ver.h
drivers/net/ethernet/arc/emac_rockchip.c
drivers/net/ethernet/broadcom/bcmsysport.c
drivers/net/ethernet/broadcom/bcmsysport.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/broadcom/genet/bcmgenet.h
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/cavium/liquidio/lio_core.c
drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
drivers/net/ethernet/cavium/liquidio/lio_main.c
drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
drivers/net/ethernet/cavium/liquidio/liquidio_common.h
drivers/net/ethernet/cavium/liquidio/octeon_device.h
drivers/net/ethernet/cavium/liquidio/octeon_droq.c
drivers/net/ethernet/cavium/liquidio/octeon_droq.h
drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
drivers/net/ethernet/cavium/liquidio/octeon_network.h
drivers/net/ethernet/cavium/liquidio/request_manager.c
drivers/net/ethernet/cavium/liquidio/response_manager.c
drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
drivers/net/ethernet/chelsio/cxgb4/Makefile
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
drivers/net/ethernet/chelsio/cxgb4/sched.h
drivers/net/ethernet/chelsio/cxgb4/srq.c [new file with mode: 0644]
drivers/net/ethernet/chelsio/cxgb4/srq.h [new file with mode: 0644]
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
drivers/net/ethernet/cirrus/mac89x0.c
drivers/net/ethernet/cisco/enic/enic.h
drivers/net/ethernet/cisco/enic/enic_ethtool.c
drivers/net/ethernet/cisco/enic/enic_main.c
drivers/net/ethernet/cisco/enic/vnic_dev.c
drivers/net/ethernet/cisco/enic/vnic_dev.h
drivers/net/ethernet/cisco/enic/vnic_devcmd.h
drivers/net/ethernet/cisco/enic/vnic_nic.h
drivers/net/ethernet/cortina/gemini.c
drivers/net/ethernet/emulex/benet/be_cmds.c
drivers/net/ethernet/emulex/benet/be_cmds.h
drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/fman/fman_dtsec.c
drivers/net/ethernet/freescale/fman/fman_dtsec.h
drivers/net/ethernet/freescale/fman/fman_memac.c
drivers/net/ethernet/freescale/fman/fman_memac.h
drivers/net/ethernet/freescale/fman/fman_tgec.c
drivers/net/ethernet/freescale/fman/fman_tgec.h
drivers/net/ethernet/freescale/fman/mac.c
drivers/net/ethernet/freescale/fman/mac.h
drivers/net/ethernet/freescale/gianfar.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
drivers/net/ethernet/hisilicon/hns3/hnae3.h
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/ibm/ibmvnic.h
drivers/net/ethernet/intel/e100.c
drivers/net/ethernet/intel/e1000/Makefile
drivers/net/ethernet/intel/e1000/e1000.h
drivers/net/ethernet/intel/e1000/e1000_ethtool.c
drivers/net/ethernet/intel/e1000/e1000_hw.c
drivers/net/ethernet/intel/e1000/e1000_hw.h
drivers/net/ethernet/intel/e1000/e1000_main.c
drivers/net/ethernet/intel/e1000/e1000_osdep.h
drivers/net/ethernet/intel/e1000/e1000_param.c
drivers/net/ethernet/intel/e1000e/80003es2lan.c
drivers/net/ethernet/intel/e1000e/80003es2lan.h
drivers/net/ethernet/intel/e1000e/82571.c
drivers/net/ethernet/intel/e1000e/82571.h
drivers/net/ethernet/intel/e1000e/Makefile
drivers/net/ethernet/intel/e1000e/defines.h
drivers/net/ethernet/intel/e1000e/e1000.h
drivers/net/ethernet/intel/e1000e/ethtool.c
drivers/net/ethernet/intel/e1000e/hw.h
drivers/net/ethernet/intel/e1000e/ich8lan.c
drivers/net/ethernet/intel/e1000e/ich8lan.h
drivers/net/ethernet/intel/e1000e/mac.c
drivers/net/ethernet/intel/e1000e/mac.h
drivers/net/ethernet/intel/e1000e/manage.c
drivers/net/ethernet/intel/e1000e/manage.h
drivers/net/ethernet/intel/e1000e/netdev.c
drivers/net/ethernet/intel/e1000e/nvm.c
drivers/net/ethernet/intel/e1000e/nvm.h
drivers/net/ethernet/intel/e1000e/param.c
drivers/net/ethernet/intel/e1000e/phy.c
drivers/net/ethernet/intel/e1000e/phy.h
drivers/net/ethernet/intel/e1000e/ptp.c
drivers/net/ethernet/intel/e1000e/regs.h
drivers/net/ethernet/intel/fm10k/Makefile
drivers/net/ethernet/intel/fm10k/fm10k.h
drivers/net/ethernet/intel/fm10k/fm10k_common.c
drivers/net/ethernet/intel/fm10k/fm10k_common.h
drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
drivers/net/ethernet/intel/fm10k/fm10k_iov.c
drivers/net/ethernet/intel/fm10k/fm10k_main.c
drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
drivers/net/ethernet/intel/fm10k/fm10k_pci.c
drivers/net/ethernet/intel/fm10k/fm10k_pf.c
drivers/net/ethernet/intel/fm10k/fm10k_pf.h
drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
drivers/net/ethernet/intel/fm10k/fm10k_type.h
drivers/net/ethernet/intel/fm10k/fm10k_vf.c
drivers/net/ethernet/intel/fm10k/fm10k_vf.h
drivers/net/ethernet/intel/i40e/Makefile
drivers/net/ethernet/intel/i40e/i40e.h
drivers/net/ethernet/intel/i40e/i40e_adminq.c
drivers/net/ethernet/intel/i40e/i40e_adminq.h
drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
drivers/net/ethernet/intel/i40e/i40e_alloc.h
drivers/net/ethernet/intel/i40e/i40e_client.c
drivers/net/ethernet/intel/i40e/i40e_client.h
drivers/net/ethernet/intel/i40e/i40e_common.c
drivers/net/ethernet/intel/i40e/i40e_dcb.c
drivers/net/ethernet/intel/i40e/i40e_dcb.h
drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
drivers/net/ethernet/intel/i40e/i40e_debugfs.c
drivers/net/ethernet/intel/i40e/i40e_devids.h
drivers/net/ethernet/intel/i40e/i40e_diag.c
drivers/net/ethernet/intel/i40e/i40e_diag.h
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/i40e/i40e_fcoe.c [deleted file]
drivers/net/ethernet/intel/i40e/i40e_fcoe.h [deleted file]
drivers/net/ethernet/intel/i40e/i40e_hmc.c
drivers/net/ethernet/intel/i40e/i40e_hmc.h
drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_nvm.c
drivers/net/ethernet/intel/i40e/i40e_osdep.h
drivers/net/ethernet/intel/i40e/i40e_prototype.h
drivers/net/ethernet/intel/i40e/i40e_ptp.c
drivers/net/ethernet/intel/i40e/i40e_register.h
drivers/net/ethernet/intel/i40e/i40e_status.h
drivers/net/ethernet/intel/i40e/i40e_trace.h
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_txrx.h
drivers/net/ethernet/intel/i40e/i40e_type.h
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
drivers/net/ethernet/intel/i40evf/Makefile
drivers/net/ethernet/intel/i40evf/i40e_adminq.c
drivers/net/ethernet/intel/i40evf/i40e_adminq.h
drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
drivers/net/ethernet/intel/i40evf/i40e_alloc.h
drivers/net/ethernet/intel/i40evf/i40e_common.c
drivers/net/ethernet/intel/i40evf/i40e_devids.h
drivers/net/ethernet/intel/i40evf/i40e_hmc.h
drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
drivers/net/ethernet/intel/i40evf/i40e_osdep.h
drivers/net/ethernet/intel/i40evf/i40e_prototype.h
drivers/net/ethernet/intel/i40evf/i40e_register.h
drivers/net/ethernet/intel/i40evf/i40e_status.h
drivers/net/ethernet/intel/i40evf/i40e_trace.h
drivers/net/ethernet/intel/i40evf/i40e_txrx.c
drivers/net/ethernet/intel/i40evf/i40e_txrx.h
drivers/net/ethernet/intel/i40evf/i40e_type.h
drivers/net/ethernet/intel/i40evf/i40evf.h
drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
drivers/net/ethernet/intel/i40evf/i40evf_main.c
drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
drivers/net/ethernet/intel/igb/Makefile
drivers/net/ethernet/intel/igb/e1000_82575.c
drivers/net/ethernet/intel/igb/e1000_82575.h
drivers/net/ethernet/intel/igb/e1000_defines.h
drivers/net/ethernet/intel/igb/e1000_hw.h
drivers/net/ethernet/intel/igb/e1000_i210.c
drivers/net/ethernet/intel/igb/e1000_i210.h
drivers/net/ethernet/intel/igb/e1000_mac.c
drivers/net/ethernet/intel/igb/e1000_mac.h
drivers/net/ethernet/intel/igb/e1000_mbx.c
drivers/net/ethernet/intel/igb/e1000_mbx.h
drivers/net/ethernet/intel/igb/e1000_nvm.c
drivers/net/ethernet/intel/igb/e1000_nvm.h
drivers/net/ethernet/intel/igb/e1000_phy.c
drivers/net/ethernet/intel/igb/e1000_phy.h
drivers/net/ethernet/intel/igb/e1000_regs.h
drivers/net/ethernet/intel/igb/igb.h
drivers/net/ethernet/intel/igb/igb_ethtool.c
drivers/net/ethernet/intel/igb/igb_hwmon.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/igb/igb_ptp.c
drivers/net/ethernet/intel/igbvf/Makefile
drivers/net/ethernet/intel/igbvf/defines.h
drivers/net/ethernet/intel/igbvf/ethtool.c
drivers/net/ethernet/intel/igbvf/igbvf.h
drivers/net/ethernet/intel/igbvf/mbx.c
drivers/net/ethernet/intel/igbvf/mbx.h
drivers/net/ethernet/intel/igbvf/netdev.c
drivers/net/ethernet/intel/igbvf/regs.h
drivers/net/ethernet/intel/igbvf/vf.c
drivers/net/ethernet/intel/igbvf/vf.h
drivers/net/ethernet/intel/ixgb/Makefile
drivers/net/ethernet/intel/ixgb/ixgb.h
drivers/net/ethernet/intel/ixgb/ixgb_ee.h
drivers/net/ethernet/intel/ixgb/ixgb_hw.h
drivers/net/ethernet/intel/ixgb/ixgb_ids.h
drivers/net/ethernet/intel/ixgb/ixgb_osdep.h
drivers/net/ethernet/intel/ixgbe/Makefile
drivers/net/ethernet/intel/ixgbe/ixgbe.h
drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h
drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h
drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h
drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
drivers/net/ethernet/intel/ixgbevf/Makefile
drivers/net/ethernet/intel/ixgbevf/defines.h
drivers/net/ethernet/intel/ixgbevf/ethtool.c
drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
drivers/net/ethernet/intel/ixgbevf/mbx.h
drivers/net/ethernet/intel/ixgbevf/regs.h
drivers/net/ethernet/intel/ixgbevf/vf.h
drivers/net/ethernet/marvell/mvpp2.c
drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/en_port.c
drivers/net/ethernet/mellanox/mlx4/en_rx.c
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
drivers/net/ethernet/mellanox/mlx5/core/cq.c
drivers/net/ethernet/mellanox/mlx5/core/dev.c
drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlxsw/Kconfig
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
drivers/net/ethernet/mellanox/mlxsw/pci.c
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/mellanox/mlxsw/switchx2.c
drivers/net/ethernet/microchip/Kconfig
drivers/net/ethernet/microchip/Makefile
drivers/net/ethernet/microchip/lan743x_main.c [new file with mode: 0644]
drivers/net/ethernet/microchip/lan743x_main.h [new file with mode: 0644]
drivers/net/ethernet/natsemi/Kconfig
drivers/net/ethernet/natsemi/Makefile
drivers/net/ethernet/natsemi/jazzsonic.c
drivers/net/ethernet/natsemi/macsonic.c
drivers/net/ethernet/natsemi/sonic.c
drivers/net/ethernet/natsemi/sonic.h
drivers/net/ethernet/natsemi/xtsonic.c
drivers/net/ethernet/qlogic/qed/qed_cxt.c
drivers/net/ethernet/qlogic/qed/qed_dev.c
drivers/net/ethernet/qlogic/qed/qed_iwarp.c
drivers/net/ethernet/qlogic/qed/qed_rdma.c
drivers/net/ethernet/qlogic/qede/qede_main.c
drivers/net/ethernet/qlogic/qede/qede_ptp.c
drivers/net/ethernet/qualcomm/emac/emac-mac.c
drivers/net/ethernet/qualcomm/qca_spi.c
drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
drivers/net/ethernet/realtek/r8169.c
drivers/net/ethernet/renesas/ravb_main.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/renesas/sh_eth.h
drivers/net/ethernet/sfc/ef10.c
drivers/net/ethernet/sfc/efx.c
drivers/net/ethernet/sfc/efx.h
drivers/net/ethernet/sfc/ethtool.c
drivers/net/ethernet/sfc/falcon/enum.h
drivers/net/ethernet/sfc/farch.c
drivers/net/ethernet/sfc/filter.h
drivers/net/ethernet/sfc/mcdi_pcol.h
drivers/net/ethernet/sfc/mcdi_port.c
drivers/net/ethernet/sfc/net_driver.h
drivers/net/ethernet/sfc/nic.h
drivers/net/ethernet/sfc/siena.c
drivers/net/ethernet/smsc/smsc911x.c
drivers/net/ethernet/socionext/sni_ave.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/sun/sunvnet.c
drivers/net/ethernet/ti/cpsw.c
drivers/net/ethernet/ti/davinci_cpdma.c
drivers/net/ethernet/ti/davinci_cpdma.h
drivers/net/geneve.c
drivers/net/gtp.c
drivers/net/hyperv/Makefile
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/hyperv/netvsc_trace.c [new file with mode: 0644]
drivers/net/hyperv/netvsc_trace.h [new file with mode: 0644]
drivers/net/hyperv/rndis_filter.c
drivers/net/ieee802154/Kconfig
drivers/net/ieee802154/Makefile
drivers/net/ieee802154/mcr20a.c [new file with mode: 0644]
drivers/net/ieee802154/mcr20a.h [new file with mode: 0644]
drivers/net/ipvlan/ipvlan.h
drivers/net/ipvlan/ipvlan_core.c
drivers/net/ipvlan/ipvlan_main.c
drivers/net/macsec.c
drivers/net/macvlan.c
drivers/net/phy/aquantia.c
drivers/net/phy/bcm-phy-lib.c
drivers/net/phy/bcm7xxx.c
drivers/net/phy/cortina.c
drivers/net/phy/intel-xway.c
drivers/net/phy/marvell.c
drivers/net/phy/marvell10g.c
drivers/net/phy/mdio-mux-mmioreg.c
drivers/net/phy/micrel.c
drivers/net/phy/phy-c45.c
drivers/net/phy/phy-core.c
drivers/net/phy/phy.c
drivers/net/phy/phy_device.c
drivers/net/phy/phylink.c
drivers/net/phy/realtek.c
drivers/net/phy/sfp-bus.c
drivers/net/phy/sfp.c
drivers/net/phy/teranetics.c
drivers/net/ppp/ppp_generic.c
drivers/net/ppp/pppoe.c
drivers/net/team/team.c
drivers/net/tun.c
drivers/net/usb/ax88179_178a.c
drivers/net/usb/cdc_eem.c
drivers/net/usb/cdc_ether.c
drivers/net/usb/kalmia.c
drivers/net/usb/lg-vl600.c
drivers/net/usb/r8152.c
drivers/net/usb/usbnet.c
drivers/net/virtio_net.c
drivers/net/vmxnet3/vmxnet3_drv.c
drivers/net/vmxnet3/vmxnet3_int.h
drivers/net/vrf.c
drivers/net/vxlan.c
drivers/net/wan/hdlc_ppp.c
drivers/net/wimax/i2400m/usb-rx.c
drivers/net/wireless/admtek/Kconfig
drivers/net/wireless/ath/Kconfig
drivers/net/wireless/ath/ath9k/htc_drv_init.c
drivers/net/wireless/atmel/Kconfig
drivers/net/wireless/broadcom/Kconfig
drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h
drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
drivers/net/wireless/broadcom/brcm80211/brcmsmac/channel.c
drivers/net/wireless/cisco/Kconfig
drivers/net/wireless/intel/Kconfig
drivers/net/wireless/intel/iwlwifi/Kconfig
drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h
drivers/net/wireless/intel/iwlwifi/fw/dbg.c
drivers/net/wireless/intel/iwlwifi/fw/dbg.h
drivers/net/wireless/intel/iwlwifi/fw/debugfs.h
drivers/net/wireless/intel/iwlwifi/fw/init.c
drivers/net/wireless/intel/iwlwifi/fw/runtime.h
drivers/net/wireless/intel/iwlwifi/mvm/d3.c
drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
drivers/net/wireless/intel/iwlwifi/mvm/ops.c
drivers/net/wireless/intel/iwlwifi/mvm/rs.c
drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
drivers/net/wireless/intel/iwlwifi/mvm/sta.c
drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
drivers/net/wireless/intel/iwlwifi/mvm/tx.c
drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
drivers/net/wireless/intel/iwlwifi/pcie/tx.c
drivers/net/wireless/intersil/Kconfig
drivers/net/wireless/mac80211_hwsim.c
drivers/net/wireless/marvell/Kconfig
drivers/net/wireless/marvell/mwifiex/11n.c
drivers/net/wireless/marvell/mwifiex/cfg80211.c
drivers/net/wireless/marvell/mwifiex/cmdevt.c
drivers/net/wireless/marvell/mwifiex/decl.h
drivers/net/wireless/marvell/mwifiex/fw.h
drivers/net/wireless/marvell/mwifiex/main.c
drivers/net/wireless/marvell/mwifiex/main.h
drivers/net/wireless/marvell/mwifiex/sta_cmd.c
drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c
drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
drivers/net/wireless/mediatek/Kconfig
drivers/net/wireless/mediatek/mt76/mac80211.c
drivers/net/wireless/mediatek/mt76/mt76.h
drivers/net/wireless/mediatek/mt76/mt76x2.h
drivers/net/wireless/mediatek/mt76/mt76x2_eeprom.c
drivers/net/wireless/mediatek/mt76/mt76x2_init.c
drivers/net/wireless/mediatek/mt76/mt76x2_mac.c
drivers/net/wireless/mediatek/mt76/mt76x2_main.c
drivers/net/wireless/mediatek/mt76/mt76x2_phy.c
drivers/net/wireless/mediatek/mt76/mt76x2_regs.h
drivers/net/wireless/mediatek/mt7601u/eeprom.c
drivers/net/wireless/mediatek/mt7601u/initvals.h
drivers/net/wireless/mediatek/mt7601u/mac.c
drivers/net/wireless/mediatek/mt7601u/mac.h
drivers/net/wireless/mediatek/mt7601u/main.c
drivers/net/wireless/mediatek/mt7601u/mcu.c
drivers/net/wireless/mediatek/mt7601u/mt7601u.h
drivers/net/wireless/mediatek/mt7601u/usb.c
drivers/net/wireless/quantenna/Kconfig
drivers/net/wireless/quantenna/qtnfmac/bus.h
drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_ipc.h
drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_regs_pearl.h
drivers/net/wireless/ralink/Kconfig
drivers/net/wireless/realtek/Kconfig
drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
drivers/net/wireless/realtek/rtlwifi/base.c
drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.c [new file with mode: 0644]
drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.h [new file with mode: 0644]
drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
drivers/net/wireless/realtek/rtlwifi/efuse.c
drivers/net/wireless/realtek/rtlwifi/efuse.h
drivers/net/wireless/realtek/rtlwifi/pci.c
drivers/net/wireless/realtek/rtlwifi/rc.c
drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c
drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
drivers/net/wireless/realtek/rtlwifi/wifi.h
drivers/net/wireless/rsi/Kconfig
drivers/net/wireless/rsi/Makefile
drivers/net/wireless/rsi/rsi_91x_coex.c [new file with mode: 0644]
drivers/net/wireless/rsi/rsi_91x_core.c
drivers/net/wireless/rsi/rsi_91x_hal.c
drivers/net/wireless/rsi/rsi_91x_main.c
drivers/net/wireless/rsi/rsi_91x_mgmt.c
drivers/net/wireless/rsi/rsi_91x_sdio.c
drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
drivers/net/wireless/rsi/rsi_91x_usb.c
drivers/net/wireless/rsi/rsi_91x_usb_ops.c
drivers/net/wireless/rsi/rsi_coex.h [new file with mode: 0644]
drivers/net/wireless/rsi/rsi_common.h
drivers/net/wireless/rsi/rsi_hal.h
drivers/net/wireless/rsi/rsi_main.h
drivers/net/wireless/rsi/rsi_mgmt.h
drivers/net/wireless/rsi/rsi_sdio.h
drivers/net/wireless/rsi/rsi_usb.h
drivers/net/wireless/st/Kconfig
drivers/net/wireless/ti/Kconfig
drivers/net/wireless/ti/wl1251/main.c
drivers/net/wireless/ti/wl1251/tx.c
drivers/net/wireless/zydas/Kconfig
drivers/net/wireless/zydas/zd1211rw/zd_mac.c
drivers/net/xen-netback/rx.c
drivers/net/xen-netfront.c
drivers/nvdimm/blk.c
drivers/nvdimm/btt.c
drivers/nvdimm/pfn_devs.c
drivers/nvdimm/pmem.c
drivers/nvdimm/region_devs.c
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.c
drivers/nvme/host/fc.c
drivers/nvme/host/multipath.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/target/core.c
drivers/nvme/target/loop.c
drivers/pci/dwc/pcie-designware-host.c
drivers/pci/quirks.c
drivers/pci/setup-res.c
drivers/perf/arm_pmu.c
drivers/phy/qualcomm/phy-qcom-ufs.c
drivers/pinctrl/meson/pinctrl-meson-axg.c
drivers/platform/chrome/chromeos_laptop.c
drivers/platform/x86/Kconfig
drivers/platform/x86/Makefile
drivers/platform/x86/dell-smbios-base.c [moved from drivers/platform/x86/dell-smbios.c with 95% similarity]
drivers/platform/x86/dell-smbios-smm.c
drivers/platform/x86/dell-smbios-wmi.c
drivers/platform/x86/dell-smbios.h
drivers/platform/x86/dell-wmi.c
drivers/platform/x86/intel-hid.c
drivers/platform/x86/intel-vbtn.c
drivers/platform/x86/wmi.c
drivers/regulator/core.c
drivers/regulator/stm32-vrefbuf.c
drivers/s390/block/dasd.c
drivers/s390/cio/device_fsm.c
drivers/s390/cio/device_ops.c
drivers/s390/cio/io_sch.h
drivers/s390/net/qeth_core.h
drivers/s390/net/qeth_core_main.c
drivers/s390/net/qeth_l2_main.c
drivers/s390/net/qeth_l3.h
drivers/s390/net/qeth_l3_main.c
drivers/s390/net/qeth_l3_sys.c
drivers/scsi/hosts.c
drivers/scsi/libsas/sas_scsi_host.c
drivers/scsi/megaraid/megaraid_sas_fusion.c
drivers/scsi/mpt3sas/mpt3sas_base.c
drivers/scsi/mpt3sas/mpt3sas_base.h
drivers/scsi/mpt3sas/mpt3sas_scsih.c
drivers/scsi/qedi/qedi_fw.c
drivers/scsi/qla2xxx/qla_def.h
drivers/scsi/qla2xxx/qla_gs.c
drivers/scsi/qla2xxx/qla_init.c
drivers/scsi/qla2xxx/qla_os.c
drivers/scsi/qla2xxx/qla_target.c
drivers/scsi/scsi_error.c
drivers/scsi/scsi_lib.c
drivers/scsi/sd.c
drivers/scsi/sd_zbc.c
drivers/scsi/storvsc_drv.c
drivers/soc/fsl/qbman/qman.c
drivers/soc/imx/gpc.c
drivers/ssb/Kconfig
drivers/ssb/main.c
drivers/staging/android/ashmem.c
drivers/staging/comedi/drivers.c
drivers/tty/n_tty.c
drivers/tty/serial/8250/8250_pci.c
drivers/tty/serial/atmel_serial.c
drivers/tty/serial/earlycon.c
drivers/tty/serial/imx.c
drivers/tty/serial/serial_core.c
drivers/tty/serial/sh-sci.c
drivers/tty/tty_io.c
drivers/usb/core/message.c
drivers/usb/core/quirks.c
drivers/usb/dwc2/params.c
drivers/usb/dwc3/core.c
drivers/usb/gadget/function/f_eem.c
drivers/usb/gadget/function/f_fs.c
drivers/usb/host/ohci-hcd.c
drivers/usb/host/xhci-dbgcap.c
drivers/usb/host/xhci-dbgtty.c
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci-plat.c
drivers/usb/host/xhci-rcar.c
drivers/usb/host/xhci.c
drivers/usb/host/xhci.h
drivers/usb/mon/mon_text.c
drivers/usb/musb/musb_core.c
drivers/usb/storage/uas.c
drivers/usb/storage/unusual_devs.h
drivers/usb/typec/fusb302/fusb302.c
drivers/usb/typec/tcpm.c
drivers/usb/usbip/vudc_sysfs.c
drivers/vfio/vfio_iommu_type1.c
drivers/vhost/net.c
drivers/vhost/vsock.c
drivers/video/fbdev/sbuslib.c
drivers/virtio/virtio_ring.c
drivers/watchdog/Kconfig
drivers/watchdog/f71808e_wdt.c
drivers/watchdog/hpwdt.c
drivers/watchdog/sbsa_gwdt.c
drivers/watchdog/wdat_wdt.c
drivers/xen/events/events_base.c
drivers/xen/pvcalls-back.c
drivers/xen/pvcalls-front.c
drivers/xen/xenbus/xenbus_probe.c
fs/aio.c
fs/block_dev.c
fs/btrfs/backref.c
fs/btrfs/ctree.h
fs/btrfs/inode-item.c
fs/btrfs/inode.c
fs/btrfs/raid56.c
fs/btrfs/relocation.c
fs/btrfs/send.c
fs/btrfs/super.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/ceph/caps.c
fs/ceph/dir.c
fs/ceph/super.c
fs/ceph/super.h
fs/dcache.c
fs/direct-io.c
fs/gfs2/bmap.c
fs/hugetlbfs/inode.c
fs/lockd/svc.c
fs/namei.c
fs/nfs/callback_proc.c
fs/nfs/direct.c
fs/nfs/inode.c
fs/nfs/nfs3proc.c
fs/nfs/nfs4client.c
fs/nfs/pnfs.c
fs/nfs/super.c
fs/nfs/write.c
fs/nfs_common/grace.c
fs/nfsd/nfs4state.c
fs/overlayfs/Kconfig
fs/overlayfs/export.c
fs/overlayfs/inode.c
fs/overlayfs/namei.c
fs/overlayfs/overlayfs.h
fs/overlayfs/super.c
fs/sysfs/symlink.c
fs/xfs/scrub/agheader.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_refcount_item.c
fs/xfs/xfs_rmap_item.c
fs/xfs/xfs_super.c
include/asm-generic/pgtable.h
include/drm/drm_drv.h
include/kvm/arm_vgic.h
include/linux/bio.h
include/linux/bpf-cgroup.h
include/linux/bpf.h
include/linux/bpf_types.h
include/linux/cgroup-defs.h
include/linux/compat.h
include/linux/compiler-clang.h
include/linux/compiler-gcc.h
include/linux/ethtool.h
include/linux/filter.h
include/linux/fs.h
include/linux/genhd.h
include/linux/if_tun.h
include/linux/if_vlan.h
include/linux/init.h
include/linux/irqchip/arm-gic-v3.h
include/linux/irqchip/arm-gic.h
include/linux/jump_label.h
include/linux/kernel.h
include/linux/kvm_host.h
include/linux/memblock.h
include/linux/mlx5/accel.h [new file with mode: 0644]
include/linux/mlx5/driver.h
include/linux/mlx5/eswitch.h [new file with mode: 0644]
include/linux/mlx5/fs.h
include/linux/mlx5/fs_helpers.h [new file with mode: 0644]
include/linux/mlx5/mlx5_ifc.h
include/linux/mlx5/mlx5_ifc_fpga.h
include/linux/mroute.h
include/linux/mroute6.h
include/linux/mroute_base.h [new file with mode: 0644]
include/linux/mutex.h
include/linux/net.h
include/linux/netdevice.h
include/linux/netfilter/x_tables.h
include/linux/nospec.h
include/linux/of_pci.h
include/linux/percpu-refcount.h
include/linux/phy.h
include/linux/rhashtable.h
include/linux/rtnetlink.h
include/linux/sfp.h
include/linux/skbuff.h
include/linux/socket.h
include/linux/tty.h
include/linux/u64_stats_sync.h
include/linux/usb/quirks.h
include/linux/workqueue.h
include/media/demux.h
include/media/dmxdev.h
include/media/dvb_demux.h
include/media/dvb_vb2.h
include/net/Space.h
include/net/act_api.h
include/net/addrconf.h
include/net/cfg80211.h
include/net/devlink.h
include/net/dsa.h
include/net/dst.h
include/net/dst_cache.h
include/net/ethoc.h
include/net/fib_rules.h
include/net/flow.h
include/net/gre.h
include/net/inet_connection_sock.h
include/net/ip.h
include/net/ip6_fib.h
include/net/ip6_route.h
include/net/ip_fib.h
include/net/ip_tunnels.h
include/net/ipv6.h
include/net/mac80211.h
include/net/net_namespace.h
include/net/netevent.h
include/net/netns/ipv4.h
include/net/netns/ipv6.h
include/net/pkt_cls.h
include/net/route.h
include/net/rsi_91x.h [new file with mode: 0644]
include/net/sch_generic.h
include/net/sctp/auth.h
include/net/sctp/command.h
include/net/sctp/sctp.h
include/net/sctp/sm.h
include/net/sctp/structs.h
include/net/sock.h
include/net/tcp.h
include/net/tls.h
include/net/xfrm.h
include/rdma/ib_verbs.h
include/scsi/scsi_cmnd.h
include/scsi/scsi_host.h
include/soc/arc/mcip.h
include/trace/events/mmc.h
include/uapi/asm-generic/siginfo.h
include/uapi/drm/virtgpu_drm.h
include/uapi/linux/batadv_packet.h
include/uapi/linux/batman_adv.h
include/uapi/linux/blktrace_api.h
include/uapi/linux/bpf.h
include/uapi/linux/bpf_perf_event.h
include/uapi/linux/dvb/dmx.h
include/uapi/linux/errqueue.h
include/uapi/linux/ethtool.h
include/uapi/linux/fib_rules.h
include/uapi/linux/if_ether.h
include/uapi/linux/if_link.h
include/uapi/linux/kvm.h
include/uapi/linux/ncsi.h [new file with mode: 0644]
include/uapi/linux/pkt_cls.h
include/uapi/linux/psp-sev.h
include/uapi/linux/rds.h
include/uapi/linux/sctp.h
include/uapi/linux/tcp.h
include/uapi/linux/tipc.h
include/uapi/linux/tipc_netlink.h
include/uapi/linux/tipc_sockets_diag.h [new file with mode: 0644]
include/uapi/linux/tls.h
include/uapi/misc/ocxl.h
init/main.c
kernel/bpf/inode.c
kernel/bpf/sockmap.c
kernel/bpf/stackmap.c
kernel/bpf/syscall.c
kernel/bpf/verifier.c
kernel/cgroup/cgroup.c
kernel/compat.c
kernel/events/core.c
kernel/extable.c
kernel/fail_function.c
kernel/irq/matrix.c
kernel/jump_label.c
kernel/locking/rtmutex.c
kernel/memremap.c
kernel/module.c
kernel/panic.c
kernel/printk/printk.c
kernel/sched/core.c
kernel/time/timer.c
kernel/trace/bpf_trace.c
kernel/workqueue.c
lib/btree.c
lib/bug.c
lib/dma-debug.c
lib/idr.c
lib/ioremap.c
lib/kobject_uevent.c
lib/percpu-refcount.c
lib/rhashtable.c
lib/test_bpf.c
lib/test_kmod.c
lib/test_rhashtable.c
mm/gup.c
mm/huge_memory.c
mm/hugetlb.c
mm/khugepaged.c
mm/memblock.c
mm/mempolicy.c
mm/page_alloc.c
mm/percpu-km.c
mm/percpu-vm.c
mm/percpu.c
mm/shmem.c
mm/vmscan.c
net/8021q/vlan.c
net/8021q/vlan_core.c
net/batman-adv/Kconfig
net/batman-adv/Makefile
net/batman-adv/bat_algo.c
net/batman-adv/bat_algo.h
net/batman-adv/bat_iv_ogm.c
net/batman-adv/bat_iv_ogm.h
net/batman-adv/bat_v.c
net/batman-adv/bat_v.h
net/batman-adv/bat_v_elp.c
net/batman-adv/bat_v_elp.h
net/batman-adv/bat_v_ogm.c
net/batman-adv/bat_v_ogm.h
net/batman-adv/bitarray.c
net/batman-adv/bitarray.h
net/batman-adv/bridge_loop_avoidance.c
net/batman-adv/bridge_loop_avoidance.h
net/batman-adv/debugfs.c
net/batman-adv/debugfs.h
net/batman-adv/distributed-arp-table.c
net/batman-adv/distributed-arp-table.h
net/batman-adv/fragmentation.c
net/batman-adv/fragmentation.h
net/batman-adv/gateway_client.c
net/batman-adv/gateway_client.h
net/batman-adv/gateway_common.c
net/batman-adv/gateway_common.h
net/batman-adv/hard-interface.c
net/batman-adv/hard-interface.h
net/batman-adv/hash.c
net/batman-adv/hash.h
net/batman-adv/icmp_socket.c
net/batman-adv/icmp_socket.h
net/batman-adv/log.c
net/batman-adv/log.h
net/batman-adv/main.c
net/batman-adv/main.h
net/batman-adv/multicast.c
net/batman-adv/multicast.h
net/batman-adv/netlink.c
net/batman-adv/netlink.h
net/batman-adv/network-coding.c
net/batman-adv/network-coding.h
net/batman-adv/originator.c
net/batman-adv/originator.h
net/batman-adv/routing.c
net/batman-adv/routing.h
net/batman-adv/send.c
net/batman-adv/send.h
net/batman-adv/soft-interface.c
net/batman-adv/soft-interface.h
net/batman-adv/sysfs.c
net/batman-adv/sysfs.h
net/batman-adv/tp_meter.c
net/batman-adv/tp_meter.h
net/batman-adv/translation-table.c
net/batman-adv/translation-table.h
net/batman-adv/tvlv.c
net/batman-adv/tvlv.h
net/batman-adv/types.h
net/bluetooth/smp.c
net/bridge/br.c
net/bridge/br_device.c
net/bridge/br_if.c
net/bridge/br_netfilter_hooks.c
net/bridge/br_private.h
net/bridge/br_vlan.c
net/bridge/netfilter/ebt_among.c
net/bridge/netfilter/ebtable_broute.c
net/bridge/netfilter/ebtable_filter.c
net/bridge/netfilter/ebtable_nat.c
net/bridge/netfilter/ebtables.c
net/bridge/netfilter/nf_log_bridge.c
net/caif/caif_dev.c
net/can/af_can.c
net/can/bcm.c
net/can/gw.c
net/ceph/ceph_common.c
net/core/dev.c
net/core/dev_ioctl.c
net/core/devlink.c
net/core/dst_cache.c
net/core/ethtool.c
net/core/fib_rules.c
net/core/filter.c
net/core/flow_dissector.c
net/core/net_namespace.c
net/core/pktgen.c
net/core/rtnetlink.c
net/core/skbuff.c
net/core/sock.c
net/core/sock_diag.c
net/core/sysctl_net_core.c
net/dccp/ipv4.c
net/dccp/ipv6.c
net/dccp/proto.c
net/dsa/legacy.c
net/dsa/master.c
net/dsa/slave.c
net/ieee802154/6lowpan/core.c
net/ieee802154/6lowpan/reassembly.c
net/ieee802154/core.c
net/ipv4/Kconfig
net/ipv4/Makefile
net/ipv4/fib_rules.c
net/ipv4/fib_semantics.c
net/ipv4/fib_trie.c
net/ipv4/fou.c
net/ipv4/inet_diag.c
net/ipv4/inet_fragment.c
net/ipv4/inetpeer.c
net/ipv4/ip_forward.c
net/ipv4/ip_gre.c
net/ipv4/ip_input.c
net/ipv4/ip_output.c
net/ipv4/ip_sockglue.c
net/ipv4/ip_tunnel.c
net/ipv4/ip_vti.c
net/ipv4/ipip.c
net/ipv4/ipmr.c
net/ipv4/ipmr_base.c [new file with mode: 0644]
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/arptable_filter.c
net/ipv4/netfilter/ipt_CLUSTERIP.c
net/ipv4/netfilter/iptable_mangle.c
net/ipv4/netfilter/iptable_nat.c
net/ipv4/netfilter/iptable_raw.c
net/ipv4/netfilter/iptable_security.c
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
net/ipv4/netfilter/nf_defrag_ipv4.c
net/ipv4/netfilter/nf_flow_table_ipv4.c
net/ipv4/netfilter/nf_log_arp.c
net/ipv4/netfilter/nf_log_ipv4.c
net/ipv4/proc.c
net/ipv4/raw.c
net/ipv4/route.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_bbr.c
net/ipv4/tcp_illinois.c
net/ipv4/tcp_input.c
net/ipv4/tcp_output.c
net/ipv4/tcp_timer.c
net/ipv4/tunnel4.c
net/ipv4/udp.c
net/ipv4/xfrm4_mode_tunnel.c
net/ipv4/xfrm4_output.c
net/ipv4/xfrm4_policy.c
net/ipv6/Kconfig
net/ipv6/addrconf.c
net/ipv6/anycast.c
net/ipv6/datagram.c
net/ipv6/exthdrs_core.c
net/ipv6/fib6_rules.c
net/ipv6/icmp.c
net/ipv6/ila/ila_xlat.c
net/ipv6/ip6_fib.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_output.c
net/ipv6/ip6_tunnel.c
net/ipv6/ip6_vti.c
net/ipv6/ip6mr.c
net/ipv6/ipv6_sockglue.c
net/ipv6/mcast.c
net/ipv6/ndisc.c
net/ipv6/netfilter.c
net/ipv6/netfilter/ip6_tables.c
net/ipv6/netfilter/ip6t_rpfilter.c
net/ipv6/netfilter/ip6table_filter.c
net/ipv6/netfilter/ip6table_mangle.c
net/ipv6/netfilter/ip6table_nat.c
net/ipv6/netfilter/ip6table_raw.c
net/ipv6/netfilter/ip6table_security.c
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
net/ipv6/netfilter/nf_conntrack_reasm.c
net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
net/ipv6/netfilter/nf_flow_table_ipv6.c
net/ipv6/netfilter/nf_log_ipv6.c
net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
net/ipv6/netfilter/nft_fib_ipv6.c
net/ipv6/proc.c
net/ipv6/route.c
net/ipv6/seg6_iptunnel.c
net/ipv6/seg6_local.c
net/ipv6/sit.c
net/ipv6/sysctl_net_ipv6.c
net/ipv6/udp.c
net/ipv6/xfrm6_mode_tunnel.c
net/ipv6/xfrm6_output.c
net/ipv6/xfrm6_policy.c
net/ipv6/xfrm6_state.c
net/ipv6/xfrm6_tunnel.c
net/iucv/af_iucv.c
net/kcm/kcmproc.c
net/kcm/kcmsock.c
net/key/af_key.c
net/l2tp/l2tp_core.c
net/l2tp/l2tp_core.h
net/l2tp/l2tp_ip.c
net/l2tp/l2tp_ip6.c
net/l2tp/l2tp_ppp.c
net/llc/llc_sap.c
net/mac80211/agg-rx.c
net/mac80211/cfg.c
net/mac80211/debugfs.c
net/mac80211/mlme.c
net/mac80211/rx.c
net/mac80211/tx.c
net/mpls/af_mpls.c
net/ncsi/Makefile
net/ncsi/internal.h
net/ncsi/ncsi-manage.c
net/ncsi/ncsi-netlink.c [new file with mode: 0644]
net/ncsi/ncsi-netlink.h [new file with mode: 0644]
net/netfilter/ipset/ip_set_core.c
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/ipvs/ip_vs_ftp.c
net/netfilter/ipvs/ip_vs_lblc.c
net/netfilter/ipvs/ip_vs_lblcr.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_proto_gre.c
net/netfilter/nf_conntrack_standalone.c
net/netfilter/nf_log_netdev.c
net/netfilter/nf_synproxy_core.c
net/netfilter/nf_tables_api.c
net/netfilter/nfnetlink.c
net/netfilter/nfnetlink_acct.c
net/netfilter/nfnetlink_cttimeout.c
net/netfilter/nfnetlink_log.c
net/netfilter/nfnetlink_queue.c
net/netfilter/nft_set_hash.c
net/netfilter/x_tables.c
net/netfilter/xt_hashlimit.c
net/netfilter/xt_recent.c
net/netlink/genetlink.c
net/openvswitch/datapath.c
net/openvswitch/meter.c
net/openvswitch/vport.c
net/phonet/pn_dev.c
net/qrtr/smd.c
net/rds/af_rds.c
net/rds/connection.c
net/rds/ib.c
net/rds/message.c
net/rds/rds.h
net/rds/recv.c
net/rds/tcp.c
net/rds/tcp_listen.c
net/rxrpc/net_ns.c
net/rxrpc/recvmsg.c
net/sched/act_api.c
net/sched/act_bpf.c
net/sched/act_connmark.c
net/sched/act_csum.c
net/sched/act_gact.c
net/sched/act_ife.c
net/sched/act_ipt.c
net/sched/act_mirred.c
net/sched/act_nat.c
net/sched/act_pedit.c
net/sched/act_police.c
net/sched/act_sample.c
net/sched/act_simple.c
net/sched/act_skbedit.c
net/sched/act_skbmod.c
net/sched/act_tunnel_key.c
net/sched/act_vlan.c
net/sched/cls_api.c
net/sched/cls_flower.c
net/sched/sch_api.c
net/sched/sch_generic.c
net/sched/sch_netem.c
net/sched/sch_prio.c
net/sched/sch_tbf.c
net/sctp/auth.c
net/sctp/chunk.c
net/sctp/input.c
net/sctp/inqueue.c
net/sctp/objcnt.c
net/sctp/offload.c
net/sctp/output.c
net/sctp/proc.c
net/sctp/protocol.c
net/sctp/sm_make_chunk.c
net/sctp/sm_sideeffect.c
net/sctp/sm_statefuns.c
net/sctp/socket.c
net/smc/af_smc.c
net/smc/smc.h
net/smc/smc_cdc.c
net/smc/smc_clc.c
net/smc/smc_clc.h
net/smc/smc_close.c
net/smc/smc_core.c
net/smc/smc_core.h
net/smc/smc_ib.c
net/smc/smc_llc.c
net/smc/smc_llc.h
net/smc/smc_wr.h
net/socket.c
net/tipc/Kconfig
net/tipc/Makefile
net/tipc/addr.c
net/tipc/addr.h
net/tipc/bcast.c
net/tipc/bearer.c
net/tipc/bearer.h
net/tipc/core.c
net/tipc/core.h
net/tipc/diag.c [new file with mode: 0644]
net/tipc/discover.c
net/tipc/discover.h
net/tipc/group.c
net/tipc/link.c
net/tipc/link.h
net/tipc/msg.c
net/tipc/msg.h
net/tipc/name_distr.c
net/tipc/name_distr.h
net/tipc/name_table.c
net/tipc/name_table.h
net/tipc/net.c
net/tipc/net.h
net/tipc/node.c
net/tipc/node.h
net/tipc/socket.c
net/tipc/socket.h
net/tipc/udp_media.c
net/tls/Kconfig
net/tls/tls_main.c
net/tls/tls_sw.c
net/wireless/Kconfig
net/wireless/util.c
net/xfrm/xfrm_device.c
net/xfrm/xfrm_ipcomp.c
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_replay.c
net/xfrm/xfrm_state.c
net/xfrm/xfrm_user.c
samples/bpf/Makefile
samples/bpf/bpf_load.c
samples/bpf/cpustat_kern.c [new file with mode: 0644]
samples/bpf/cpustat_user.c [new file with mode: 0644]
samples/bpf/tcbpf2_kern.c
samples/bpf/test_cgrp2_sock.sh
samples/bpf/test_cgrp2_sock2.sh
samples/bpf/test_tunnel_bpf.sh
samples/bpf/trace_event_kern.c
samples/bpf/trace_event_user.c
samples/bpf/xdp_redirect_user.c
samples/seccomp/Makefile
samples/sockmap/Makefile
samples/sockmap/sockmap_kern.c
samples/sockmap/sockmap_test.sh [new file with mode: 0755]
samples/sockmap/sockmap_user.c
scripts/Makefile.build
scripts/Makefile.lib
scripts/basic/fixdep.c
scripts/bloat-o-meter
scripts/coccinelle/api/memdup.cocci
scripts/kallsyms.c
scripts/kconfig/confdata.c
scripts/kconfig/kxgettext.c
scripts/kconfig/lkc.h
scripts/kconfig/lxdialog/check-lxdialog.sh
scripts/kconfig/menu.c
scripts/kconfig/symbol.c
scripts/kconfig/util.c
scripts/kconfig/zconf.l
scripts/kconfig/zconf.y
scripts/link-vmlinux.sh
security/selinux/hooks.c
security/smack/smack_netfilter.c
sound/core/control.c
sound/core/oss/pcm_oss.c
sound/core/seq/seq_clientmgr.c
sound/core/seq/seq_fifo.c
sound/core/seq/seq_memory.c
sound/core/seq/seq_memory.h
sound/core/seq/seq_prioq.c
sound/core/seq/seq_prioq.h
sound/core/seq/seq_queue.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_conexant.c
sound/pci/hda/patch_realtek.c
sound/soc/amd/acp-pcm-dma.c
sound/soc/amd/acp.h
sound/soc/codecs/hdmi-codec.c
sound/soc/codecs/rt5651.c
sound/soc/codecs/sgtl5000.c
sound/soc/codecs/wm_adsp.c
sound/soc/sunxi/sun4i-i2s.c
sound/usb/quirks-table.h
sound/x86/intel_hdmi_audio.c
tools/arch/x86/include/asm/cpufeatures.h
tools/bpf/Makefile
tools/bpf/bpftool/Documentation/bpftool-prog.rst
tools/bpf/bpftool/Makefile
tools/bpf/bpftool/bash-completion/bpftool
tools/bpf/bpftool/cfg.c [new file with mode: 0644]
tools/bpf/bpftool/cfg.h [new file with mode: 0644]
tools/bpf/bpftool/common.c
tools/bpf/bpftool/main.c
tools/bpf/bpftool/prog.c
tools/bpf/bpftool/xlated_dumper.c [new file with mode: 0644]
tools/bpf/bpftool/xlated_dumper.h [new file with mode: 0644]
tools/include/uapi/linux/bpf.h
tools/include/uapi/linux/kvm.h
tools/kvm/kvm_stat/kvm_stat
tools/kvm/kvm_stat/kvm_stat.txt
tools/lib/bpf/libbpf.c
tools/objtool/builtin-check.c
tools/objtool/builtin-orc.c
tools/objtool/builtin.h
tools/objtool/check.c
tools/objtool/check.h
tools/perf/Documentation/perf-kallsyms.txt
tools/perf/builtin-record.c
tools/perf/builtin-stat.c
tools/perf/builtin-top.c
tools/perf/perf.h
tools/perf/ui/browsers/annotate.c
tools/perf/util/auxtrace.c
tools/perf/util/record.c
tools/perf/util/trigger.h
tools/testing/radix-tree/idr-test.c
tools/testing/radix-tree/linux.c
tools/testing/radix-tree/linux/compiler_types.h [new file with mode: 0644]
tools/testing/radix-tree/linux/gfp.h
tools/testing/radix-tree/linux/slab.h
tools/testing/selftests/android/Makefile
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/bpf_helpers.h
tools/testing/selftests/bpf/bpf_rlimit.h [new file with mode: 0644]
tools/testing/selftests/bpf/sockmap_parse_prog.c
tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c [new file with mode: 0644]
tools/testing/selftests/bpf/sockmap_verdict_prog.c
tools/testing/selftests/bpf/test_align.c
tools/testing/selftests/bpf/test_dev_cgroup.c
tools/testing/selftests/bpf/test_lpm_map.c
tools/testing/selftests/bpf/test_lru_map.c
tools/testing/selftests/bpf/test_maps.c
tools/testing/selftests/bpf/test_progs.c
tools/testing/selftests/bpf/test_stacktrace_build_id.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_tag.c
tools/testing/selftests/bpf/test_tcpbpf_user.c
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/bpf/test_verifier_log.c
tools/testing/selftests/bpf/urandom_read.c [new file with mode: 0644]
tools/testing/selftests/futex/Makefile
tools/testing/selftests/memfd/config [new file with mode: 0644]
tools/testing/selftests/memory-hotplug/Makefile
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/config
tools/testing/selftests/net/fib-onlink-tests.sh
tools/testing/selftests/net/fib_tests.sh
tools/testing/selftests/net/forwarding/.gitignore [new file with mode: 0644]
tools/testing/selftests/net/forwarding/README [new file with mode: 0644]
tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/config [new file with mode: 0644]
tools/testing/selftests/net/forwarding/forwarding.config.sample [new file with mode: 0644]
tools/testing/selftests/net/forwarding/lib.sh [new file with mode: 0644]
tools/testing/selftests/net/forwarding/router.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/router_multipath.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/tc_actions.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/tc_chains.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/tc_common.sh [new file with mode: 0644]
tools/testing/selftests/net/forwarding/tc_flower.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/tc_shblocks.sh [new file with mode: 0755]
tools/testing/selftests/net/in_netns.sh
tools/testing/selftests/net/msg_zerocopy.c
tools/testing/selftests/net/pmtu.sh [new file with mode: 0755]
tools/testing/selftests/net/rtnetlink.sh
tools/testing/selftests/networking/timestamping/txtimestamp.c
tools/testing/selftests/powerpc/mm/subpage_prot.c
tools/testing/selftests/powerpc/tm/Makefile
tools/testing/selftests/powerpc/tm/tm-trap.c
tools/testing/selftests/pstore/config
tools/testing/selftests/sync/Makefile
tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json [new file with mode: 0644]
tools/testing/selftests/tc-testing/tc-tests/actions/csum.json [new file with mode: 0644]
tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
tools/testing/selftests/tc-testing/tc-tests/actions/police.json
tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json [new file with mode: 0644]
tools/testing/selftests/tc-testing/tdc.py
tools/testing/selftests/tc-testing/tdc_batch.py
tools/testing/selftests/vDSO/Makefile
tools/testing/selftests/vm/.gitignore
tools/testing/selftests/vm/run_vmtests
tools/testing/selftests/x86/entry_from_vm86.c
tools/testing/selftests/x86/test_vsyscall.c
virt/kvm/arm/arch_timer.c
virt/kvm/arm/arm.c
virt/kvm/arm/hyp/vgic-v3-sr.c
virt/kvm/arm/mmu.c
virt/kvm/arm/vgic/vgic-mmio.c
virt/kvm/arm/vgic/vgic-v2.c
virt/kvm/arm/vgic/vgic-v3.c
virt/kvm/arm/vgic/vgic.c
virt/kvm/arm/vgic/vgic.h
virt/kvm/kvm_main.c

index aa4296498859e49617cc7fc6f377c6dcb7a8f6c1..9ab0ef1dd1c72d2be0c3d68f8000b225f79df963 100644 (file)
 What:          /sys/class/ata_...
-Date:          August 2008
-Contact:       Gwendal Grignou<gwendal@google.com>
 Description:
-
-Provide a place in sysfs for storing the ATA topology of the system.  This allows
-retrieving various information about ATA objects.
+               Provide a place in sysfs for storing the ATA topology of the
+               system. This allows retrieving various information about ATA
+               objects.
 
 Files under /sys/class/ata_port
 -------------------------------
 
-       For each port, a directory ataX is created where X is the ata_port_id of
-       the port. The device parent is the ata host device.
+For each port, a directory ataX is created where X is the ata_port_id of the
+port. The device parent is the ata host device.
 
-idle_irq (read)
 
-       Number of IRQ received by the port while idle [some ata HBA only].
+What:          /sys/class/ata_port/ataX/nr_pmp_links
+What:          /sys/class/ata_port/ataX/idle_irq
+Date:          May, 2010
+KernelVersion: v2.6.37
+Contact:       Gwendal Grignou <gwendal@chromium.org>
+Description:
+               nr_pmp_links:   (RO) If a SATA Port Multiplier (PM) is
+                               connected, the number of links behind it.
 
-nr_pmp_links (read)
+               idle_irq:       (RO) Number of IRQ received by the port while
+                               idle [some ata HBA only].
 
-       If a SATA Port Multiplier (PM) is connected, number of link behind it.
+
+What:          /sys/class/ata_port/ataX/port_no
+Date:          May, 2013
+KernelVersion: v3.11
+Contact:       Gwendal Grignou <gwendal@chromium.org>
+Description:
+               (RO) Host local port number. While registering host controller,
+               port numbers are tracked based upon number of ports available on
+               the controller. This attribute is needed by udev for composing
+               persistent links in /dev/disk/by-path.
 
 Files under /sys/class/ata_link
 -------------------------------
 
-       Behind each port, there is a ata_link. If there is a SATA PM in the
-       topology, 15 ata_link objects are created.
-
-       If a link is behind a port, the directory name is linkX, where X is
-       ata_port_id of the port.
-       If a link is behind a PM, its name is linkX.Y where X is ata_port_id
-       of the parent port and Y the PM port.
+Behind each port, there is an ata_link. If there is a SATA PM in the topology, 15
+ata_link objects are created.
 
-hw_sata_spd_limit
+If a link is behind a port, the directory name is linkX, where X is ata_port_id
+of the port. If a link is behind a PM, its name is linkX.Y where X is
+ata_port_id of the parent port and Y the PM port.
 
-       Maximum speed supported by the connected SATA device.
 
-sata_spd_limit
+What:          /sys/class/ata_link/linkX[.Y]/hw_sata_spd_limit
+What:          /sys/class/ata_link/linkX[.Y]/sata_spd_limit
+What:          /sys/class/ata_link/linkX[.Y]/sata_spd
+Date:          May, 2010
+KernelVersion: v2.6.37
+Contact:       Gwendal Grignou <gwendal@chromium.org>
+Description:
+               hw_sata_spd_limit:      (RO) Maximum speed supported by the
+                                       connected SATA device.
 
-       Maximum speed imposed by libata.
+               sata_spd_limit:         (RO) Maximum speed imposed by libata.
 
-sata_spd
+               sata_spd:               (RO) Current speed of the link
+                                       e.g. 1.5, 3 Gbps etc.
 
-       Current speed of the link [1.5, 3Gps,...].
 
 Files under /sys/class/ata_device
 ---------------------------------
 
-       Behind each link, up to two ata device are created.
-       The name of the directory is devX[.Y].Z where:
-       - X is ata_port_id of the port where the device is connected,
-       - Y the port of the PM if any, and
-       - Z the device id: for PATA, there is usually 2 devices [0,1],
-       only 1 for SATA.
-
-class
-       Device class. Can be "ata" for disk, "atapi" for packet device,
-       "pmp" for PM, or "none" if no device was found behind the link.
-
-dma_mode
+Behind each link, up to two ata devices are created.
+The name of the directory is devX[.Y].Z where:
+- X is ata_port_id of the port where the device is connected,
+- Y the port of the PM if any, and
+- Z the device id: for PATA, there are usually 2 devices [0,1], only 1 for SATA.
+
+
+What:          /sys/class/ata_device/devX[.Y].Z/spdn_cnt
+What:          /sys/class/ata_device/devX[.Y].Z/gscr
+What:          /sys/class/ata_device/devX[.Y].Z/ering
+What:          /sys/class/ata_device/devX[.Y].Z/id
+What:          /sys/class/ata_device/devX[.Y].Z/pio_mode
+What:          /sys/class/ata_device/devX[.Y].Z/xfer_mode
+What:          /sys/class/ata_device/devX[.Y].Z/dma_mode
+What:          /sys/class/ata_device/devX[.Y].Z/class
+Date:          May, 2010
+KernelVersion: v2.6.37
+Contact:       Gwendal Grignou <gwendal@chromium.org>
+Description:
+               spdn_cnt:       (RO) Number of times libata decided to lower the
+                               speed of link due to errors.
 
-       Transfer modes supported by the device when in DMA mode.
-       Mostly used by PATA device.
+               gscr:           (RO) Cached result of the dump of PM GSCR
+                               register. Valid registers are:
 
-pio_mode
+                               0:      SATA_PMP_GSCR_PROD_ID,
+                               1:      SATA_PMP_GSCR_REV,
+                               2:      SATA_PMP_GSCR_PORT_INFO,
+                               32:     SATA_PMP_GSCR_ERROR,
+                               33:     SATA_PMP_GSCR_ERROR_EN,
+                               64:     SATA_PMP_GSCR_FEAT,
+                               96:     SATA_PMP_GSCR_FEAT_EN,
+                               130:    SATA_PMP_GSCR_SII_GPIO
 
-       Transfer modes supported by the device when in PIO mode.
-       Mostly used by PATA device.
+                               Only valid if the device is a PM.
 
-xfer_mode
+               ering:          (RO) Formatted output of the error ring of the
+                               device.
 
-       Current transfer mode.
+               id:             (RO) Cached result of IDENTIFY command, as
+                               described in ATA8 7.16 and 7.17. Only valid if
+                               the device is not a PM.
 
-id
+               pio_mode:       (RO) Transfer modes supported by the device when
+                               in PIO mode. Mostly used by PATA device.
 
-       Cached result of IDENTIFY command, as described in ATA8 7.16 and 7.17.
-       Only valid if the device is not a PM.
+               xfer_mode:      (RO) Current transfer mode
 
-gscr
+               dma_mode:       (RO) Transfer modes supported by the device when
+                               in DMA mode. Mostly used by PATA device.
 
-       Cached result of the dump of PM GSCR register.
-       Valid registers are:
-       0:      SATA_PMP_GSCR_PROD_ID,
-       1:      SATA_PMP_GSCR_REV,
-       2:      SATA_PMP_GSCR_PORT_INFO,
-       32:     SATA_PMP_GSCR_ERROR,
-       33:     SATA_PMP_GSCR_ERROR_EN,
-       64:     SATA_PMP_GSCR_FEAT,
-       96:     SATA_PMP_GSCR_FEAT_EN,
-       130:    SATA_PMP_GSCR_SII_GPIO
-       Only valid if the device is a PM.
+               class:          (RO) Device class. Can be "ata" for disk,
+                               "atapi" for packet device, "pmp" for PM, or
+                               "none" if no device was found behind the link.
 
-trim
 
-       Shows the DSM TRIM mode currently used by the device. Valid
-       values are:
-       unsupported:            Drive does not support DSM TRIM
-       unqueued:               Drive supports unqueued DSM TRIM only
-       queued:                 Drive supports queued DSM TRIM
-       forced_unqueued:        Drive's queued DSM support is known to be
-                               buggy and only unqueued TRIM commands
-                               are sent
+What:          /sys/class/ata_device/devX[.Y].Z/trim
+Date:          May, 2015
+KernelVersion: v4.10
+Contact:       Gwendal Grignou <gwendal@chromium.org>
+Description:
+               (RO) Shows the DSM TRIM mode currently used by the device. Valid
+               values are:
 
-spdn_cnt
+               unsupported:            Drive does not support DSM TRIM
 
-       Number of time libata decided to lower the speed of link due to errors.
+               unqueued:               Drive supports unqueued DSM TRIM only
 
-ering
+               queued:                 Drive supports queued DSM TRIM
 
-       Formatted output of the error ring of the device.
+               forced_unqueued:        Drive's queued DSM support is known to
+                                       be buggy and only unqueued TRIM commands
+                                       are sent
diff --git a/Documentation/ABI/testing/sysfs-block-device b/Documentation/ABI/testing/sysfs-block-device
new file mode 100644 (file)
index 0000000..82ef6ea
--- /dev/null
@@ -0,0 +1,58 @@
+What:          /sys/block/*/device/sw_activity
+Date:          Jun, 2008
+KernelVersion: v2.6.27
+Contact:       linux-ide@vger.kernel.org
+Description:
+               (RW) Used by drivers which support software controlled activity
+               LEDs.
+
+               It has the following valid values:
+
+               0       OFF - the LED is not activated on activity
+               1       BLINK_ON - the LED blinks on every 10ms when activity is
+                       detected.
+               2       BLINK_OFF - the LED is on when idle, and blinks off
+                       every 10ms when activity is detected.
+
+               Note that the user must turn sw_activity OFF if they wish to
+               control the activity LED via the em_message file.
+
+
+What:          /sys/block/*/device/unload_heads
+Date:          Sep, 2008
+KernelVersion: v2.6.28
+Contact:       linux-ide@vger.kernel.org
+Description:
+               (RW) Hard disk shock protection
+
+               Writing an integer value to this file will take the heads of the
+               respective drive off the platter and block all I/O operations
+               for the specified number of milliseconds.
+
+               - If the device does not support the unload heads feature,
+                 access is denied with -EOPNOTSUPP.
+               - The maximal value accepted for a timeout is 30000
+                 milliseconds.
+               - A previously set timeout can be cancelled and disk can resume
+                 normal operation immediately by specifying a timeout of 0.
+               - Some hard drives only comply with an earlier version of the
+                 ATA standard, but support the unload feature nonetheless.
+                 There is no safe way Linux can detect these devices, so this
+                 is not enabled by default. If it is known that your device
+                 does support the unload feature, then you can tell the kernel
+                 to enable it by writing -1. It can be disabled again by
+                 writing -2.
+               - Values below -2 are rejected with -EINVAL
+
+               For more information, see
+               Documentation/laptops/disk-shock-protection.txt
+
+
+What:          /sys/block/*/device/ncq_prio_enable
+Date:          Oct, 2016
+KernelVersion: v4.10
+Contact:       linux-ide@vger.kernel.org
+Description:
+               (RW) Write to the file to turn on or off the SATA ncq (native
+               command queueing) support. By default this feature is turned
+               off.
index 0eb255e7db123c84ebcd08d04f2cd867ee6bffa0..bafc59fd7b69ec355d6631681e3471195b0ae5ca 100644 (file)
@@ -27,3 +27,92 @@ Description: This file contains the current status of the "SSD Smart Path"
                the direct i/o path to physical devices.  This setting is
                controller wide, affecting all configured logical drives on the
                controller.  This file is readable and writable.
+
+What:          /sys/class/scsi_host/hostX/link_power_management_policy
+Date:          Oct, 2007
+KernelVersion: v2.6.24
+Contact:       linux-ide@vger.kernel.org
+Description:
+               (RW) This parameter allows the user to read and set the link
+               (interface) power management.
+
+               There are four possible options:
+
+               min_power: Tell the controller to try to make the link use the
+               least possible power when possible. This may sacrifice some
+               performance due to increased latency when coming out of lower
+               power states.
+
+               max_performance: Generally, this means no power management.
+               Tell the controller to have performance be a priority over power
+               management.
+
+               medium_power: Tell the controller to enter a lower power state
+               when possible, but do not enter the lowest power state, thus
+               improving latency over min_power setting.
+
+               med_power_with_dipm: Identical to the existing medium_power
+               setting except that it enables dipm (device initiated power
+               management) on top, which makes it match the Windows IRST (Intel
+               Rapid Storage Technology) driver settings. This setting is also
+               close to min_power, except that:
+               a) It does not use host-initiated slumber mode, but it does
+               allow device-initiated slumber
+               b) It does not enable low power device sleep mode (DevSlp).
+
+What:          /sys/class/scsi_host/hostX/em_message
+What:          /sys/class/scsi_host/hostX/em_message_type
+Date:          Jun, 2008
+KernelVersion: v2.6.27
+Contact:       linux-ide@vger.kernel.org
+Description:
+               em_message: (RW) Enclosure management support. For the LED
+               protocol, writes and reads correspond to the LED message format
+               as defined in the AHCI spec.
+
+               The user must turn sw_activity (under /sys/block/*/device/) OFF
+               if they wish to control the activity LED via the em_message
+               file.
+
+               em_message_type: (RO) Displays the current enclosure management
+               protocol that is being used by the driver (e.g. LED, SAF-TE,
+               SES-2, SGPIO etc).
+
+What:          /sys/class/scsi_host/hostX/ahci_port_cmd
+What:          /sys/class/scsi_host/hostX/ahci_host_caps
+What:          /sys/class/scsi_host/hostX/ahci_host_cap2
+Date:          Mar, 2010
+KernelVersion: v2.6.35
+Contact:       linux-ide@vger.kernel.org
+Description:
+               [to be documented]
+
+What:          /sys/class/scsi_host/hostX/ahci_host_version
+Date:          Mar, 2010
+KernelVersion: v2.6.35
+Contact:       linux-ide@vger.kernel.org
+Description:
+               (RO) Display the version of the AHCI spec implemented by the
+               host.
+
+What:          /sys/class/scsi_host/hostX/em_buffer
+Date:          Apr, 2010
+KernelVersion: v2.6.35
+Contact:       linux-ide@vger.kernel.org
+Description:
+               (RW) Allows access to AHCI EM (enclosure management) buffer
+               directly if the host supports EM.
+
+               For example, the AHCI driver supports SGPIO EM messages but the
+               SATA/AHCI specs do not define the SGPIO message format of the EM
+               buffer. Different hardware (HW) vendors may have different
+               definitions. With the em_buffer attribute, this issue can be
+               solved by allowing HW vendors to provide userland drivers and
+               tools for their SGPIO initiators.
+
+What:          /sys/class/scsi_host/hostX/em_message_supported
+Date:          Oct, 2009
+KernelVersion: v2.6.39
+Contact:       linux-ide@vger.kernel.org
+Description:
+               (RO) Displays supported enclosure management message types.
index 611a75e4366ed2404b807dc03f1159f65d0a819a..badb26ac33dc8eec08d38c3ee13c24380568aadc 100644 (file)
@@ -570,7 +570,9 @@ your driver if they're helpful, or just use plain hex constants.
 The device IDs are arbitrary hex numbers (vendor controlled) and normally used
 only in a single location, the pci_device_id table.
 
-Please DO submit new vendor/device IDs to http://pciids.sourceforge.net/.
+Please DO submit new vendor/device IDs to http://pci-ids.ucw.cz/.
+There are mirrors of the pci.ids file at http://pciids.sourceforge.net/
+and https://github.com/pciutils/pciids.
 
 
 
index 4f7af841d935a261f5ffb96abd4f221364ee9e3d..ddcc58d01cfbce99a2da80a91101213203b83e82 100644 (file)
@@ -152,6 +152,11 @@ OCXL_IOCTL_IRQ_SET_FD:
   Associate an event fd to an AFU interrupt so that the user process
   can be notified when the AFU sends an interrupt.
 
+OCXL_IOCTL_GET_METADATA:
+
+  Obtains configuration information from the card, such as the size of
+  MMIO areas, the AFU version, and the PASID for the current context.
+
 
 mmap
 ----
index 84cbb302f2b56e1fe9b4ecc46dfb2f94b1ff392d..1a0b704e1a38f2a8749c83bb3f647d9a952c2787 100644 (file)
@@ -539,6 +539,18 @@ A: Although LLVM IR generation and optimization try to stay architecture
        The clang option "-fno-jump-tables" can be used to disable
        switch table generation.
 
+     - For clang -target bpf, it is guaranteed that pointer or long /
+       unsigned long types will always have a width of 64 bit, no matter
+       whether underlying clang binary or default target (or kernel) is
+       32 bit. However, when native clang target is used, then it will
+       compile these types based on the underlying architecture's conventions,
+       meaning in case of 32 bit architecture, pointer or long / unsigned
+       long types e.g. in BPF context structure will have width of 32 bit
+       while the BPF LLVM back end still operates in 64 bit. The native
+       target is mostly needed in tracing for the case of walking pt_regs
+       or other kernel structures where CPU's register width matters.
+       Otherwise, clang -target bpf is generally recommended.
+
    You should use default target when:
 
      - Your program includes a header file, e.g., ptrace.h, which eventually
index 6394ea9e3b9e5b45a8803dd5a6c7e98426a1b889..58b12e25bbb16d5ce85b7d11be530ec80dab71d0 100644 (file)
@@ -16,6 +16,7 @@ Required properties:
 - ddc: phandle to the hdmi ddc node
 - phy: phandle to the hdmi phy node
 - samsung,syscon-phandle: phandle for system controller node for PMU.
+- #sound-dai-cells: should be 0.
 
 Required properties for Exynos 4210, 4212, 5420 and 5433:
 - clocks: list of clock IDs from SoC clock driver.
index 217a90eaabe7f87db72539a2c9046f5ebc892b7c..9c38bbe7e6d7d86993be1f24ad011ee27a8e1d59 100644 (file)
@@ -11,7 +11,11 @@ Required properties:
   interrupts.
 
 Optional properties:
-- clocks: Optional reference to the clock used by the XOR engine.
+- clocks: Optional reference to the clocks used by the XOR engine.
+- clock-names: mandatory if there is a second clock, in this case the
+   name must be "core" for the first clock and "reg" for the second
+   one
+
 
 Example:
 
index 1812c848e369e2a261f42067eaf65d5e563f7f9a..abfae1beca2b4def66184cdf704fffdaddc846d3 100644 (file)
@@ -38,9 +38,9 @@ Required properties:
 
                 "catalyst",
                 "microchip",
+                "nxp",
                 "ramtron",
                 "renesas",
-                "nxp",
                 "st",
 
                 Some vendors use different model names for chips which are just
index 33c9a10fdc91a1dca6f7e09308ab81862f2e5874..20f121daa9106f177a9224ecd12140e6d5f30781 100644 (file)
@@ -14,6 +14,7 @@ Required properties:
     - "renesas,irqc-r8a7794" (R-Car E2)
     - "renesas,intc-ex-r8a7795" (R-Car H3)
     - "renesas,intc-ex-r8a7796" (R-Car M3-W)
+    - "renesas,intc-ex-r8a77965" (R-Car M3-N)
     - "renesas,intc-ex-r8a77970" (R-Car V3M)
     - "renesas,intc-ex-r8a77995" (R-Car D3)
 - #interrupt-cells: has to be <2>: an interrupt index and flags, as defined in
index 1d4d0f49c9d06eb66d9957fb0661cec35ddc7af9..60d50a2b032375663f50b7cec035ef4ba888f145 100644 (file)
@@ -13,9 +13,18 @@ placed as a child node of an mdio device.
 The properties described here are those specific to Marvell devices.
 Additional required and optional properties can be found in dsa.txt.
 
+The compatibility string is used only to find an identification register,
+which is at a different MDIO base address in different switch families.
+- "marvell,mv88e6085"  : Switch has base address 0x10. Use with models:
+                         6085, 6095, 6097, 6123, 6131, 6141, 6161, 6165,
+                         6171, 6172, 6175, 6176, 6185, 6240, 6320, 6321,
+                         6341, 6350, 6351, 6352
+- "marvell,mv88e6190"  : Switch has base address 0x00. Use with models:
+                         6190, 6190X, 6191, 6290, 6390, 6390X
+
 Required properties:
 - compatible           : Should be one of "marvell,mv88e6085" or
-                         "marvell,mv88e6190"
+                         "marvell,mv88e6190" as indicated above
 - reg                  : Address on the MII bus for the switch.
 
 Optional properties:
@@ -50,14 +59,15 @@ Example:
                        compatible = "marvell,mv88e6085";
                        reg = <0>;
                        reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>;
-               };
-               mdio {
-                       #address-cells = <1>;
-                       #size-cells = <0>;
-                       switch1phy0: switch1phy0@0 {
-                               reg = <0>;
-                               interrupt-parent = <&switch0>;
-                               interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+
+                       mdio {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+                               switch1phy0: switch1phy0@0 {
+                                       reg = <0>;
+                                       interrupt-parent = <&switch0>;
+                                       interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+                               };
                        };
                };
        };
@@ -74,23 +84,24 @@ Example:
                        compatible = "marvell,mv88e6390";
                        reg = <0>;
                        reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>;
-               };
-               mdio {
-                       #address-cells = <1>;
-                       #size-cells = <0>;
-                       switch1phy0: switch1phy0@0 {
-                               reg = <0>;
-                               interrupt-parent = <&switch0>;
-                               interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+
+                       mdio {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+                               switch1phy0: switch1phy0@0 {
+                                       reg = <0>;
+                                       interrupt-parent = <&switch0>;
+                                       interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+                               };
                        };
-               };
 
-               mdio1 {
-                       compatible = "marvell,mv88e6xxx-mdio-external";
-                       #address-cells = <1>;
-                       #size-cells = <0>;
-                       switch1phy9: switch1phy0@9 {
-                               reg = <9>;
+                       mdio1 {
+                               compatible = "marvell,mv88e6xxx-mdio-external";
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+                               switch1phy9: switch1phy0@9 {
+                                       reg = <9>;
+                               };
                        };
                };
        };
diff --git a/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt b/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
new file mode 100644 (file)
index 0000000..2aaef56
--- /dev/null
@@ -0,0 +1,23 @@
+* MCR20A IEEE 802.15.4 *
+
+Required properties:
+  - compatible:                should be "nxp,mcr20a"
+  - spi-max-frequency: maximal bus speed, should be set to a frequency
+                       lower than 9000000, depending on sync or async operation mode
+  - reg:               the chipselect index
+  - interrupts:                the interrupt generated by the device. Non high-level
+                       interrupts can cause deadlocks while handling the ISR.
+
+Optional properties:
+  - rst_b-gpio:                GPIO spec for the RST_B pin
+
+Example:
+
+       mcr20a@0 {
+               compatible = "nxp,mcr20a";
+               spi-max-frequency = <9000000>;
+               reg = <0>;
+               interrupts = <17 2>;
+               interrupt-parent = <&gpio>;
+               rst_b-gpio = <&gpio 27 1>;
+       };
index 27966ae741e09a51180ee45c92bf3da05f879ce9..457d5ae16f23490b21bbcd6171734f8aa8858b94 100644 (file)
@@ -29,6 +29,7 @@ Optional properties for PHY child node:
 - reset-gpios : Should specify the gpio for phy reset
 - magic-packet : If present, indicates that the hardware supports waking
   up via magic packet.
+- phy-handle : see ethernet.txt file in the same directory
 
 Examples:
 
index c902261893b913f529b1ec8d9852048f9d711e59..b4dc455eb1554e2df9fa8c6b3b3c0a8fd3a8cb9b 100644 (file)
@@ -18,6 +18,7 @@ Required properties:
       - "renesas,etheravb-r8a7795" for the R8A7795 SoC.
       - "renesas,etheravb-r8a7796" for the R8A7796 SoC.
       - "renesas,etheravb-r8a77970" for the R8A77970 SoC.
+      - "renesas,etheravb-r8a77980" for the R8A77980 SoC.
       - "renesas,etheravb-r8a77995" for the R8A77995 SoC.
       - "renesas,etheravb-rcar-gen3" as a fallback for the above
                R-Car Gen3 devices.
@@ -26,7 +27,11 @@ Required properties:
        SoC-specific version corresponding to the platform first followed by
        the generic version.
 
-- reg: offset and length of (1) the register block and (2) the stream buffer.
+- reg: Offset and length of (1) the register block and (2) the stream buffer.
+       The region for the register block is mandatory.
+       The region for the stream buffer is optional, as it is only present on
+       R-Car Gen2 and RZ/G1 SoCs, and on R-Car H3 (R8A7795), M3-W (R8A7796),
+       and M3-N (R8A77965).
 - interrupts: A list of interrupt-specifiers, one for each entry in
              interrupt-names.
              If interrupt-names is not present, an interrupt specifier
index f1c441bedf68622fb5f762e311a0ccf0b05352b9..929591d52ed6670c321e3790cdc7d8c8b9787efa 100644 (file)
@@ -33,6 +33,10 @@ Optional Properties:
   Select (AKA RS1) output gpio signal (SFP+ only), low: low Tx rate, high:
   high Tx rate. Must not be present for SFF modules
 
+- maximum-power-milliwatt : Maximum module power consumption
+  Specifies the maximum power consumption allowable by a module in the
+  slot, in milli-Watts.  Presently, modules can be up to 1W, 1.5W or 2W.
+
 Example #1: Direct serdes to SFP connection
 
 sfp_eth3: sfp-eth3 {
@@ -40,6 +44,7 @@ sfp_eth3: sfp-eth3 {
        i2c-bus = <&sfp_1g_i2c>;
        los-gpios = <&cpm_gpio2 22 GPIO_ACTIVE_HIGH>;
        mod-def0-gpios = <&cpm_gpio2 21 GPIO_ACTIVE_LOW>;
+       maximum-power-milliwatt = <1000>;
        pinctrl-names = "default";
        pinctrl-0 = <&cpm_sfp_1g_pins &cps_sfp_1g_pins>;
        tx-disable-gpios = <&cps_gpio1 24 GPIO_ACTIVE_HIGH>;
index 3c81f78b5c27d50bb809cf6bbf13766a779024f5..5d254ab13ebf384034b7e8629365a7b0070c2af3 100644 (file)
@@ -60,7 +60,7 @@ Examples
                #size-cells = <0>;
 
                button@1 {
-                       debounce_interval = <50>;
+                       debounce-interval = <50>;
                        wakeup-source;
                        linux,code = <116>;
                        label = "POWER";
index 28be51afdb6a2623a9eb9465a86e0ec3fa7a1d7a..379eb763073e68d4a63b540c6ca3384a3b09f642 100644 (file)
@@ -22,7 +22,32 @@ Optional properties:
 - clocks : thermal sensor's clock source.
 
 Example:
+ocotp: ocotp@21bc000 {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       compatible = "fsl,imx6sx-ocotp", "syscon";
+       reg = <0x021bc000 0x4000>;
+       clocks = <&clks IMX6SX_CLK_OCOTP>;
 
+       tempmon_calib: calib@38 {
+               reg = <0x38 4>;
+       };
+
+       tempmon_temp_grade: temp-grade@20 {
+               reg = <0x20 4>;
+       };
+};
+
+tempmon: tempmon {
+       compatible = "fsl,imx6sx-tempmon", "fsl,imx6q-tempmon";
+       interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>;
+       fsl,tempmon = <&anatop>;
+       nvmem-cells = <&tempmon_calib>, <&tempmon_temp_grade>;
+       nvmem-cell-names = "calib", "temp_grade";
+       clocks = <&clks IMX6SX_CLK_PLL3_USB_OTG>;
+};
+
+Legacy method (Deprecated):
 tempmon {
        compatible = "fsl,imx6q-tempmon";
        fsl,tempmon = <&anatop>;
index e64d903bcbe8177574cc1cf82c0bfeb37168206c..46da5f1844608fa85c9a88a947fe50643392a7f1 100644 (file)
@@ -19,7 +19,7 @@ Required properties:
   configured in FS mode;
   - "st,stm32f4x9-hsotg": The DWC2 USB HS controller instance in STM32F4x9 SoCs
   configured in HS mode;
-  - "st,stm32f7xx-hsotg": The DWC2 USB HS controller instance in STM32F7xx SoCs
+  - "st,stm32f7-hsotg": The DWC2 USB HS controller instance in STM32F7 SoCs
     configured in HS mode;
 - reg : Should contain 1 register range (address and length)
 - interrupts : Should contain 1 interrupt
index 87a45e2f9b7f99c1a26028e79dd38581bebec43e..2c071bb5801e7c8af98c44deb0c87d80627269b9 100644 (file)
@@ -4,6 +4,7 @@ Required properties:
   - compatible: Must contain one of the following:
        - "renesas,r8a7795-usb3-peri"
        - "renesas,r8a7796-usb3-peri"
+       - "renesas,r8a77965-usb3-peri"
        - "renesas,rcar-gen3-usb3-peri" for a generic R-Car Gen3 compatible
          device
 
index d060172f152914895bc59544d55eae0ba685cfc8..43960faf5a88c6c1c1c3f34e11d9ceef7d5e40cf 100644 (file)
@@ -12,6 +12,7 @@ Required properties:
        - "renesas,usbhs-r8a7794" for r8a7794 (R-Car E2) compatible device
        - "renesas,usbhs-r8a7795" for r8a7795 (R-Car H3) compatible device
        - "renesas,usbhs-r8a7796" for r8a7796 (R-Car M3-W) compatible device
+       - "renesas,usbhs-r8a77965" for r8a77965 (R-Car M3-N) compatible device
        - "renesas,usbhs-r8a77995" for r8a77995 (R-Car D3) compatible device
        - "renesas,usbhs-r7s72100" for r7s72100 (RZ/A1) compatible device
        - "renesas,rcar-gen2-usbhs" for R-Car Gen2 or RZ/G1 compatible devices
index e2ea59bbca93f1cd0ec107bd3d6ff3cc175a7043..1651483a7048a2df7e2bc7078af1f1e7233e7ccf 100644 (file)
@@ -13,6 +13,7 @@ Required properties:
     - "renesas,xhci-r8a7793" for r8a7793 SoC
     - "renesas,xhci-r8a7795" for r8a7795 SoC
     - "renesas,xhci-r8a7796" for r8a7796 SoC
+    - "renesas,xhci-r8a77965" for r8a77965 SoC
     - "renesas,rcar-gen2-xhci" for a generic R-Car Gen2 or RZ/G1 compatible
       device
     - "renesas,rcar-gen3-xhci" for a generic R-Car Gen3 compatible device
index 6869c73de4e255ee54f7afd8e29cd51ad236e6e1..a63d2c54329b0a799469b9637fc566412df5d62a 100644 (file)
@@ -111,7 +111,7 @@ TROUBLESHOOTING SERIAL CONSOLE PROBLEMS
 
        - If you don't have an HCDP, the kernel doesn't know where
          your console lives until the driver discovers serial
-         devices.  Use "console=uart, io,0x3f8" (or appropriate
+         devices.  Use "console=uart,io,0x3f8" (or appropriate
          address for your machine).
 
     Kernel and init script output works fine, but no "login:" prompt:
index 63f55a9ae2b1ffa54d088eb60b7e79e36c050f6e..a8c4239ed95bab6c87d8adcdb4edfa52de981099 100644 (file)
@@ -50,9 +50,15 @@ replace typedef dmx_filter_t :c:type:`dmx_filter`
 replace typedef dmx_pes_type_t :c:type:`dmx_pes_type`
 replace typedef dmx_input_t :c:type:`dmx_input`
 
-ignore symbol DMX_OUT_DECODER
-ignore symbol DMX_OUT_TAP
-ignore symbol DMX_OUT_TS_TAP
-ignore symbol DMX_OUT_TSDEMUX_TAP
+replace symbol DMX_BUFFER_FLAG_HAD_CRC32_DISCARD :c:type:`dmx_buffer_flags`
+replace        symbol DMX_BUFFER_FLAG_TEI :c:type:`dmx_buffer_flags`
+replace        symbol DMX_BUFFER_PKT_COUNTER_MISMATCH :c:type:`dmx_buffer_flags`
+replace        symbol DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED :c:type:`dmx_buffer_flags`
+replace        symbol DMX_BUFFER_FLAG_DISCONTINUITY_INDICATOR :c:type:`dmx_buffer_flags`
+
+replace symbol DMX_OUT_DECODER :c:type:`dmx_output`
+replace symbol DMX_OUT_TAP :c:type:`dmx_output`
+replace symbol DMX_OUT_TS_TAP :c:type:`dmx_output`
+replace symbol DMX_OUT_TSDEMUX_TAP :c:type:`dmx_output`
 
 replace ioctl DMX_DQBUF dmx_qbuf
index b48c4931658eda71586ec7ea4ddb380df518170f..be5a4c6f19040c6e5bf1acbb34f9598e6eec0b2f 100644 (file)
@@ -51,9 +51,10 @@ out to disk. Buffers remain locked until dequeued, until the
 the device is closed.
 
 Applications call the ``DMX_DQBUF`` ioctl to dequeue a filled
-(capturing) buffer from the driver's outgoing queue. They just set the ``reserved`` field array to zero. When ``DMX_DQBUF`` is called with a
-pointer to this structure, the driver fills the remaining fields or
-returns an error code.
+(capturing) buffer from the driver's outgoing queue.
+They just set the ``index`` field with the buffer ID to be queued.
+When ``DMX_DQBUF`` is called with a pointer to struct :c:type:`dmx_buffer`,
+the driver fills the remaining fields or returns an error code.
 
 By default ``DMX_DQBUF`` blocks when no buffer is in the outgoing
 queue. When the ``O_NONBLOCK`` flag was given to the
index a553d4e4a0fb4c6f5ef675629edb2aa1b0a6f71a..1d1120753ae82d0aee3e934a3d9c074b70dcbca6 100644 (file)
@@ -755,13 +755,13 @@ udp_rmem_min - INTEGER
        Minimal size of receive buffer used by UDP sockets in moderation.
        Each UDP socket is able to use the size for receiving data, even if
        total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
-       Default: 1 page
+       Default: 4K
 
 udp_wmem_min - INTEGER
        Minimal size of send buffer used by UDP sockets in moderation.
        Each UDP socket is able to use the size for sending data, even if
        total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
-       Default: 1 page
+       Default: 4K
 
 CIPSOv4 Variables:
 
@@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN
        FALSE: disabled
        Default: FALSE
 
+fib_multipath_hash_policy - INTEGER
+       Controls which hash policy to use for multipath routes.
+       Default: 0 (Layer 3)
+       Possible values:
+       0 - Layer 3 (source and destination addresses plus flow label)
+       1 - Layer 4 (standard 5-tuple)
+
 anycast_src_echo_reply - BOOLEAN
        Controls the use of anycast addresses as source addresses for ICMPv6
        echo reply
@@ -2094,7 +2101,7 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
        It is guaranteed to each SCTP socket (but not association) even
        under moderate memory pressure.
 
-       Default: 1 page
+       Default: 4K
 
 sctp_wmem  - vector of 3 INTEGERs: min, default, max
        Currently this tunable has no effect.
index 291a012649678035b5d0fdc306904093ceedf610..fe46d4867e2dbfa4cde05cc0b900c157d99b67d9 100644 (file)
@@ -72,11 +72,6 @@ this flag, a process must first signal intent by setting a socket option:
        if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
                error(1, errno, "setsockopt zerocopy");
 
-Setting the socket option only works when the socket is in its initial
-(TCP_CLOSED) state.  Trying to set the option for a socket returned by accept(),
-for example, will lead to an EBUSY error. In this case, the option should be set
-to the listening socket and it will be inherited by the accepted sockets.
-
 Transmission
 ------------
 
diff --git a/Documentation/networking/net_dim.txt b/Documentation/networking/net_dim.txt
new file mode 100644 (file)
index 0000000..9cb31c5
--- /dev/null
@@ -0,0 +1,174 @@
+Net DIM - Generic Network Dynamic Interrupt Moderation
+======================================================
+
+Author:
+       Tal Gilboa <talgi@mellanox.com>
+
+
+Contents
+=========
+
+- Assumptions
+- Introduction
+- The Net DIM Algorithm
+- Registering a Network Device to DIM
+- Example
+
+Part 0: Assumptions
+======================
+
+This document assumes the reader has basic knowledge in network drivers
+and in general interrupt moderation.
+
+
+Part I: Introduction
+======================
+
+Dynamic Interrupt Moderation (DIM) (in networking) refers to changing the
+interrupt moderation configuration of a channel in order to optimize packet
+processing. The mechanism includes an algorithm which decides if and how to
+change moderation parameters for a channel, usually by performing an analysis on
+runtime data sampled from the system. Net DIM is such a mechanism. In each
+iteration of the algorithm, it analyses a given sample of the data, compares it
+to the previous sample and if required, it can decide to change some of the
+interrupt moderation configuration fields. The data sample is composed of data
+bandwidth, the number of packets and the number of events. The time between
+samples is also measured. Net DIM compares the current and the previous data and
+returns an adjusted interrupt moderation configuration object. In some cases,
+the algorithm might decide not to change anything. The configuration fields are
+the minimum duration (microseconds) allowed between events and the maximum
+number of wanted packets per event. The Net DIM algorithm ascribes more
+importance to increasing bandwidth than to reducing the interrupt rate.
+
+
+Part II: The Net DIM Algorithm
+===============================
+
+Each iteration of the Net DIM algorithm follows these steps:
+1. Calculates new data sample.
+2. Compares it to previous sample.
+3. Makes a decision - suggests interrupt moderation configuration fields.
+4. Applies a schedule work function, which applies suggested configuration.
+
+The first two steps are straightforward, both the new and the previous data are
+supplied by the driver registered to Net DIM. The previous data is the new data
+supplied to the previous iteration. The comparison step checks the difference
+between the new and previous data and decides on the result of the last step.
+A step would result as "better" if bandwidth increases and as "worse" if
+bandwidth reduces. If there is no change in bandwidth, the packet rate is
+compared in a similar fashion - increase == "better" and decrease == "worse".
+In case there is no change in the packet rate as well, the interrupt rate is
+compared. Here the algorithm tries to optimize for lower interrupt rate so an
+increase in the interrupt rate is considered "worse" and a decrease is
+considered "better". Step #2 has an optimization for avoiding false results: it
+only considers a difference between samples as valid if it is greater than a
+certain percentage. Also, since Net DIM does not measure anything by itself, it
+assumes the data provided by the driver is valid.
+
+Step #3 decides on the suggested configuration based on the result from step #2
+and the internal state of the algorithm. The states reflect the "direction" of
+the algorithm: is it going left (reducing moderation), right (increasing
+moderation) or standing still. Another optimization is that if a decision
+to stay still is made multiple times, the interval between iterations of the
+algorithm would increase in order to reduce calculation overhead. Also, after
+"parking" on one of the most left or most right decisions, the algorithm may
+decide to verify this decision by taking a step in the other direction. This is
+done in order to avoid getting stuck in a "deep sleep" scenario. Once a
+decision is made, an interrupt moderation configuration is selected from
+the predefined profiles.
+
+The last step is to notify the registered driver that it should apply the
+suggested configuration. This is done by scheduling a work function, defined by
+the Net DIM API and provided by the registered driver.
+
+As you can see, Net DIM itself does not actively interact with the system. It
+would have trouble making the correct decisions if the wrong data is supplied to
+it and it would be useless if the work function would not apply the suggested
+configuration. This does, however, allow the registered driver some room for
+manoeuvre as it may provide partial data or ignore the algorithm suggestion
+under some conditions.
+
+
+Part III: Registering a Network Device to DIM
+==============================================
+
+Net DIM API exposes the main function net_dim(struct net_dim *dim,
+struct net_dim_sample end_sample). This function is the entry point to the Net
+DIM algorithm and has to be called every time the driver would like to check if
+it should change interrupt moderation parameters. The driver should provide two
+data structures: struct net_dim and struct net_dim_sample. Struct net_dim
+describes the state of DIM for a specific object (RX queue, TX queue,
+other queues, etc.). This includes the current selected profile, previous data
+samples, the callback function provided by the driver and more.
+Struct net_dim_sample describes a data sample, which will be compared to the
+data sample stored in struct net_dim in order to decide on the algorithm's next
+step. The sample should include bytes, packets and interrupts, measured by
+the driver.
+
+In order to use Net DIM from a networking driver, the driver needs to call the
+main net_dim() function. The recommended method is to call net_dim() on each
+interrupt. Since Net DIM has a built-in moderation and it might decide to skip
+iterations under certain conditions, there is no need to moderate the net_dim()
+calls as well. As mentioned above, the driver needs to provide an object of type
+struct net_dim to the net_dim() function call. It is advised for each entity
+using Net DIM to hold a struct net_dim as part of its data structure and use it
+as the main Net DIM API object. The struct net_dim_sample should hold the latest
+bytes, packets and interrupts count. No need to perform any calculations, just
+include the raw data.
+
+The net_dim() call itself does not return anything. Instead Net DIM relies on
+the driver to provide a callback function, which is called when the algorithm
+decides to make a change in the interrupt moderation parameters. This callback
+will be scheduled and run in a separate thread in order not to add overhead to
+the data flow. After the work is done, Net DIM algorithm needs to be set to
+the proper state in order to move to the next iteration.
+
+
+Part IV: Example
+=================
+
+The following code demonstrates how to register a driver to Net DIM. The actual
+usage is not complete but it should make the outline of the usage clear.
+
+my_driver.c:
+
+#include <linux/net_dim.h>
+
+/* Callback for net DIM to schedule on a decision to change moderation */
+void my_driver_do_dim_work(struct work_struct *work)
+{
+       /* Get struct net_dim from struct work_struct */
+       struct net_dim *dim = container_of(work, struct net_dim,
+                                          work);
+       /* Do interrupt moderation related stuff */
+       ...
+
+       /* Signal net DIM work is done and it should move to next iteration */
+       dim->state = NET_DIM_START_MEASURE;
+}
+
+/* My driver's interrupt handler */
+int my_driver_handle_interrupt(struct my_driver_entity *my_entity, ...)
+{
+       ...
+       /* A struct to hold current measured data */
+       struct net_dim_sample dim_sample;
+       ...
+       /* Initiate data sample struct with current data */
+       net_dim_sample(my_entity->events,
+                      my_entity->packets,
+                      my_entity->bytes,
+                      &dim_sample);
+       /* Call net DIM */
+       net_dim(&my_entity->dim, dim_sample);
+       ...
+}
+
+/* My entity's initialization function (my_entity was already allocated) */
+int my_driver_init_my_entity(struct my_driver_entity *my_entity, ...)
+{
+       ...
+       /* Initiate struct work_struct with my driver's callback function */
+       INIT_WORK(&my_entity->dim.work, my_driver_do_dim_work);
+       ...
+}
index bf654845556e19d0d09ad113d997ac48aa67ccaa..999eb41da81dfca0fc07926feee9eb16caed67a6 100644 (file)
@@ -7,15 +7,12 @@ socket interface on 2.4/2.6/3.x kernels. This type of sockets is used for
 i) capture network traffic with utilities like tcpdump, ii) transmit network
 traffic, or any other that needs raw access to network interface.
 
-You can find the latest version of this document at:
-    http://wiki.ipxwarzone.com/index.php5?title=Linux_packet_mmap
-
 Howto can be found at:
-    http://wiki.gnu-log.net (packet_mmap)
+    https://sites.google.com/site/packetmmap/
 
 Please send your comments to
     Ulisses Alonso Camaró <uaca@i.hate.spam.alumni.uv.es>
-    Johann Baudy <johann.baudy@gnu-log.net>
+    Johann Baudy
 
 -------------------------------------------------------------------------------
 + Why use PACKET_MMAP
@@ -51,17 +48,8 @@ From the user standpoint, you should use the higher level libpcap library, which
 is a de facto standard, portable across nearly all operating systems
 including Win32. 
 
-Said that, at time of this writing, official libpcap 0.8.1 is out and doesn't include
-support for PACKET_MMAP, and also probably the libpcap included in your distribution. 
-
-I'm aware of two implementations of PACKET_MMAP in libpcap:
-
-    http://wiki.ipxwarzone.com/                     (by Simon Patarin, based on libpcap 0.6.2)
-    http://public.lanl.gov/cpw/              (by Phil Wood, based on lastest libpcap)
-
-The rest of this document is intended for people who want to understand
-the low level details or want to improve libpcap by including PACKET_MMAP
-support.
+Packet MMAP support was integrated into libpcap around the time of version 1.3.0;
+TPACKET_V3 support was added in version 1.5.0.
 
 --------------------------------------------------------------------------------
 + How to use mmap() directly to improve capture process
@@ -174,7 +162,7 @@ As capture, each frame contains two parts:
  /* bind socket to eth0 */
  bind(this->socket, (struct sockaddr *)&my_addr, sizeof(struct sockaddr_ll));
 
- A complete tutorial is available at: http://wiki.gnu-log.net/
+ A complete tutorial is available at: https://sites.google.com/site/packetmmap/
 
 By default, the user should put data at :
  frame base + TPACKET_HDRLEN - sizeof(struct sockaddr_ll)
index d47480b61ac6d0611c0e1cbfe378c14941f1cfb5..aca542ec125c96bdc95411359fceffcaee9898a0 100644 (file)
@@ -20,8 +20,8 @@ TCP Segmentation Offload
 
 TCP segmentation allows a device to segment a single frame into multiple
 frames with a data payload size specified in skb_shinfo()->gso_size.
-When TCP segmentation requested the bit for either SKB_GSO_TCP or
-SKB_GSO_TCP6 should be set in skb_shinfo()->gso_type and
+When TCP segmentation is requested, the bit for either SKB_GSO_TCPV4 or
+SKB_GSO_TCPV6 should be set in skb_shinfo()->gso_type and
 skb_shinfo()->gso_size should be set to a non-zero value.
 
 TCP segmentation is dependent on support for the use of partial checksum
@@ -153,8 +153,18 @@ To signal this, gso_size is set to the special value GSO_BY_FRAGS.
 
 Therefore, any code in the core networking stack must be aware of the
 possibility that gso_size will be GSO_BY_FRAGS and handle that case
-appropriately. (For size checks, the skb_gso_validate_*_len family of
-helpers do this automatically.)
+appropriately.
+
+There are some helpers to make this easier:
+
+ - skb_is_gso(skb) && skb_is_gso_sctp(skb) is the best way to see if
+   an skb is an SCTP GSO skb.
+
+ - For size checks, the skb_gso_validate_*_len family of helpers correctly
+   considers GSO_BY_FRAGS.
+
+ - For manipulating packets, skb_increase_gso_size and skb_decrease_gso_size
+   will check for GSO_BY_FRAGS and WARN if asked to manipulate these skbs.
 
 This also affects drivers with the NETIF_F_FRAGLIST & NETIF_F_GSO_SCTP bits
 set. Note also that NETIF_F_GSO_SCTP is included in NETIF_F_GSO_SOFTWARE.
index 77ed00631c12fa946e27055a7a68b2753b96f73f..58b5ef75f1b746084e37463ec62020843a3c9ea9 100644 (file)
@@ -48,6 +48,9 @@ the transmit and the receive into the kernel.
 
   setsockopt(sock, SOL_TLS, TLS_TX, &crypto_info, sizeof(crypto_info));
 
+Transmit and receive are set separately, but the setup is the same, using either
+TLS_TX or TLS_RX.
+
 Sending TLS application data
 ----------------------------
 
@@ -79,6 +82,28 @@ for memory), or the encryption will always succeed.  If send() returns
 -ENOMEM and some data was left on the socket buffer from a previous
 call using MSG_MORE, the MSG_MORE data is left on the socket buffer.
 
+Receiving TLS application data
+------------------------------
+
+After setting the TLS_RX socket option, all recv family socket calls
+are decrypted using TLS parameters provided.  A full TLS record must
+be received before decryption can happen.
+
+  char buffer[16384];
+  recv(sock, buffer, 16384, 0 /* flags */);
+
+Received data is decrypted directly into the user buffer if it is
+large enough, and no additional allocations occur.  If the userspace
+buffer is too small, data is decrypted in the kernel and copied to
+userspace.
+
+EINVAL is returned if the TLS version in the received message does not
+match the version passed in setsockopt.
+
+EMSGSIZE is returned if the received message is too big.
+
+EBADMSG is returned if decryption failed for any other reason.
+
 Send TLS control messages
 -------------------------
 
@@ -118,6 +143,43 @@ using a record of type @record_type.
 Control message data should be provided unencrypted, and will be
 encrypted by the kernel.
 
+Receiving TLS control messages
+------------------------------
+
+TLS control messages are passed in the userspace buffer, with message
+type passed via cmsg.  If no cmsg buffer is provided, an error is
+returned if a control message is received.  Data messages may be
+received without a cmsg buffer set.
+
+  char buffer[16384];
+  char cmsg[CMSG_SPACE(sizeof(unsigned char))];
+  struct msghdr msg = {0};
+  msg.msg_control = cmsg;
+  msg.msg_controllen = sizeof(cmsg);
+
+  struct iovec msg_iov;
+  msg_iov.iov_base = buffer;
+  msg_iov.iov_len = 16384;
+
+  msg.msg_iov = &msg_iov;
+  msg.msg_iovlen = 1;
+
+  int ret = recvmsg(sock, &msg, 0 /* flags */);
+
+  struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+  if (cmsg->cmsg_level == SOL_TLS &&
+      cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
+      int record_type = *((unsigned char *)CMSG_DATA(cmsg));
+      // Do something with record_type, and control message data in
+      // buffer.
+      //
+      // Note that record_type may be == to application data (23).
+  } else {
+      // Buffer contains application data.
+  }
+
+recv will never return data from mixed types of TLS records.
+
 Integrating in to userspace TLS library
 ---------------------------------------
 
@@ -126,10 +188,10 @@ layer of a userspace TLS library.
 
 A patchset to OpenSSL to use ktls as the record layer is here:
 
-https://github.com/Mellanox/tls-openssl
+https://github.com/Mellanox/openssl/commits/tls_rx2
 
 An example of calling send directly after a handshake using
 gnutls.  Since it doesn't implement a full record layer, control
 messages are not supported:
 
-https://github.com/Mellanox/tls-af_ktls_tool
+https://github.com/ktls/af_ktls-tool/commits/RX
index 39aa9e8697ccf8f1ad3e0d7210294eff20bd9cb0..fbedcc39460bb28b156db85ac085fbb077d3fe6e 100644 (file)
@@ -36,8 +36,7 @@ import glob
 
 from docutils import nodes, statemachine
 from docutils.statemachine import ViewList
-from docutils.parsers.rst import directives
-from sphinx.util.compat import Directive
+from docutils.parsers.rst import directives, Directive
 from sphinx.ext.autodoc import AutodocReporter
 
 __version__  = '1.0'
index 35c62f522754a79f8ecaba74dd8c84a038a8928f..5992602469d894d594b829690b9b8caf7fda57f4 100644 (file)
@@ -270,6 +270,18 @@ optmem_max
 Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
 of struct cmsghdr structures with appended data.
 
+fb_tunnels_only_for_init_net
+----------------------------
+
+Controls if fallback tunnels (like tunl0, gre0, gretap0, erspan0,
+sit0, ip6tnl0, ip6gre0) are automatically created when a new
+network namespace is created, if corresponding tunnel is present
+in initial network namespace.
+If set to 1, these devices are not automatically created, and
+user space is responsible for creating them if needed.
+
+Default : 0  (for compatibility reasons)
+
 2. /proc/sys/net/unix - Parameters for Unix domain sockets
 -------------------------------------------------------
 
index 792fa8717d133e1aa7d6c73a8b948d53150e6d78..d6b3ff51a14fd96600cc9f65f8ed11cd5c64b768 100644 (file)
@@ -123,14 +123,15 @@ memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the
 flag KVM_VM_MIPS_VZ.
 
 
-4.3 KVM_GET_MSR_INDEX_LIST
+4.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST
 
-Capability: basic
+Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST
 Architectures: x86
-Type: system
+Type: system ioctl
 Parameters: struct kvm_msr_list (in/out)
 Returns: 0 on success; -1 on error
 Errors:
+  EFAULT:    the msr index list cannot be read from or written to
  E2BIG:     the msr index list is too big to fit in the array specified by
              the user.
 
@@ -139,16 +140,23 @@ struct kvm_msr_list {
        __u32 indices[0];
 };
 
-This ioctl returns the guest msrs that are supported.  The list varies
-by kvm version and host processor, but does not change otherwise.  The
-user fills in the size of the indices array in nmsrs, and in return
-kvm adjusts nmsrs to reflect the actual number of msrs and fills in
-the indices array with their numbers.
+The user fills in the size of the indices array in nmsrs, and in return
+kvm adjusts nmsrs to reflect the actual number of msrs and fills in the
+indices array with their numbers.
+
+KVM_GET_MSR_INDEX_LIST returns the guest msrs that are supported.  The list
+varies by kvm version and host processor, but does not change otherwise.
 
 Note: if kvm indicates support for MCE (KVM_CAP_MCE), then the MCE bank MSRs are
 not returned in the MSR list, as different vcpus can have a different number
 of banks, as set via the KVM_X86_SETUP_MCE ioctl.
 
+KVM_GET_MSR_FEATURE_INDEX_LIST returns the list of MSRs that can be passed
+to the KVM_GET_MSRS system ioctl.  This lets userspace probe host capabilities
+and processor features that are exposed via MSRs (e.g., VMX capabilities).
+This list also varies by kvm version and host processor, but does not change
+otherwise.
+
 
 4.4 KVM_CHECK_EXTENSION
 
@@ -475,14 +483,22 @@ Support for this has been removed.  Use KVM_SET_GUEST_DEBUG instead.
 
 4.18 KVM_GET_MSRS
 
-Capability: basic
+Capability: basic (vcpu), KVM_CAP_GET_MSR_FEATURES (system)
 Architectures: x86
-Type: vcpu ioctl
+Type: system ioctl, vcpu ioctl
 Parameters: struct kvm_msrs (in/out)
-Returns: 0 on success, -1 on error
+Returns: number of msrs successfully returned;
+        -1 on error
+
+When used as a system ioctl:
+Reads the values of MSR-based features that are available for the VM.  This
+is similar to KVM_GET_SUPPORTED_CPUID, but it returns MSR indices and values.
+The list of msr-based features can be obtained using KVM_GET_MSR_FEATURE_INDEX_LIST
+in a system ioctl.
 
+When used as a vcpu ioctl:
 Reads model-specific registers from the vcpu.  Supported msr indices can
-be obtained using KVM_GET_MSR_INDEX_LIST.
+be obtained using KVM_GET_MSR_INDEX_LIST in a system ioctl.
 
 struct kvm_msrs {
        __u32 nmsrs; /* number of msrs in entries */
index dcab6dc11e3b08117456f10903ad3ac2fa29eb99..87a7506f31c2b7f9b03be131ba3b3ef1c85f360b 100644 (file)
@@ -58,6 +58,10 @@ KVM_FEATURE_PV_TLB_FLUSH           ||     9 || guest checks this feature bit
                                    ||       || before enabling paravirtualized
                                    ||       || tlb flush.
 ------------------------------------------------------------------------------
+KVM_FEATURE_ASYNC_PF_VMEXIT        ||    10 || paravirtualized async PF VM exit
+                                   ||       || can be enabled by setting bit 2
+                                   ||       || when writing to msr 0x4b564d02
+------------------------------------------------------------------------------
 KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
                                    ||       || per-cpu warps are expected in
                                    ||       || kvmclock.
index 1ebecc115dc6efdbbfa76eb4ab329f5b1d8b765e..f3f0d57ced8e1827fe8e8e6dc49808b18c9253fb 100644 (file)
@@ -170,7 +170,8 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
        when asynchronous page faults are enabled on the vcpu 0 when
        disabled. Bit 1 is 1 if asynchronous page faults can be injected
        when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
-       are delivered to L1 as #PF vmexits.
+       are delivered to L1 as #PF vmexits.  Bit 2 can be set only if
+       KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID.
 
        First 4 byte of 64 byte memory location will be written to by
        the hypervisor at the time of asynchronous page fault (APF)
index 756fd76b78a6f95156abb68e7405e01384a75067..71c30984e94d58e9e1de13ac0ee05ef7655a020a 100644 (file)
@@ -671,7 +671,7 @@ occupancy of the real time threads on these cores.
 # mkdir p1
 
 Move the cpus 4-7 over to p1
-# echo f0 > p0/cpus
+# echo f0 > p1/cpus
 
 View the llc occupancy snapshot
 
index f3e9d7e9ed6cbcbe4e731881ffb5203925b758db..2953e3ec9a0259f102ce40b9c28340f761c2df4e 100644 (file)
@@ -108,7 +108,7 @@ The topology of a system is described in the units of:
 
     The number of online threads is also printed in /proc/cpuinfo "siblings."
 
-  - topology_sibling_mask():
+  - topology_sibling_cpumask():
 
     The cpumask contains all online threads in the core to which a thread
     belongs.
index 93a12af4f180b1104e0c6a4cfc030d73b0d120cc..b3ea844cf228d647628d0b79aa126eb4c96460ea 100644 (file)
@@ -1238,7 +1238,7 @@ F:        drivers/clk/at91
 
 ARM/ATMEL AT91RM9200, AT91SAM9 AND SAMA5 SOC SUPPORT
 M:     Nicolas Ferre <nicolas.ferre@microchip.com>
-M:     Alexandre Belloni <alexandre.belloni@free-electrons.com>
+M:     Alexandre Belloni <alexandre.belloni@bootlin.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:     http://www.linux4sam.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/nferre/linux-at91.git
@@ -1590,7 +1590,7 @@ ARM/Marvell Dove/MV78xx0/Orion SOC support
 M:     Jason Cooper <jason@lakedaemon.net>
 M:     Andrew Lunn <andrew@lunn.ch>
 M:     Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
-M:     Gregory Clement <gregory.clement@free-electrons.com>
+M:     Gregory Clement <gregory.clement@bootlin.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/soc/dove/
@@ -1604,7 +1604,7 @@ F:        arch/arm/boot/dts/orion5x*
 ARM/Marvell Kirkwood and Armada 370, 375, 38x, 39x, XP, 3700, 7K/8K SOC support
 M:     Jason Cooper <jason@lakedaemon.net>
 M:     Andrew Lunn <andrew@lunn.ch>
-M:     Gregory Clement <gregory.clement@free-electrons.com>
+M:     Gregory Clement <gregory.clement@bootlin.com>
 M:     Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
@@ -1999,8 +1999,10 @@ M:       Maxime Coquelin <mcoquelin.stm32@gmail.com>
 M:     Alexandre Torgue <alexandre.torgue@st.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mcoquelin/stm32.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/atorgue/stm32.git stm32-next
 N:     stm32
+F:     arch/arm/boot/dts/stm32*
+F:     arch/arm/mach-stm32/
 F:     drivers/clocksource/armv7m_systick.c
 
 ARM/TANGO ARCHITECTURE
@@ -7600,8 +7602,10 @@ F:       mm/kasan/
 F:     scripts/Makefile.kasan
 
 KCONFIG
+M:     Masahiro Yamada <yamada.masahiro@socionext.com>
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git kconfig
 L:     linux-kbuild@vger.kernel.org
-S:     Orphan
+S:     Maintained
 F:     Documentation/kbuild/kconfig-language.txt
 F:     scripts/kconfig/
 
@@ -8592,6 +8596,15 @@ S:       Maintained
 F:     Documentation/ABI/testing/sysfs-bus-iio-potentiometer-mcp4531
 F:     drivers/iio/potentiometer/mcp4531.c
 
+MCR20A IEEE-802.15.4 RADIO DRIVER
+M:     Xue Liu <liuxuenetmail@gmail.com>
+L:     linux-wpan@vger.kernel.org
+W:     https://github.com/xueliu/mcr20a-linux
+S:     Maintained
+F:     drivers/net/ieee802154/mcr20a.c
+F:     drivers/net/ieee802154/mcr20a.h
+F:     Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
+
 MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
 M:     William Breathitt Gray <vilhelm.gray@gmail.com>
 L:     linux-iio@vger.kernel.org
@@ -9148,6 +9161,13 @@ F:       drivers/net/dsa/microchip/*
 F:     include/linux/platform_data/microchip-ksz.h
 F:     Documentation/devicetree/bindings/net/dsa/ksz.txt
 
+MICROCHIP LAN743X ETHERNET DRIVER
+M:     Bryan Whitehead <bryan.whitehead@microchip.com>
+M:     Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
+L:     netdev@vger.kernel.org
+S:     Maintained
+F:     drivers/net/ethernet/microchip/lan743x_*
+
 MICROCHIP USB251XB DRIVER
 M:     Richard Leitner <richard.leitner@skidata.com>
 L:     linux-usb@vger.kernel.org
@@ -9921,6 +9941,13 @@ F:       Documentation/ABI/stable/sysfs-bus-nvmem
 F:     include/linux/nvmem-consumer.h
 F:     include/linux/nvmem-provider.h
 
+NXP SGTL5000 DRIVER
+M:     Fabio Estevam <fabio.estevam@nxp.com>
+L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
+S:     Maintained
+F:     Documentation/devicetree/bindings/sound/sgtl5000.txt
+F:     sound/soc/codecs/sgtl5000*
+
 NXP TDA998X DRM DRIVER
 M:     Russell King <linux@armlinux.org.uk>
 S:     Supported
@@ -10323,7 +10350,7 @@ F:      drivers/oprofile/
 F:     include/linux/oprofile.h
 
 ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
-M:     Mark Fasheh <mfasheh@versity.com>
+M:     Mark Fasheh <mark@fasheh.com>
 M:     Joel Becker <jlbec@evilplan.org>
 L:     ocfs2-devel@oss.oracle.com (moderated for non-subscribers)
 W:     http://ocfs2.wiki.kernel.org
@@ -10833,6 +10860,7 @@ F:      drivers/platform/x86/peaq-wmi.c
 PER-CPU MEMORY ALLOCATOR
 M:     Tejun Heo <tj@kernel.org>
 M:     Christoph Lameter <cl@linux.com>
+M:     Dennis Zhou <dennisszhou@gmail.com>
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git
 S:     Maintained
 F:     include/linux/percpu*.h
@@ -10926,6 +10954,17 @@ L:     linux-gpio@vger.kernel.org
 S:     Supported
 F:     drivers/pinctrl/pinctrl-at91-pio4.*
 
+PIN CONTROLLER - FREESCALE
+M:     Dong Aisheng <aisheng.dong@nxp.com>
+M:     Fabio Estevam <festevam@gmail.com>
+M:     Shawn Guo <shawnguo@kernel.org>
+M:     Stefan Agner <stefan@agner.ch>
+R:     Pengutronix Kernel Team <kernel@pengutronix.de>
+L:     linux-gpio@vger.kernel.org
+S:     Maintained
+F:     drivers/pinctrl/freescale/
+F:     Documentation/devicetree/bindings/pinctrl/fsl,*
+
 PIN CONTROLLER - INTEL
 M:     Mika Westerberg <mika.westerberg@linux.intel.com>
 M:     Heikki Krogerus <heikki.krogerus@linux.intel.com>
@@ -12092,6 +12131,7 @@ M:      Sylwester Nawrocki <s.nawrocki@samsung.com>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Supported
 F:     sound/soc/samsung/
+F:     Documentation/devicetree/bindings/sound/samsung*
 
 SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER
 M:     Krzysztof Kozlowski <krzk@kernel.org>
index d9cf3a40eda9d20ce03ceda2ebc921a95dc2aea7..486db374d1c1ada440cc8b023c3b5c3e91b41da0 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 16
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc6
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
@@ -388,7 +388,7 @@ PYTHON              = python
 CHECK          = sparse
 
 CHECKFLAGS     := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \
-                 -Wbitwise -Wno-return-void $(CF)
+                 -Wbitwise -Wno-return-void -Wno-unknown-attribute $(CF)
 NOSTDINC_FLAGS  =
 CFLAGS_MODULE   =
 AFLAGS_MODULE   =
@@ -489,6 +489,11 @@ KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
 KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
 endif
 
+RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register
+RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk
+RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG)))
+export RETPOLINE_CFLAGS
+
 ifeq ($(config-targets),1)
 # ===========================================================================
 # *config targets only - make sure prerequisites are updated, and descend
@@ -579,10 +584,9 @@ ifeq ($(KBUILD_EXTMOD),)
 # To avoid any implicit rule to kick in, define an empty command
 $(KCONFIG_CONFIG) include/config/auto.conf.cmd: ;
 
-# If .config is newer than include/config/auto.conf, someone tinkered
-# with it and forgot to run make oldconfig.
-# if auto.conf.cmd is missing then we are probably in a cleaned tree so
-# we execute the config step to be sure to catch updated Kconfig files
+# The actual configuration files used during the build are stored in
+# include/generated/ and include/config/. Update them if .config is newer than
+# include/config/auto.conf (which mirrors .config).
 include/config/%.conf: $(KCONFIG_CONFIG) include/config/auto.conf.cmd
        $(Q)$(MAKE) -f $(srctree)/Makefile silentoldconfig
 else
@@ -822,6 +826,15 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS  += $(call cc-option,-fno-strict-overflow)
 
+# clang sets -fmerge-all-constants by default as optimization, but this
+# is non-conforming behavior for C and in fact breaks the kernel, so we
+# need to disable it here generally.
+KBUILD_CFLAGS  += $(call cc-option,-fno-merge-all-constants)
+
+# for gcc -fno-merge-all-constants disables everything, but it is fine
+# to have actual conforming behavior enabled.
+KBUILD_CFLAGS  += $(call cc-option,-fmerge-constants)
+
 # Make sure -fstack-check isn't enabled (like gentoo apparently did)
 KBUILD_CFLAGS  += $(call cc-option,-fno-stack-check,)
 
@@ -857,8 +870,7 @@ KBUILD_AFLAGS   += $(ARCH_AFLAGS)   $(KAFLAGS)
 KBUILD_CFLAGS   += $(ARCH_CFLAGS)   $(KCFLAGS)
 
 # Use --build-id when available.
-LDFLAGS_BUILD_ID := $(patsubst -Wl$(comma)%,%,\
-                             $(call cc-ldoption, -Wl$(comma)--build-id,))
+LDFLAGS_BUILD_ID := $(call ld-option, --build-id)
 KBUILD_LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID)
 LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID)
 
index 46ebf14aed4e55348b1b96aa82516348e00171c3..8a2b331e43febb724f2072498b1c4b31daa6f757 100644 (file)
@@ -6,7 +6,6 @@
  * Atomic exchange routines.
  */
 
-#define __ASM__MB
 #define ____xchg(type, args...)                __xchg ## type ## _local(args)
 #define ____cmpxchg(type, args...)     __cmpxchg ## type ## _local(args)
 #include <asm/xchg.h>
        cmpxchg_local((ptr), (o), (n));                                 \
 })
 
-#ifdef CONFIG_SMP
-#undef __ASM__MB
-#define __ASM__MB      "\tmb\n"
-#endif
 #undef ____xchg
 #undef ____cmpxchg
 #define ____xchg(type, args...)                __xchg ##type(args)
@@ -64,7 +59,6 @@
        cmpxchg((ptr), (o), (n));                                       \
 })
 
-#undef __ASM__MB
 #undef ____cmpxchg
 
 #endif /* _ALPHA_CMPXCHG_H */
index 68dfb3cb71454384dd187edfd0dded0fe4b65117..e2b59fac5257de10134c8d0c116d8a4827b76a09 100644 (file)
  * Atomic exchange.
  * Since it can be used to implement critical sections
  * it must clobber "memory" (also for interrupts in UP).
+ *
+ * The leading and the trailing memory barriers guarantee that these
+ * operations are fully ordered.
+ *
  */
 
 static inline unsigned long
@@ -19,6 +23,7 @@ ____xchg(_u8, volatile char *m, unsigned long val)
 {
        unsigned long ret, tmp, addr64;
 
+       smp_mb();
        __asm__ __volatile__(
        "       andnot  %4,7,%3\n"
        "       insbl   %1,%4,%1\n"
@@ -28,12 +33,12 @@ ____xchg(_u8, volatile char *m, unsigned long val)
        "       or      %1,%2,%2\n"
        "       stq_c   %2,0(%3)\n"
        "       beq     %2,2f\n"
-               __ASM__MB
        ".subsection 2\n"
        "2:     br      1b\n"
        ".previous"
        : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
        : "r" ((long)m), "1" (val) : "memory");
+       smp_mb();
 
        return ret;
 }
@@ -43,6 +48,7 @@ ____xchg(_u16, volatile short *m, unsigned long val)
 {
        unsigned long ret, tmp, addr64;
 
+       smp_mb();
        __asm__ __volatile__(
        "       andnot  %4,7,%3\n"
        "       inswl   %1,%4,%1\n"
@@ -52,12 +58,12 @@ ____xchg(_u16, volatile short *m, unsigned long val)
        "       or      %1,%2,%2\n"
        "       stq_c   %2,0(%3)\n"
        "       beq     %2,2f\n"
-               __ASM__MB
        ".subsection 2\n"
        "2:     br      1b\n"
        ".previous"
        : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
        : "r" ((long)m), "1" (val) : "memory");
+       smp_mb();
 
        return ret;
 }
@@ -67,17 +73,18 @@ ____xchg(_u32, volatile int *m, unsigned long val)
 {
        unsigned long dummy;
 
+       smp_mb();
        __asm__ __volatile__(
        "1:     ldl_l %0,%4\n"
        "       bis $31,%3,%1\n"
        "       stl_c %1,%2\n"
        "       beq %1,2f\n"
-               __ASM__MB
        ".subsection 2\n"
        "2:     br 1b\n"
        ".previous"
        : "=&r" (val), "=&r" (dummy), "=m" (*m)
        : "rI" (val), "m" (*m) : "memory");
+       smp_mb();
 
        return val;
 }
@@ -87,17 +94,18 @@ ____xchg(_u64, volatile long *m, unsigned long val)
 {
        unsigned long dummy;
 
+       smp_mb();
        __asm__ __volatile__(
        "1:     ldq_l %0,%4\n"
        "       bis $31,%3,%1\n"
        "       stq_c %1,%2\n"
        "       beq %1,2f\n"
-               __ASM__MB
        ".subsection 2\n"
        "2:     br 1b\n"
        ".previous"
        : "=&r" (val), "=&r" (dummy), "=m" (*m)
        : "rI" (val), "m" (*m) : "memory");
+       smp_mb();
 
        return val;
 }
@@ -128,10 +136,12 @@ ____xchg(, volatile void *ptr, unsigned long x, int size)
  * store NEW in MEM.  Return the initial value in MEM.  Success is
  * indicated by comparing RETURN with OLD.
  *
- * The memory barrier should be placed in SMP only when we actually
- * make the change. If we don't change anything (so if the returned
- * prev is equal to old) then we aren't acquiring anything new and
- * we don't need any memory barrier as far I can tell.
+ * The leading and the trailing memory barriers guarantee that these
+ * operations are fully ordered.
+ *
+ * The trailing memory barrier is placed in SMP unconditionally, in
+ * order to guarantee that dependency ordering is preserved when a
+ * dependency is headed by an unsuccessful operation.
  */
 
 static inline unsigned long
@@ -139,6 +149,7 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
 {
        unsigned long prev, tmp, cmp, addr64;
 
+       smp_mb();
        __asm__ __volatile__(
        "       andnot  %5,7,%4\n"
        "       insbl   %1,%5,%1\n"
@@ -150,13 +161,13 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
        "       or      %1,%2,%2\n"
        "       stq_c   %2,0(%4)\n"
        "       beq     %2,3f\n"
-               __ASM__MB
        "2:\n"
        ".subsection 2\n"
        "3:     br      1b\n"
        ".previous"
        : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
        : "r" ((long)m), "Ir" (old), "1" (new) : "memory");
+       smp_mb();
 
        return prev;
 }
@@ -166,6 +177,7 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)
 {
        unsigned long prev, tmp, cmp, addr64;
 
+       smp_mb();
        __asm__ __volatile__(
        "       andnot  %5,7,%4\n"
        "       inswl   %1,%5,%1\n"
@@ -177,13 +189,13 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)
        "       or      %1,%2,%2\n"
        "       stq_c   %2,0(%4)\n"
        "       beq     %2,3f\n"
-               __ASM__MB
        "2:\n"
        ".subsection 2\n"
        "3:     br      1b\n"
        ".previous"
        : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
        : "r" ((long)m), "Ir" (old), "1" (new) : "memory");
+       smp_mb();
 
        return prev;
 }
@@ -193,6 +205,7 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)
 {
        unsigned long prev, cmp;
 
+       smp_mb();
        __asm__ __volatile__(
        "1:     ldl_l %0,%5\n"
        "       cmpeq %0,%3,%1\n"
@@ -200,13 +213,13 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)
        "       mov %4,%1\n"
        "       stl_c %1,%2\n"
        "       beq %1,3f\n"
-               __ASM__MB
        "2:\n"
        ".subsection 2\n"
        "3:     br 1b\n"
        ".previous"
        : "=&r"(prev), "=&r"(cmp), "=m"(*m)
        : "r"((long) old), "r"(new), "m"(*m) : "memory");
+       smp_mb();
 
        return prev;
 }
@@ -216,6 +229,7 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)
 {
        unsigned long prev, cmp;
 
+       smp_mb();
        __asm__ __volatile__(
        "1:     ldq_l %0,%5\n"
        "       cmpeq %0,%3,%1\n"
@@ -223,13 +237,13 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)
        "       mov %4,%1\n"
        "       stq_c %1,%2\n"
        "       beq %1,3f\n"
-               __ASM__MB
        "2:\n"
        ".subsection 2\n"
        "3:     br 1b\n"
        ".previous"
        : "=&r"(prev), "=&r"(cmp), "=m"(*m)
        : "r"((long) old), "r"(new), "m"(*m) : "memory");
+       smp_mb();
 
        return prev;
 }
index f3a80cf164cc92a0860ddb77f5402dd7d8856ab0..d76bf4a8374016043963f1c04d42700e64ad605a 100644 (file)
@@ -484,7 +484,6 @@ config ARC_CURR_IN_REG
 
 config ARC_EMUL_UNALIGNED
        bool "Emulate unaligned memory access (userspace only)"
-       default N
        select SYSCTL_ARCH_UNALIGN_NO_WARN
        select SYSCTL_ARCH_UNALIGN_ALLOW
        depends on ISA_ARCOMPACT
index 70aec7d6ca600c2889d0a78bc3dac5b5fcb5b967..626b694c7be75ccf7868d7423aca79fb58eef586 100644 (file)
@@ -17,6 +17,6 @@ / {
        compatible = "snps,axs101", "snps,arc-sdp";
 
        chosen {
-               bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0 video=1280x720@60";
+               bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0 video=1280x720@60 print-fatal-signals=1";
        };
 };
index 74d070cd3c13a723fef1a2b3cd91cd2919392762..47b74fbc403c21cc2f493f6f84d6216b7c5ef5c1 100644 (file)
@@ -214,13 +214,13 @@ adv7511_output: endpoint {
                        };
 
                        eeprom@0x54{
-                               compatible = "24c01";
+                               compatible = "atmel,24c01";
                                reg = <0x54>;
                                pagesize = <0x8>;
                        };
 
                        eeprom@0x57{
-                               compatible = "24c04";
+                               compatible = "atmel,24c04";
                                reg = <0x57>;
                                pagesize = <0x8>;
                        };
index 215cddd0b63baa4d1f865828e1df50914526e64e..0c603308aeb360880bca3a3403a74044cb1c12bc 100644 (file)
@@ -22,7 +22,7 @@ memory {
        };
 
        chosen {
-               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug";
+               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1";
        };
 
        aliases {
index 5ee96b067c085ce1f061e0aac02732d63d4ecf7e..ff2f2c70c545645f7cb38a526914378edc303aa1 100644 (file)
@@ -17,7 +17,7 @@ / {
        interrupt-parent = <&core_intc>;
 
        chosen {
-               bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
+               bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1";
        };
 
        aliases {
index 8d787b251f73746191cfea3bd176bfb2869eccce..8e2489b16b0aeecb9bc83de255ccad3eeb33efd5 100644 (file)
@@ -24,7 +24,7 @@ memory {
        };
 
        chosen {
-               bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
+               bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1";
        };
 
        aliases {
index 4f98ebf71fd836624b248aa0cd107fb892bb76b2..ed12f494721df91f4597b2ead8b1c7ecd5a4f5e2 100644 (file)
@@ -15,7 +15,7 @@ / {
        interrupt-parent = <&core_intc>;
 
        chosen {
-               bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
+               bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1";
        };
 
        aliases {
index 3c391ba565ed080cfad8b66f4c3395975eec90da..7842e5eb4ab5cbb73c1bac4d881d7b332979bdaa 100644 (file)
@@ -20,7 +20,7 @@ chosen {
                /* this is for console on PGU */
                /* bootargs = "console=tty0 consoleblank=0"; */
                /* this is for console on serial */
-               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24";
+               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24 print-fatal-signals=1";
        };
 
        aliases {
index 14a727cbf4c98e12590f0ad74148f24971f772f4..b8838cf2b4ec72ab119e72f367c6ef6184a72bce 100644 (file)
@@ -20,7 +20,7 @@ chosen {
                /* this is for console on PGU */
                /* bootargs = "console=tty0 consoleblank=0"; */
                /* this is for console on serial */
-               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24";
+               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24 print-fatal-signals=1";
        };
 
        aliases {
index 5052917d4a99490ea77d2e18cc03b97ce3722f3a..72a2c723f1f7af826b07c49a416bf3265ed39f89 100644 (file)
@@ -18,7 +18,7 @@ / {
 
        chosen {
                /* this is for console on serial */
-               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblan=0 debug video=640x480-24";
+               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblan=0 debug video=640x480-24 print-fatal-signals=1";
        };
 
        aliases {
index 257a68f3c2feef3a369093f5bade883e814b7648..309f4e6721b3e22829847f88a4da884fdc9edf93 100644 (file)
 .macro FAKE_RET_FROM_EXCPN
        lr      r9, [status32]
        bic     r9, r9, (STATUS_U_MASK|STATUS_DE_MASK|STATUS_AE_MASK)
-       or      r9, r9, (STATUS_L_MASK|STATUS_IE_MASK)
+       or      r9, r9, STATUS_IE_MASK
        kflag   r9
 .endm
 
index f61a52b01625b106143b089570b327b585e5a254..5fe84e481654ebe76a483bf81c2a11ffe870322d 100644 (file)
@@ -22,10 +22,79 @@ static DEFINE_RAW_SPINLOCK(mcip_lock);
 
 static char smp_cpuinfo_buf[128];
 
+/*
+ * Set mask to halt GFRC if any online core in SMP cluster is halted.
+ * Only works for ARC HS v3.0+, on earlier versions has no effect.
+ */
+static void mcip_update_gfrc_halt_mask(int cpu)
+{
+       struct bcr_generic gfrc;
+       unsigned long flags;
+       u32 gfrc_halt_mask;
+
+       READ_BCR(ARC_REG_GFRC_BUILD, gfrc);
+
+       /*
+        * CMD_GFRC_SET_CORE and CMD_GFRC_READ_CORE commands were added in
+        * GFRC 0x3 version.
+        */
+       if (gfrc.ver < 0x3)
+               return;
+
+       raw_spin_lock_irqsave(&mcip_lock, flags);
+
+       __mcip_cmd(CMD_GFRC_READ_CORE, 0);
+       gfrc_halt_mask = read_aux_reg(ARC_REG_MCIP_READBACK);
+       gfrc_halt_mask |= BIT(cpu);
+       __mcip_cmd_data(CMD_GFRC_SET_CORE, 0, gfrc_halt_mask);
+
+       raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
+static void mcip_update_debug_halt_mask(int cpu)
+{
+       u32 mcip_mask = 0;
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&mcip_lock, flags);
+
+       /*
+        * mcip_mask is same for CMD_DEBUG_SET_SELECT and CMD_DEBUG_SET_MASK
+        * commands. So read it once instead of reading both CMD_DEBUG_READ_MASK
+        * and CMD_DEBUG_READ_SELECT.
+        */
+       __mcip_cmd(CMD_DEBUG_READ_SELECT, 0);
+       mcip_mask = read_aux_reg(ARC_REG_MCIP_READBACK);
+
+       mcip_mask |= BIT(cpu);
+
+       __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, mcip_mask);
+       /*
+        * Parameter specified halt cause:
+        * STATUS32[H]/actionpoint/breakpoint/self-halt
+        * We choose all of them (0xF).
+        */
+       __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xF, mcip_mask);
+
+       raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
 static void mcip_setup_per_cpu(int cpu)
 {
+       struct mcip_bcr mp;
+
+       READ_BCR(ARC_REG_MCIP_BCR, mp);
+
        smp_ipi_irq_setup(cpu, IPI_IRQ);
        smp_ipi_irq_setup(cpu, SOFTIRQ_IRQ);
+
+       /* Update GFRC halt mask as new CPU came online */
+       if (mp.gfrc)
+               mcip_update_gfrc_halt_mask(cpu);
+
+       /* Update MCIP debug mask as new CPU came online */
+       if (mp.dbg)
+               mcip_update_debug_halt_mask(cpu);
 }
 
 static void mcip_ipi_send(int cpu)
@@ -101,11 +170,6 @@ static void mcip_probe_n_setup(void)
                IS_AVAIL1(mp.gfrc, "GFRC"));
 
        cpuinfo_arc700[0].extn.gfrc = mp.gfrc;
-
-       if (mp.dbg) {
-               __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf);
-               __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xf, 0xf);
-       }
 }
 
 struct plat_smp_ops plat_smp_ops = {
index 9d27331fe69a0eb441b34e51324138ef375070b0..b2cae79a25d716165eaf65060cb8ed0be11f3b6c 100644 (file)
@@ -51,7 +51,7 @@ static const struct id_to_str arc_cpu_rel[] = {
        { 0x51, "R2.0" },
        { 0x52, "R2.1" },
        { 0x53, "R3.0" },
-       { 0x54, "R4.0" },
+       { 0x54, "R3.10a" },
 #endif
        { 0x00, NULL   }
 };
@@ -373,7 +373,7 @@ static void arc_chk_core_config(void)
 {
        struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
        int saved = 0, present = 0;
-       char *opt_nm = NULL;;
+       char *opt_nm = NULL;
 
        if (!cpu->extn.timer0)
                panic("Timer0 is not present!\n");
index efe8b4200a676529a9f3f0af52d50faca176a1e3..21d86c36692b4f9bbf2e6c255832d7c98377c545 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/reboot.h>
 #include <linux/irqdomain.h>
 #include <linux/export.h>
+#include <linux/of_fdt.h>
 
 #include <asm/processor.h>
 #include <asm/setup.h>
@@ -47,6 +48,42 @@ void __init smp_prepare_boot_cpu(void)
 {
 }
 
+static int __init arc_get_cpu_map(const char *name, struct cpumask *cpumask)
+{
+       unsigned long dt_root = of_get_flat_dt_root();
+       const char *buf;
+
+       buf = of_get_flat_dt_prop(dt_root, name, NULL);
+       if (!buf)
+               return -EINVAL;
+
+       if (cpulist_parse(buf, cpumask))
+               return -EINVAL;
+
+       return 0;
+}
+
+/*
+ * Read from DeviceTree and setup cpu possible mask. If there is no
+ * "possible-cpus" property in DeviceTree pretend all [0..NR_CPUS-1] exist.
+ */
+static void __init arc_init_cpu_possible(void)
+{
+       struct cpumask cpumask;
+
+       if (arc_get_cpu_map("possible-cpus", &cpumask)) {
+               pr_warn("Failed to get possible-cpus from dtb, pretending all %u cpus exist\n",
+                       NR_CPUS);
+
+               cpumask_setall(&cpumask);
+       }
+
+       if (!cpumask_test_cpu(0, &cpumask))
+               panic("Master cpu (cpu[0]) is missed in cpu possible mask!");
+
+       init_cpu_possible(&cpumask);
+}
+
 /*
  * Called from setup_arch() before calling setup_processor()
  *
@@ -58,10 +95,7 @@ void __init smp_prepare_boot_cpu(void)
  */
 void __init smp_init_cpus(void)
 {
-       unsigned int i;
-
-       for (i = 0; i < NR_CPUS; i++)
-               set_cpu_possible(i, true);
+       arc_init_cpu_possible();
 
        if (plat_smp_ops.init_early_smp)
                plat_smp_ops.init_early_smp();
@@ -70,16 +104,12 @@ void __init smp_init_cpus(void)
 /* called from init ( ) =>  process 1 */
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
-       int i;
-
        /*
         * if platform didn't set the present map already, do it now
         * boot cpu is set to present already by init/main.c
         */
-       if (num_present_cpus() <= 1) {
-               for (i = 0; i < max_cpus; i++)
-                       set_cpu_present(i, true);
-       }
+       if (num_present_cpus() <= 1)
+               init_cpu_present(cpu_possible_mask);
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
index 333daab7def028761478b4215bf7f50335d4f05d..183391d4d33a4138d04418da4b90d61efe38c6c4 100644 (file)
@@ -366,7 +366,7 @@ static void init_unwind_hdr(struct unwind_table *table,
        return;
 
 ret_err:
-       panic("Attention !!! Dwarf FDE parsing errors\n");;
+       panic("Attention !!! Dwarf FDE parsing errors\n");
 }
 
 #ifdef CONFIG_MODULES
index eee924dfffa6e1baf08221ee5e7e2cd23937d782..2072f3451e9c2127a076873113292a8ae2b5b9cb 100644 (file)
@@ -780,7 +780,10 @@ noinline static void slc_entire_op(const int op)
 
        write_aux_reg(r, ctrl);
 
-       write_aux_reg(ARC_REG_SLC_INVALIDATE, 1);
+       if (op & OP_INV)        /* Inv or flush-n-inv use same cmd reg */
+               write_aux_reg(ARC_REG_SLC_INVALIDATE, 0x1);
+       else
+               write_aux_reg(ARC_REG_SLC_FLUSH, 0x1);
 
        /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */
        read_aux_reg(r);
index 8b2fa9a49967439b1a6a14973b5f2bf1a547b320..c28afb24239384d0c2395888bab51abdbc4bd3aa 100644 (file)
@@ -56,6 +56,7 @@
 
 /dts-v1/;
 #include <dt-bindings/input/input.h>
+#include <dt-bindings/interrupt-controller/irq.h>
 #include <dt-bindings/gpio/gpio.h>
 #include "armada-370.dtsi"
 
@@ -243,6 +244,8 @@ switch: switch@10 {
                #address-cells = <1>;
                #size-cells = <0>;
                reg = <0x10>;
+               interrupt-controller;
+               #interrupt-cells = <2>;
 
                ports {
                        #address-cells = <1>;
@@ -278,6 +281,35 @@ fixed-link {
                                };
                        };
                };
+
+               mdio {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       switchphy0: switchphy@0 {
+                               reg = <0>;
+                               interrupt-parent = <&switch>;
+                               interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+                       };
+
+                       switchphy1: switchphy@1 {
+                               reg = <1>;
+                               interrupt-parent = <&switch>;
+                               interrupts = <1 IRQ_TYPE_LEVEL_HIGH>;
+                       };
+
+                       switchphy2: switchphy@2 {
+                               reg = <2>;
+                               interrupt-parent = <&switch>;
+                               interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
+                       };
+
+                       switchphy3: switchphy@3 {
+                               reg = <3>;
+                               interrupt-parent = <&switch>;
+                               interrupts = <3 IRQ_TYPE_LEVEL_HIGH>;
+                       };
+               };
        };
 };
 
index 18045c38bcf1af1f01b4732ae200cf274e3b6eec..db7cded1b7ada534ef34f49c27cd0dce34d7cdcc 100644 (file)
@@ -55,7 +55,7 @@ gic: interrupt-controller@3ff00100 {
                      <0x3ff00100 0x100>;
        };
 
-       smc@0x3404c000 {
+       smc@3404c000 {
                compatible = "brcm,bcm11351-smc", "brcm,kona-smc";
                reg = <0x3404c000 0x400>; /* 1 KiB in SRAM */
        };
index 6dde95f21cef6b53c995b7311ddade7988dd965a..266f2611dc22126705aab35cb6cb67601e9f3704 100644 (file)
@@ -55,7 +55,7 @@ gic: interrupt-controller@3ff00100 {
                      <0x3ff00100 0x100>;
        };
 
-       smc@0x3404e000 {
+       smc@3404e000 {
                compatible = "brcm,bcm21664-smc", "brcm,kona-smc";
                reg = <0x3404e000 0x400>; /* 1 KiB in SRAM */
        };
index 0e3d2a5ff2081425bd5ccb0097736ebdef3f99a5..a5c3824c80563cf3222f77af9a6ffe39e22b1e0f 100644 (file)
@@ -18,10 +18,10 @@ cpu@0 {
        soc {
                ranges = <0x7e000000 0x20000000 0x02000000>;
                dma-ranges = <0x40000000 0x00000000 0x20000000>;
+       };
 
-               arm-pmu {
-                       compatible = "arm,arm1176-pmu";
-               };
+       arm-pmu {
+               compatible = "arm,arm1176-pmu";
        };
 };
 
index 1dfd7644277736fb58fa4ad8352091677a000d5e..c933e841388421045d908e0147012d0fca0ae670 100644 (file)
@@ -9,19 +9,19 @@ soc {
                         <0x40000000 0x40000000 0x00001000>;
                dma-ranges = <0xc0000000 0x00000000 0x3f000000>;
 
-               local_intc: local_intc {
+               local_intc: local_intc@40000000 {
                        compatible = "brcm,bcm2836-l1-intc";
                        reg = <0x40000000 0x100>;
                        interrupt-controller;
                        #interrupt-cells = <2>;
                        interrupt-parent = <&local_intc>;
                };
+       };
 
-               arm-pmu {
-                       compatible = "arm,cortex-a7-pmu";
-                       interrupt-parent = <&local_intc>;
-                       interrupts = <9 IRQ_TYPE_LEVEL_HIGH>;
-               };
+       arm-pmu {
+               compatible = "arm,cortex-a7-pmu";
+               interrupt-parent = <&local_intc>;
+               interrupts = <9 IRQ_TYPE_LEVEL_HIGH>;
        };
 
        timer {
index efa7d3387ab287fb72e66659644a36903c85e5fc..7704bb029605ed94f3ea8aad77dfe301e4b0beac 100644 (file)
@@ -8,7 +8,7 @@ soc {
                         <0x40000000 0x40000000 0x00001000>;
                dma-ranges = <0xc0000000 0x00000000 0x3f000000>;
 
-               local_intc: local_intc {
+               local_intc: local_intc@40000000 {
                        compatible = "brcm,bcm2836-l1-intc";
                        reg = <0x40000000 0x100>;
                        interrupt-controller;
index 18db25a5a66e0c1685457cd7959bdf239db5c76c..9d293decf8d353cdda502f5035212d967c364973 100644 (file)
@@ -465,7 +465,7 @@ thermal: thermal@7e212000 {
                        status = "disabled";
                };
 
-               aux: aux@0x7e215000 {
+               aux: aux@7e215000 {
                        compatible = "brcm,bcm2835-aux";
                        #clock-cells = <1>;
                        reg = <0x7e215000 0x8>;
index 6a44b8021702176c63d09e55925ffd3d7e02994e..f0e2008f7490146a22ae06fbc310dbe5cb18c4d5 100644 (file)
@@ -49,7 +49,7 @@ chosen {
 
        memory {
                device_type = "memory";
-               reg = <0x60000000 0x80000000>;
+               reg = <0x60000000 0x20000000>;
        };
 
        gpio-restart {
index 08568ce24d06fc9576b57c373ab96aedc80c23b7..da8bb9d60f99e6b5af6a3e7ff4c30215f6a91844 100644 (file)
@@ -269,7 +269,7 @@ conf7 {
 
                sata: sata@46000000 {
                        /* The ROM uses this muxmode */
-                       cortina,gemini-ata-muxmode = <3>;
+                       cortina,gemini-ata-muxmode = <0>;
                        cortina,gemini-enable-sata-bridge;
                        status = "okay";
                };
index cf42c2f5cdc7f9d13409efbd8db6d18d969f2511..1281bc39b7ab87a430b5edaaacf187b82526186d 100644 (file)
@@ -42,7 +42,7 @@
 
 /dts-v1/;
 
-#include "imx6q.dtsi"
+#include "imx6dl.dtsi"
 #include "imx6qdl-icore-rqs.dtsi"
 
 / {
index c1aa7a4518fbaca19e734795e25b4767a06a6005..a30ee9fcb3ae537dacabc5ff8e942ef26933bcfd 100644 (file)
@@ -71,6 +71,8 @@ nand@0,0 {
 };
 
 &i2c1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&i2c1_pins>;
        clock-frequency = <2600000>;
 
        twl: twl@48 {
@@ -189,7 +191,12 @@ OMAP3_CORE1_IOPAD(0x21b8, PIN_INPUT | MUX_MODE0)   /* hsusb0_data7.hsusb0_data7 */
                >;
        };
 
-
+       i2c1_pins: pinmux_i2c1_pins {
+               pinctrl-single,pins = <
+                       OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT | MUX_MODE0)        /* i2c1_scl.i2c1_scl */
+                       OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT | MUX_MODE0)        /* i2c1_sda.i2c1_sda */
+               >;
+       };
 };
 
 &omap3_pmx_wkup {
index b50b796e15c778237926e91848e17802f584eb7c..47915447a82660c9a459b7041886be1cb6e52f1a 100644 (file)
@@ -66,6 +66,8 @@ nand@0,0 {
 };
 
 &i2c1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&i2c1_pins>;
        clock-frequency = <2600000>;
 
        twl: twl@48 {
@@ -136,6 +138,12 @@ OMAP3_CORE1_IOPAD(0x21b6, PIN_INPUT | MUX_MODE0)   /* hsusb0_data6.hsusb0_data6 */
                        OMAP3_CORE1_IOPAD(0x21b8, PIN_INPUT | MUX_MODE0)        /* hsusb0_data7.hsusb0_data7 */
                >;
        };
+       i2c1_pins: pinmux_i2c1_pins {
+               pinctrl-single,pins = <
+                       OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT | MUX_MODE0)        /* i2c1_scl.i2c1_scl */
+                       OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT | MUX_MODE0)        /* i2c1_sda.i2c1_sda */
+               >;
+       };
 };
 
 &uart2 {
index ec2c8baef62ac00bf40b7ae1d28da2eab3a327d0..592e17fd4eeb7ccd1faa8f9d16635cf562092982 100644 (file)
@@ -47,7 +47,7 @@ btn1 {
                        gpios = <&gpio3 19 GPIO_ACTIVE_LOW>;    /* gpio3_83 */
                        wakeup-source;
                        autorepeat;
-                       debounce_interval = <50>;
+                       debounce-interval = <50>;
                };
        };
 
index 3b704cfed69ac1f7b39343925fb2037c9e78e10b..a97458112ff6e80ca198fe1377521372973e8ca4 100644 (file)
@@ -280,7 +280,7 @@ sdio: dwmmc@10218000 {
                max-frequency = <37500000>;
                clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>,
                         <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                fifo-depth = <0x100>;
                interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
                resets = <&cru SRST_SDIO>;
@@ -298,7 +298,7 @@ emmc: dwmmc@1021c000 {
                max-frequency = <37500000>;
                clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>,
                         <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                default-sample-phase = <158>;
                disable-wp;
                dmas = <&pdma 12>;
index 780ec3a99b21f857b414d4d47b8e773cd28c7788..341deaf62ff621ad9122b7b9c2c9950edf5eaa71 100644 (file)
@@ -621,7 +621,7 @@ sdmmc: dwmmc@30000000 {
                interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>,
                         <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                fifo-depth = <0x100>;
                pinctrl-names = "default";
                pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_bus4>;
@@ -634,7 +634,7 @@ sdio: dwmmc@30010000 {
                interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>,
                         <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                fifo-depth = <0x100>;
                pinctrl-names = "default";
                pinctrl-0 = <&sdio_clk &sdio_cmd &sdio_bus4>;
@@ -649,7 +649,7 @@ emmc: dwmmc@30020000 {
                max-frequency = <37500000>;
                clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>,
                         <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                bus-width = <8>;
                default-sample-phase = <158>;
                fifo-depth = <0x100>;
index 99cfae875e12e6e3ec9e333c7534a058bbe0a3e8..5eae4776ffdeece372e74cc2ba799103b99cb04f 100644 (file)
@@ -110,26 +110,6 @@ vdd_misc_1v8: vdd-misc-1v8 {
        };
 };
 
-&cpu0 {
-       cpu0-supply = <&vdd_cpu>;
-       operating-points = <
-               /* KHz    uV */
-               1800000 1400000
-               1608000 1350000
-               1512000 1300000
-               1416000 1200000
-               1200000 1100000
-               1008000 1050000
-                816000 1000000
-                696000  950000
-                600000  900000
-                408000  900000
-                312000  900000
-                216000  900000
-                126000  900000
-       >;
-};
-
 &emmc {
        status = "okay";
        bus-width = <8>;
index 8a74efdb636062e28e309e130f557e5bc582d65f..240e7a23d81ff3cc2eb2facad8a126970775a152 100644 (file)
@@ -56,7 +56,7 @@ global_timer: timer@8000200 {
                        clocks = <&topclk ZX296702_A9_PERIPHCLK>;
                };
 
-               l2cc: l2-cache-controller@0x00c00000 {
+               l2cc: l2-cache-controller@c00000 {
                        compatible = "arm,pl310-cache";
                        reg = <0x00c00000 0x1000>;
                        cache-unified;
@@ -67,30 +67,30 @@ l2cc: l2-cache-controller@0x00c00000 {
                        arm,double-linefill-incr = <0>;
                };
 
-               pcu: pcu@0xa0008000 {
+               pcu: pcu@a0008000 {
                        compatible = "zte,zx296702-pcu";
                        reg = <0xa0008000 0x1000>;
                };
 
-               topclk: topclk@0x09800000 {
+               topclk: topclk@9800000 {
                        compatible = "zte,zx296702-topcrm-clk";
                        reg = <0x09800000 0x1000>;
                        #clock-cells = <1>;
                };
 
-               lsp1clk: lsp1clk@0x09400000 {
+               lsp1clk: lsp1clk@9400000 {
                        compatible = "zte,zx296702-lsp1crpm-clk";
                        reg = <0x09400000 0x1000>;
                        #clock-cells = <1>;
                };
 
-               lsp0clk: lsp0clk@0x0b000000 {
+               lsp0clk: lsp0clk@b000000 {
                        compatible = "zte,zx296702-lsp0crpm-clk";
                        reg = <0x0b000000 0x1000>;
                        #clock-cells = <1>;
                };
 
-               uart0: serial@0x09405000 {
+               uart0: serial@9405000 {
                        compatible = "zte,zx296702-uart";
                        reg = <0x09405000 0x1000>;
                        interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
@@ -98,7 +98,7 @@ uart0: serial@0x09405000 {
                        status = "disabled";
                };
 
-               uart1: serial@0x09406000 {
+               uart1: serial@9406000 {
                        compatible = "zte,zx296702-uart";
                        reg = <0x09406000 0x1000>;
                        interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
@@ -106,7 +106,7 @@ uart1: serial@0x09406000 {
                        status = "disabled";
                };
 
-               mmc0: mmc@0x09408000 {
+               mmc0: mmc@9408000 {
                        compatible = "snps,dw-mshc";
                        #address-cells = <1>;
                        #size-cells = <0>;
@@ -119,7 +119,7 @@ mmc0: mmc@0x09408000 {
                        status = "disabled";
                };
 
-               mmc1: mmc@0x0b003000 {
+               mmc1: mmc@b003000 {
                        compatible = "snps,dw-mshc";
                        #address-cells = <1>;
                        #size-cells = <0>;
@@ -132,7 +132,7 @@ mmc1: mmc@0x0b003000 {
                        status = "disabled";
                };
 
-               sysctrl: sysctrl@0xa0007000 {
+               sysctrl: sysctrl@a0007000 {
                        compatible = "zte,sysctrl", "syscon";
                        reg = <0xa0007000 0x1000>;
                };
index 2f145c4af93a0fb387d86fea028d0c82d0041be8..92674f247a12a48ee603f992c075a8d6ac5c7d71 100644 (file)
@@ -319,7 +319,7 @@ CONFIG_MEDIA_CAMERA_SUPPORT=y
 CONFIG_RC_CORE=m
 CONFIG_MEDIA_CONTROLLER=y
 CONFIG_VIDEO_V4L2_SUBDEV_API=y
-CONFIG_LIRC=m
+CONFIG_LIRC=y
 CONFIG_RC_DEVICES=y
 CONFIG_IR_RX51=m
 CONFIG_V4L_PLATFORM_DRIVERS=y
index 629f8e9981f1ee775afa792d5cd86ddbded76493..cf2701cb0de8c67b605a19bc2f21bc1aa34daad2 100644 (file)
@@ -83,7 +83,7 @@ static void dummy_clock_access(struct timespec64 *ts)
 }
 
 static clock_access_fn __read_persistent_clock = dummy_clock_access;
-static clock_access_fn __read_boot_clock = dummy_clock_access;;
+static clock_access_fn __read_boot_clock = dummy_clock_access;
 
 void read_persistent_clock64(struct timespec64 *ts)
 {
index 5638ce0c95241f7f3afd3e994aaa9993347c9be7..63d6b404d88e39bf581c0434aff5d6b0e951279f 100644 (file)
@@ -7,6 +7,8 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING
 
 KVM=../../../../virt/kvm
 
+CFLAGS_ARMV7VE            :=$(call cc-option, -march=armv7ve)
+
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
@@ -15,7 +17,10 @@ obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
 obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += vfp.o
 obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o
+CFLAGS_banked-sr.o        += $(CFLAGS_ARMV7VE)
+
 obj-$(CONFIG_KVM_ARM_HOST) += entry.o
 obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
 obj-$(CONFIG_KVM_ARM_HOST) += switch.o
+CFLAGS_switch.o                   += $(CFLAGS_ARMV7VE)
 obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
index 111bda8cdebdc7e59789103838087920aedf0efe..be4b8b0a40ade5c8e412bbc75f2b311c389aa979 100644 (file)
 
 #include <asm/kvm_hyp.h>
 
+/*
+ * gcc before 4.9 doesn't understand -march=armv7ve, so we have to
+ * trick the assembler.
+ */
 __asm__(".arch_extension     virt");
 
 void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt)
index ee1f83b1a3324383b09106f01b830ea2802e210d..4c89a8e9a2e374cedfe92941661f38c2b6bc3b10 100644 (file)
@@ -69,7 +69,7 @@ static void clps711x_restart(enum reboot_mode mode, const char *cmd)
        soft_restart(0);
 }
 
-static const char *clps711x_compat[] __initconst = {
+static const char *const clps711x_compat[] __initconst = {
        "cirrus,ep7209",
        NULL
 };
index e457f299cd4430d359063788886d244a53d39f12..d6b11907380c83ddce9bc4998835a11e7a466ec7 100644 (file)
@@ -368,7 +368,7 @@ static struct spi_eeprom at25640a = {
        .flags          = EE_ADDR2,
 };
 
-static struct spi_board_info dm355_evm_spi_info[] __initconst = {
+static const struct spi_board_info dm355_evm_spi_info[] __initconst = {
        {
                .modalias       = "at25",
                .platform_data  = &at25640a,
index be997243447b949699fd116970ec0aacaffbf444..fad9a5611a5d276ce15f0fcfe9aedcc9a5d03802 100644 (file)
@@ -217,7 +217,7 @@ static struct spi_eeprom at25640a = {
        .flags          = EE_ADDR2,
 };
 
-static struct spi_board_info dm355_leopard_spi_info[] __initconst = {
+static const struct spi_board_info dm355_leopard_spi_info[] __initconst = {
        {
                .modalias       = "at25",
                .platform_data  = &at25640a,
index e75741fb2c1da095bd59ba4e0f411a6274ce6255..e3780986d2a3b40aff033f0a22759be80272cf39 100644 (file)
@@ -726,7 +726,7 @@ static struct spi_eeprom at25640 = {
        .flags          = EE_ADDR2,
 };
 
-static struct spi_board_info dm365_evm_spi_info[] __initconst = {
+static const struct spi_board_info dm365_evm_spi_info[] __initconst = {
        {
                .modalias       = "at25",
                .platform_data  = &at25640,
index 6b32dc527edcd58396dc94781992f3970cc8e793..2c20599cc3506326eddc2993afd8df2bd308e268 100644 (file)
@@ -41,7 +41,7 @@ config MACH_ARMADA_375
        depends on ARCH_MULTI_V7
        select ARMADA_370_XP_IRQ
        select ARM_ERRATA_720789
-       select ARM_ERRATA_753970
+       select PL310_ERRATA_753970
        select ARM_GIC
        select ARMADA_375_CLK
        select HAVE_ARM_SCU
@@ -57,7 +57,7 @@ config MACH_ARMADA_38X
        bool "Marvell Armada 380/385 boards"
        depends on ARCH_MULTI_V7
        select ARM_ERRATA_720789
-       select ARM_ERRATA_753970
+       select PL310_ERRATA_753970
        select ARM_GIC
        select ARM_GLOBAL_TIMER
        select CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
index 43e3e188f521341884d0a6e280d5724ef606e13f..fa512413a47172212483ebec6811bc5547aa729b 100644 (file)
@@ -1011,17 +1011,17 @@ static int clk_debugfs_register_one(struct clk *c)
                return -ENOMEM;
        c->dent = d;
 
-       d = debugfs_create_u8("usecount", S_IRUGO, c->dent, (u8 *)&c->usecount);
+       d = debugfs_create_u8("usecount", S_IRUGO, c->dent, &c->usecount);
        if (!d) {
                err = -ENOMEM;
                goto err_out;
        }
-       d = debugfs_create_u32("rate", S_IRUGO, c->dent, (u32 *)&c->rate);
+       d = debugfs_create_ulong("rate", S_IRUGO, c->dent, &c->rate);
        if (!d) {
                err = -ENOMEM;
                goto err_out;
        }
-       d = debugfs_create_x32("flags", S_IRUGO, c->dent, (u32 *)&c->flags);
+       d = debugfs_create_x8("flags", S_IRUGO, c->dent, &c->flags);
        if (!d) {
                err = -ENOMEM;
                goto err_out;
index 4bb6751864a50e046e74c0952ad75571e1d979d0..fc5fb776a7101234bd64da673815d10a0b75f0f2 100644 (file)
@@ -299,8 +299,6 @@ static void irq_save_context(void)
        if (soc_is_dra7xx())
                return;
 
-       if (!sar_base)
-               sar_base = omap4_get_sar_ram_base();
        if (wakeupgen_ops && wakeupgen_ops->save_context)
                wakeupgen_ops->save_context();
 }
@@ -598,6 +596,8 @@ static int __init wakeupgen_init(struct device_node *node,
        irq_hotplug_init();
        irq_pm_init();
 
+       sar_base = omap4_get_sar_ram_base();
+
        return 0;
 }
 IRQCHIP_DECLARE(ti_wakeupgen, "ti,omap4-wugen-mpu", wakeupgen_init);
index 124f9af34a15a3145edecb30108ced8a3b13a2f5..34156eca8e234f45745e849fb460fd42381cfa76 100644 (file)
@@ -977,6 +977,9 @@ static int _enable_clocks(struct omap_hwmod *oh)
 
        pr_debug("omap_hwmod: %s: enabling clocks\n", oh->name);
 
+       if (oh->flags & HWMOD_OPT_CLKS_NEEDED)
+               _enable_optional_clocks(oh);
+
        if (oh->_clk)
                clk_enable(oh->_clk);
 
@@ -985,9 +988,6 @@ static int _enable_clocks(struct omap_hwmod *oh)
                        clk_enable(os->_clk);
        }
 
-       if (oh->flags & HWMOD_OPT_CLKS_NEEDED)
-               _enable_optional_clocks(oh);
-
        /* The opt clocks are controlled by the device driver. */
 
        return 0;
index 366158a54fcd8beae9ff50d712e1b5c63f87d456..6f68576e56956a635acae35af565d09bb2ff2d0f 100644 (file)
@@ -186,7 +186,7 @@ static void omap_pm_end(void)
        cpu_idle_poll_ctrl(false);
 }
 
-static void omap_pm_finish(void)
+static void omap_pm_wake(void)
 {
        if (soc_is_omap34xx())
                omap_prcm_irq_complete();
@@ -196,7 +196,7 @@ static const struct platform_suspend_ops omap_pm_ops = {
        .begin          = omap_pm_begin,
        .end            = omap_pm_end,
        .enter          = omap_pm_enter,
-       .finish         = omap_pm_finish,
+       .wake           = omap_pm_wake,
        .valid          = suspend_valid_only_mem,
 };
 
index ece09c9461f78d9b3908095615a688522b69e9b3..d61fbd7a2840a4980205c16d1c675a957c6292c8 100644 (file)
@@ -156,12 +156,6 @@ static struct clock_event_device clockevent_gpt = {
        .tick_resume            = omap2_gp_timer_shutdown,
 };
 
-static struct property device_disabled = {
-       .name = "status",
-       .length = sizeof("disabled"),
-       .value = "disabled",
-};
-
 static const struct of_device_id omap_timer_match[] __initconst = {
        { .compatible = "ti,omap2420-timer", },
        { .compatible = "ti,omap3430-timer", },
@@ -203,8 +197,17 @@ static struct device_node * __init omap_get_timer_dt(const struct of_device_id *
                                  of_get_property(np, "ti,timer-secure", NULL)))
                        continue;
 
-               if (!of_device_is_compatible(np, "ti,omap-counter32k"))
-                       of_add_property(np, &device_disabled);
+               if (!of_device_is_compatible(np, "ti,omap-counter32k")) {
+                       struct property *prop;
+
+                       prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+                       if (!prop)
+                               return NULL;
+                       prop->name = "status";
+                       prop->value = "disabled";
+                       prop->length = strlen(prop->value);
+                       of_add_property(np, prop);
+               }
                return np;
        }
 
index 2a7bb6ccdcb7eb219f515c6e0f1ba2bfe573a349..a810f4dd34b1e266a001f20a920f421ba7dcf3e8 100644 (file)
@@ -58,7 +58,6 @@ config MACH_KUROBOX_PRO
 
 config MACH_DNS323
        bool "D-Link DNS-323"
-       select GENERIC_NET_UTILS
        select I2C_BOARDINFO if I2C
        help
          Say 'Y' here if you want your kernel to support the
@@ -66,7 +65,6 @@ config MACH_DNS323
 
 config MACH_TS209
        bool "QNAP TS-109/TS-209"
-       select GENERIC_NET_UTILS
        help
          Say 'Y' here if you want your kernel to support the
          QNAP TS-109/TS-209 platform.
@@ -101,7 +99,6 @@ config MACH_LINKSTATION_LS_HGL
 
 config MACH_TS409
        bool "QNAP TS-409"
-       select GENERIC_NET_UTILS
        help
          Say 'Y' here if you want your kernel to support the
          QNAP TS-409 platform.
index cd483bfb5ca82cd3d6289a47e6cc0a56f7787e1f..d13344b2ddcd4ef0ad6c8b8554733a0fc6063610 100644 (file)
@@ -173,10 +173,42 @@ static struct mv643xx_eth_platform_data dns323_eth_data = {
        .phy_addr = MV643XX_ETH_PHY_ADDR(8),
 };
 
+/* dns323_parse_hex_*() taken from tsx09-common.c; should a common copy of these
+ * functions be kept somewhere?
+ */
+static int __init dns323_parse_hex_nibble(char n)
+{
+       if (n >= '0' && n <= '9')
+               return n - '0';
+
+       if (n >= 'A' && n <= 'F')
+               return n - 'A' + 10;
+
+       if (n >= 'a' && n <= 'f')
+               return n - 'a' + 10;
+
+       return -1;
+}
+
+static int __init dns323_parse_hex_byte(const char *b)
+{
+       int hi;
+       int lo;
+
+       hi = dns323_parse_hex_nibble(b[0]);
+       lo = dns323_parse_hex_nibble(b[1]);
+
+       if (hi < 0 || lo < 0)
+               return -1;
+
+       return (hi << 4) | lo;
+}
+
 static int __init dns323_read_mac_addr(void)
 {
        u_int8_t addr[6];
-       void __iomem *mac_page;
+       int i;
+       char *mac_page;
 
        /* MAC address is stored as a regular ol' string in /dev/mtdblock4
         * (0x007d0000-0x00800000) starting at offset 196480 (0x2ff80).
@@ -185,8 +217,23 @@ static int __init dns323_read_mac_addr(void)
        if (!mac_page)
                return -ENOMEM;
 
-       if (!mac_pton((__force const char *) mac_page, addr))
-               goto error_fail;
+       /* Sanity check the string we're looking at */
+       for (i = 0; i < 5; i++) {
+               if (*(mac_page + (i * 3) + 2) != ':') {
+                       goto error_fail;
+               }
+       }
+
+       for (i = 0; i < 6; i++) {
+               int byte;
+
+               byte = dns323_parse_hex_byte(mac_page + (i * 3));
+               if (byte < 0) {
+                       goto error_fail;
+               }
+
+               addr[i] = byte;
+       }
 
        iounmap(mac_page);
        printk("DNS-323: Found ethernet MAC address: %pM\n", addr);
index 89774985d3803fbc8c84a7eb993a7d3e18bf0d75..905d4f2dd0b827938862f1a089e18651eea2757f 100644 (file)
@@ -53,12 +53,53 @@ struct mv643xx_eth_platform_data qnap_tsx09_eth_data = {
        .phy_addr       = MV643XX_ETH_PHY_ADDR(8),
 };
 
+static int __init qnap_tsx09_parse_hex_nibble(char n)
+{
+       if (n >= '0' && n <= '9')
+               return n - '0';
+
+       if (n >= 'A' && n <= 'F')
+               return n - 'A' + 10;
+
+       if (n >= 'a' && n <= 'f')
+               return n - 'a' + 10;
+
+       return -1;
+}
+
+static int __init qnap_tsx09_parse_hex_byte(const char *b)
+{
+       int hi;
+       int lo;
+
+       hi = qnap_tsx09_parse_hex_nibble(b[0]);
+       lo = qnap_tsx09_parse_hex_nibble(b[1]);
+
+       if (hi < 0 || lo < 0)
+               return -1;
+
+       return (hi << 4) | lo;
+}
+
 static int __init qnap_tsx09_check_mac_addr(const char *addr_str)
 {
        u_int8_t addr[6];
+       int i;
 
-       if (!mac_pton(addr_str, addr))
-               return -1;
+       for (i = 0; i < 6; i++) {
+               int byte;
+
+               /*
+                * Enforce "xx:xx:xx:xx:xx:xx\n" format.
+                */
+               if (addr_str[(i * 3) + 2] != ((i < 5) ? ':' : '\n'))
+                       return -1;
+
+               byte = qnap_tsx09_parse_hex_byte(addr_str + (i * 3));
+               if (byte < 0)
+                       return -1;
+               addr[i] = byte;
+       }
 
        printk(KERN_INFO "tsx09: found ethernet mac address %pM\n", addr);
 
@@ -77,12 +118,12 @@ void __init qnap_tsx09_find_mac_addr(u32 mem_base, u32 size)
        unsigned long addr;
 
        for (addr = mem_base; addr < (mem_base + size); addr += 1024) {
-               void __iomem *nor_page;
+               char *nor_page;
                int ret = 0;
 
                nor_page = ioremap(addr, 1024);
                if (nor_page != NULL) {
-                       ret = qnap_tsx09_check_mac_addr((__force const char *)nor_page);
+                       ret = qnap_tsx09_check_mac_addr(nor_page);
                        iounmap(nor_page);
                }
 
index aff6994950ba6db7eb6579a90cc94e5b2bfc7329..a2399fd66e97cef3db011508dc73c718c9456bc9 100644 (file)
@@ -472,28 +472,27 @@ void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data,
 /*****************************************************************************
  * Ethernet switch
  ****************************************************************************/
-static __initconst const char *orion_ge00_mvmdio_bus_name = "orion-mii";
-static __initdata struct mdio_board_info
-                 orion_ge00_switch_board_info;
+static __initdata struct mdio_board_info orion_ge00_switch_board_info = {
+       .bus_id   = "orion-mii",
+       .modalias = "mv88e6085",
+};
 
 void __init orion_ge00_switch_init(struct dsa_chip_data *d)
 {
-       struct mdio_board_info *bd;
        unsigned int i;
 
        if (!IS_BUILTIN(CONFIG_PHYLIB))
                return;
 
-       for (i = 0; i < ARRAY_SIZE(d->port_names); i++)
-               if (!strcmp(d->port_names[i], "cpu"))
+       for (i = 0; i < ARRAY_SIZE(d->port_names); i++) {
+               if (!strcmp(d->port_names[i], "cpu")) {
+                       d->netdev[i] = &orion_ge00.dev;
                        break;
+               }
+       }
 
-       bd = &orion_ge00_switch_board_info;
-       bd->bus_id = orion_ge00_mvmdio_bus_name;
-       bd->mdio_addr = d->sw_addr;
-       d->netdev[i] = &orion_ge00.dev;
-       strcpy(bd->modalias, "mv88e6085");
-       bd->platform_data = d;
+       orion_ge00_switch_board_info.mdio_addr = d->sw_addr;
+       orion_ge00_switch_board_info.platform_data = d;
 
        mdiobus_register_board_info(&orion_ge00_switch_board_info, 1);
 }
index a80632641b39f8cb5dc8554aa2b2e85b6551e0ea..70c776ef7aa7321450def075b8f150ef1dbbbf9f 100644 (file)
@@ -165,14 +165,14 @@ spicc1: spi@15000 {
 
                        uart_A: serial@24000 {
                                compatible = "amlogic,meson-gx-uart", "amlogic,meson-uart";
-                               reg = <0x0 0x24000 0x0 0x14>;
+                               reg = <0x0 0x24000 0x0 0x18>;
                                interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>;
                                status = "disabled";
                        };
 
                        uart_B: serial@23000 {
                                compatible = "amlogic,meson-gx-uart", "amlogic,meson-uart";
-                               reg = <0x0 0x23000 0x0 0x14>;
+                               reg = <0x0 0x23000 0x0 0x18>;
                                interrupts = <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>;
                                status = "disabled";
                        };
index 6cb3c2a52bafe5f0db8a0018b35e88998ed90f01..4ee2e7951482f43122620d2668b244de1744e6b9 100644 (file)
@@ -235,14 +235,14 @@ reset: reset-controller@4404 {
 
                        uart_A: serial@84c0 {
                                compatible = "amlogic,meson-gx-uart";
-                               reg = <0x0 0x84c0 0x0 0x14>;
+                               reg = <0x0 0x84c0 0x0 0x18>;
                                interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>;
                                status = "disabled";
                        };
 
                        uart_B: serial@84dc {
                                compatible = "amlogic,meson-gx-uart";
-                               reg = <0x0 0x84dc 0x0 0x14>;
+                               reg = <0x0 0x84dc 0x0 0x18>;
                                interrupts = <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>;
                                status = "disabled";
                        };
@@ -287,7 +287,7 @@ pwm_ef: pwm@86c0 {
 
                        uart_C: serial@8700 {
                                compatible = "amlogic,meson-gx-uart";
-                               reg = <0x0 0x8700 0x0 0x14>;
+                               reg = <0x0 0x8700 0x0 0x18>;
                                interrupts = <GIC_SPI 93 IRQ_TYPE_EDGE_RISING>;
                                status = "disabled";
                        };
@@ -404,14 +404,14 @@ sec_AO: ao-secure@140 {
 
                        uart_AO: serial@4c0 {
                                compatible = "amlogic,meson-gx-uart", "amlogic,meson-ao-uart";
-                               reg = <0x0 0x004c0 0x0 0x14>;
+                               reg = <0x0 0x004c0 0x0 0x18>;
                                interrupts = <GIC_SPI 193 IRQ_TYPE_EDGE_RISING>;
                                status = "disabled";
                        };
 
                        uart_AO_B: serial@4e0 {
                                compatible = "amlogic,meson-gx-uart", "amlogic,meson-ao-uart";
-                               reg = <0x0 0x004e0 0x0 0x14>;
+                               reg = <0x0 0x004e0 0x0 0x18>;
                                interrupts = <GIC_SPI 197 IRQ_TYPE_EDGE_RISING>;
                                status = "disabled";
                        };
index 4f355f17eed6bcc29dcc14fb4546fce6a083a57f..c8514110b9da2dc2f40988ad0ae733437e33420a 100644 (file)
@@ -631,6 +631,7 @@ internal_mdio: mdio@e40908ff {
 
                        internal_phy: ethernet-phy@8 {
                                compatible = "ethernet-phy-id0181.4400", "ethernet-phy-ieee802.3-c22";
+                               interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>;
                                reg = <8>;
                                max-speed = <100>;
                        };
index 4220fbdcb24a7f18c5e3ab66574ba22c8c92c873..ff5c4c47b22bfecfa36f0090dc8be5c85b171271 100644 (file)
@@ -98,7 +98,7 @@ clk125mhz: uart_clk125mhz {
                clock-output-names = "clk125mhz";
        };
 
-       pci {
+       pcie@30000000 {
                compatible = "pci-host-ecam-generic";
                device_type = "pci";
                #interrupt-cells = <1>;
@@ -118,6 +118,7 @@ pci {
                ranges =
                  <0x02000000    0 0x40000000    0 0x40000000    0 0x20000000
                   0x43000000 0x40 0x00000000 0x40 0x00000000 0x20 0x00000000>;
+               bus-range = <0 0xff>;
                interrupt-map-mask = <0 0 0 7>;
                interrupt-map =
                      /* addr  pin  ic   icaddr  icintr */
index e94fa1a531922ee6b160246c4399435574909641..047641fe294c64c9dbc04dcb477827814a809677 100644 (file)
@@ -51,7 +51,7 @@ reserved-memory {
                #size-cells = <2>;
                ranges;
 
-               ramoops@0x21f00000 {
+               ramoops@21f00000 {
                        compatible = "ramoops";
                        reg = <0x0 0x21f00000 0x0 0x00100000>;
                        record-size     = <0x00020000>;
index 9fbe4705ee88bfaf1eb12a7208de0c5899d7f9d3..94597e33c8065eb4b12ee805885988991d43e42c 100644 (file)
@@ -341,7 +341,7 @@ syscfg_pctl_a: syscfg_pctl_a@10005000 {
                        reg = <0 0x10005000 0 0x1000>;
                };
 
-               pio: pinctrl@0x10005000 {
+               pio: pinctrl@10005000 {
                        compatible = "mediatek,mt8173-pinctrl";
                        reg = <0 0x1000b000 0 0x1000>;
                        mediatek,pctl-regmap = <&syscfg_pctl_a>;
index 492a011f14f6cef933dc16ce9cf591d8cdc5c79e..1c8f1b86472de9c149b706502dcc552f19376ae5 100644 (file)
@@ -140,16 +140,16 @@ usb2_id: usb2-id {
                };
 
                agnoc@0 {
-                       qcom,pcie@00600000 {
+                       qcom,pcie@600000 {
                                perst-gpio = <&msmgpio 35 GPIO_ACTIVE_LOW>;
                        };
 
-                       qcom,pcie@00608000 {
+                       qcom,pcie@608000 {
                                status = "okay";
                                perst-gpio = <&msmgpio 130 GPIO_ACTIVE_LOW>;
                        };
 
-                       qcom,pcie@00610000 {
+                       qcom,pcie@610000 {
                                status = "okay";
                                perst-gpio = <&msmgpio 114 GPIO_ACTIVE_LOW>;
                        };
index 4b2afcc4fdf4791da816c6bba3c6f2ef7741ad8d..0a6f7952bbb18d65847261957715e331756c46c4 100644 (file)
@@ -840,7 +840,7 @@ agnoc@0 {
                        #size-cells = <1>;
                        ranges;
 
-                       pcie0: qcom,pcie@00600000 {
+                       pcie0: qcom,pcie@600000 {
                                compatible = "qcom,pcie-msm8996", "snps,dw-pcie";
                                status = "disabled";
                                power-domains = <&gcc PCIE0_GDSC>;
@@ -893,7 +893,7 @@ pcie0: qcom,pcie@00600000 {
 
                        };
 
-                       pcie1: qcom,pcie@00608000 {
+                       pcie1: qcom,pcie@608000 {
                                compatible = "qcom,pcie-msm8996", "snps,dw-pcie";
                                power-domains = <&gcc PCIE1_GDSC>;
                                bus-range = <0x00 0xff>;
@@ -946,7 +946,7 @@ pcie1: qcom,pcie@00608000 {
                                                "bus_slave";
                        };
 
-                       pcie2: qcom,pcie@00610000 {
+                       pcie2: qcom,pcie@610000 {
                                compatible = "qcom,pcie-msm8996", "snps,dw-pcie";
                                power-domains = <&gcc PCIE2_GDSC>;
                                bus-range = <0x00 0xff>;
index 3890468678ce1caa78a411aeb0a4a9cdedfc1302..28257724a56e74b79b83c69a76bea0da4e0fd9ed 100644 (file)
@@ -132,17 +132,16 @@ &gmac2io {
        assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>;
        assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>;
        clock_in_out = "input";
-       /* shows instability at 1GBit right now */
-       max-speed = <100>;
        phy-supply = <&vcc_io>;
        phy-mode = "rgmii";
        pinctrl-names = "default";
        pinctrl-0 = <&rgmiim1_pins>;
+       snps,force_thresh_dma_mode;
        snps,reset-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_LOW>;
        snps,reset-active-low;
        snps,reset-delays-us = <0 10000 50000>;
-       tx_delay = <0x26>;
-       rx_delay = <0x11>;
+       tx_delay = <0x24>;
+       rx_delay = <0x18>;
        status = "okay";
 };
 
index a037ee56fead6db1b0bcedbbf740f6a54ca82977..cae3415544862dfddf06a034ecea62bd50739fd2 100644 (file)
@@ -730,7 +730,7 @@ sdmmc: dwmmc@ff500000 {
                interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>,
                         <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                fifo-depth = <0x100>;
                status = "disabled";
        };
@@ -741,7 +741,7 @@ sdio: dwmmc@ff510000 {
                interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>,
                         <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                fifo-depth = <0x100>;
                status = "disabled";
        };
@@ -752,7 +752,7 @@ emmc: dwmmc@ff520000 {
                interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>,
                         <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                fifo-depth = <0x100>;
                status = "disabled";
        };
index aa4d07046a7ba9644316cd7c57b43b64b5327766..03458ac44201c7c66d0e991881ab24e292bfdee2 100644 (file)
@@ -257,7 +257,7 @@ sdio0: dwmmc@ff0d0000 {
                max-frequency = <150000000>;
                clocks = <&cru HCLK_SDIO0>, <&cru SCLK_SDIO0>,
                         <&cru SCLK_SDIO0_DRV>, <&cru SCLK_SDIO0_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                fifo-depth = <0x100>;
                interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
                resets = <&cru SRST_SDIO0>;
index 0f873c897d0de5a75f9d4e4d90d7c658b7a173d3..ce592a4c0c4cdeb473a1c96106620583a8a4abef 100644 (file)
@@ -457,7 +457,7 @@ &pcie0 {
        assigned-clocks = <&cru SCLK_PCIEPHY_REF>;
        assigned-clock-parents = <&cru SCLK_PCIEPHY_REF100M>;
        assigned-clock-rates = <100000000>;
-       ep-gpios = <&gpio3 RK_PB5 GPIO_ACTIVE_HIGH>;
+       ep-gpios = <&gpio2 RK_PA4 GPIO_ACTIVE_HIGH>;
        num-lanes = <4>;
        pinctrl-names = "default";
        pinctrl-0 = <&pcie_clkreqn_cpm>;
index 7aa2144e0d47d1fb8e30a2389d4d0fbc5fc4030f..2605118d4b4ce74755ced75843d91ca22ad38ce8 100644 (file)
@@ -1739,8 +1739,8 @@ edp: edp@ff970000 {
                compatible = "rockchip,rk3399-edp";
                reg = <0x0 0xff970000 0x0 0x8000>;
                interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH 0>;
-               clocks = <&cru PCLK_EDP>, <&cru PCLK_EDP_CTRL>;
-               clock-names = "dp", "pclk";
+               clocks = <&cru PCLK_EDP>, <&cru PCLK_EDP_CTRL>, <&cru PCLK_VIO_GRF>;
+               clock-names = "dp", "pclk", "grf";
                pinctrl-names = "default";
                pinctrl-0 = <&edp_hpd>;
                power-domains = <&power RK3399_PD_EDP>;
index 52f15cd896e11ad631ac3092d9709337a9629bb4..b5a28336c07712af8d10aa62f1669b8a798065d8 100644 (file)
@@ -178,7 +178,7 @@ static int enable_smccc_arch_workaround_1(void *data)
        case PSCI_CONDUIT_HVC:
                arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
                                  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-               if (res.a0)
+               if ((int)res.a0 < 0)
                        return 0;
                cb = call_hvc_arch_workaround_1;
                smccc_start = __smccc_workaround_1_hvc_start;
@@ -188,7 +188,7 @@ static int enable_smccc_arch_workaround_1(void *data)
        case PSCI_CONDUIT_SMC:
                arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
                                  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-               if (res.a0)
+               if ((int)res.a0 < 0)
                        return 0;
                cb = call_smc_arch_workaround_1;
                smccc_start = __smccc_workaround_1_smc_start;
index 6618036ae6d4697c78a93bd6d1659dee703be23c..9ae31f7e224365d054a9712551b9ac75a0b19e9e 100644 (file)
@@ -1419,7 +1419,7 @@ static int compat_ptrace_hbp_get(unsigned int note_type,
        u64 addr = 0;
        u32 ctrl = 0;
 
-       int err, idx = compat_ptrace_hbp_num_to_idx(num);;
+       int err, idx = compat_ptrace_hbp_num_to_idx(num);
 
        if (num & 1) {
                err = ptrace_hbp_get_addr(note_type, tsk, idx, &addr);
index d7e3299a773460fcd3b39930864e078e72453475..959e50d2588c0f14b9eb9230522c3f12c3f7daf9 100644 (file)
@@ -363,8 +363,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 {
        int ret = 0;
 
-       vcpu_load(vcpu);
-
        trace_kvm_set_guest_debug(vcpu, dbg->control);
 
        if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) {
@@ -386,7 +384,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
        }
 
 out:
-       vcpu_put(vcpu);
        return ret;
 }
 
index 84a019f5502293dbe0c28cc078465e4ddd6c2096..2dbb2c9f1ec1770e7f9f5aca7176eac2cc153d32 100644 (file)
@@ -108,7 +108,7 @@ static bool pgattr_change_is_safe(u64 old, u64 new)
         * The following mapping attributes may be updated in live
         * kernel mappings without the need for break-before-make.
         */
-       static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE;
+       static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;
 
        /* creating or taking down mappings is always safe */
        if (old == 0 || new == 0)
@@ -118,9 +118,9 @@ static bool pgattr_change_is_safe(u64 old, u64 new)
        if ((old | new) & PTE_CONT)
                return false;
 
-       /* Transitioning from Global to Non-Global is safe */
-       if (((old ^ new) == PTE_NG) && (new & PTE_NG))
-               return true;
+       /* Transitioning from Non-Global to Global is unsafe */
+       if (old & ~new & PTE_NG)
+               return false;
 
        return ((old ^ new) & ~mask) == 0;
 }
@@ -972,3 +972,13 @@ int pmd_clear_huge(pmd_t *pmdp)
        pmd_clear(pmdp);
        return 1;
 }
+
+int pud_free_pmd_page(pud_t *pud)
+{
+       return pud_none(*pud);
+}
+
+int pmd_free_pte_page(pmd_t *pmd)
+{
+       return pmd_none(*pmd);
+}
index ecff2d1ca5a389b82614600464f0ab70981a766a..6eaa7ad5fc2c99fcdaadb4ce3f7bf677fb968262 100644 (file)
@@ -2,7 +2,6 @@
 #ifndef __H8300_BYTEORDER_H__
 #define __H8300_BYTEORDER_H__
 
-#define __BIG_ENDIAN __ORDER_BIG_ENDIAN__
 #include <linux/byteorder/big_endian.h>
 
 #endif
index 762eeb0fcc1dcaec27a4dff140274113a2c31fbf..2524fb60fbc28518ad7635092ac1bdd5d62114fd 100644 (file)
@@ -66,38 +66,35 @@ ATOMIC_OPS(add, +)
 ATOMIC_OPS(sub, -)
 
 #ifdef __OPTIMIZE__
-#define __ia64_atomic_const(i) __builtin_constant_p(i) ?               \
+#define __ia64_atomic_const(i)                                         \
+       static const int __ia64_atomic_p = __builtin_constant_p(i) ?    \
                ((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 ||       \
-                (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0
+                (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0;\
+       __ia64_atomic_p
+#else
+#define __ia64_atomic_const(i) 0
+#endif
 
-#define atomic_add_return(i, v)                                                \
+#define atomic_add_return(i,v)                                         \
 ({                                                                     \
-       int __i = (i);                                                  \
-       static const int __ia64_atomic_p = __ia64_atomic_const(i);      \
-       __ia64_atomic_p ? ia64_fetch_and_add(__i, &(v)->counter) :      \
-                               ia64_atomic_add(__i, v);                \
+       int __ia64_aar_i = (i);                                         \
+       __ia64_atomic_const(i)                                          \
+               ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)       \
+               : ia64_atomic_add(__ia64_aar_i, v);                     \
 })
 
-#define atomic_sub_return(i, v)                                                \
+#define atomic_sub_return(i,v)                                         \
 ({                                                                     \
-       int __i = (i);                                                  \
-       static const int __ia64_atomic_p = __ia64_atomic_const(i);      \
-       __ia64_atomic_p ? ia64_fetch_and_add(-__i, &(v)->counter) :     \
-                               ia64_atomic_sub(__i, v);                \
+       int __ia64_asr_i = (i);                                         \
+       __ia64_atomic_const(i)                                          \
+               ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)      \
+               : ia64_atomic_sub(__ia64_asr_i, v);                     \
 })
-#else
-#define atomic_add_return(i, v)        ia64_atomic_add(i, v)
-#define atomic_sub_return(i, v)        ia64_atomic_sub(i, v)
-#endif
 
 #define atomic_fetch_add(i,v)                                          \
 ({                                                                     \
        int __ia64_aar_i = (i);                                         \
-       (__builtin_constant_p(i)                                        \
-        && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)           \
-            || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)           \
-            || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)           \
-            || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))         \
+       __ia64_atomic_const(i)                                          \
                ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq)       \
                : ia64_atomic_fetch_add(__ia64_aar_i, v);               \
 })
@@ -105,11 +102,7 @@ ATOMIC_OPS(sub, -)
 #define atomic_fetch_sub(i,v)                                          \
 ({                                                                     \
        int __ia64_asr_i = (i);                                         \
-       (__builtin_constant_p(i)                                        \
-        && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)          \
-            || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)          \
-            || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)          \
-            || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))        \
+       __ia64_atomic_const(i)                                          \
                ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq)      \
                : ia64_atomic_fetch_sub(__ia64_asr_i, v);               \
 })
@@ -170,11 +163,7 @@ ATOMIC64_OPS(sub, -)
 #define atomic64_add_return(i,v)                                       \
 ({                                                                     \
        long __ia64_aar_i = (i);                                        \
-       (__builtin_constant_p(i)                                        \
-        && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)           \
-            || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)           \
-            || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)           \
-            || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))         \
+       __ia64_atomic_const(i)                                          \
                ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)       \
                : ia64_atomic64_add(__ia64_aar_i, v);                   \
 })
@@ -182,11 +171,7 @@ ATOMIC64_OPS(sub, -)
 #define atomic64_sub_return(i,v)                                       \
 ({                                                                     \
        long __ia64_asr_i = (i);                                        \
-       (__builtin_constant_p(i)                                        \
-        && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)          \
-            || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)          \
-            || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)          \
-            || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))        \
+       __ia64_atomic_const(i)                                          \
                ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)      \
                : ia64_atomic64_sub(__ia64_asr_i, v);                   \
 })
@@ -194,11 +179,7 @@ ATOMIC64_OPS(sub, -)
 #define atomic64_fetch_add(i,v)                                                \
 ({                                                                     \
        long __ia64_aar_i = (i);                                        \
-       (__builtin_constant_p(i)                                        \
-        && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)           \
-            || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)           \
-            || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)           \
-            || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))         \
+       __ia64_atomic_const(i)                                          \
                ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq)       \
                : ia64_atomic64_fetch_add(__ia64_aar_i, v);             \
 })
@@ -206,11 +187,7 @@ ATOMIC64_OPS(sub, -)
 #define atomic64_fetch_sub(i,v)                                                \
 ({                                                                     \
        long __ia64_asr_i = (i);                                        \
-       (__builtin_constant_p(i)                                        \
-        && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)          \
-            || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)          \
-            || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)          \
-            || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))        \
+       __ia64_atomic_const(i)                                          \
                ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq)      \
                : ia64_atomic64_fetch_sub(__ia64_asr_i, v);             \
 })
index 85bba43e7d5dc7638138f90f1eb0dd56609c5807..8b5b8e6bc9d9ad9937119531d102e410b005b7ac 100644 (file)
@@ -117,7 +117,7 @@ store_call_start(struct device *dev, struct device_attribute *attr,
 
 #ifdef ERR_INJ_DEBUG
        printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]);
-       printk(KERN_DEBUG "capapbilities=%lx,\n", capabilities[cpu]);
+       printk(KERN_DEBUG "capabilities=%lx,\n", capabilities[cpu]);
        printk(KERN_DEBUG "resources=%lx\n", resources[cpu]);
 #endif
        return size;
@@ -142,7 +142,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr,
        u64 virt_addr=simple_strtoull(buf, NULL, 16);
        int ret;
 
-       ret = get_user_pages(virt_addr, 1, FOLL_WRITE, NULL, NULL);
+       ret = get_user_pages_fast(virt_addr, 1, FOLL_WRITE, NULL);
        if (ret<=0) {
 #ifdef ERR_INJ_DEBUG
                printk("Virtual address %lx is not existing.\n",virt_addr);
index 89f3a1480a637ffcf8b51ff64ddd1c7dd8e1aa46..c55276e31b6b68050a3b2ed84386cfd9d39b5eb3 100644 (file)
@@ -16,7 +16,7 @@ import re
 import sys
 
 if len(sys.argv) != 2:
-    print "Usage: %s FILE" % sys.argv[0]
+    print("Usage: %s FILE" % sys.argv[0])
     sys.exit(2)
 
 readelf = os.getenv("READELF", "readelf")
@@ -29,7 +29,7 @@ def check_func (func, slots, rlen_sum):
         global num_errors
         num_errors += 1
         if not func: func = "[%#x-%#x]" % (start, end)
-        print "ERROR: %s: %lu slots, total region length = %lu" % (func, slots, rlen_sum)
+        print("ERROR: %s: %lu slots, total region length = %lu" % (func, slots, rlen_sum))
     return
 
 num_funcs = 0
@@ -43,23 +43,23 @@ for line in os.popen("%s -u %s" % (readelf, sys.argv[1])):
         check_func(func, slots, rlen_sum)
 
         func  = m.group(1)
-        start = long(m.group(2), 16)
-        end   = long(m.group(3), 16)
+        start = int(m.group(2), 16)
+        end   = int(m.group(3), 16)
         slots = 3 * (end - start) / 16
-        rlen_sum = 0L
+        rlen_sum = 0
         num_funcs += 1
     else:
         m = rlen_pattern.match(line)
         if m:
-            rlen_sum += long(m.group(1))
+            rlen_sum += int(m.group(1))
 check_func(func, slots, rlen_sum)
 
 if num_errors == 0:
-    print "No errors detected in %u functions." % num_funcs
+    print("No errors detected in %u functions." % num_funcs)
 else:
     if num_errors > 1:
         err="errors"
     else:
         err="error"
-    print "%u %s detected in %u functions." % (num_errors, err, num_funcs)
+    print("%u %s detected in %u functions." % (num_errors, err, num_funcs))
     sys.exit(1)
index d3d435248a244fd1c36ac8a096961c6a01b3b774..c73eb820955532fc84f39c29df20f8cab2bc3b87 100644 (file)
@@ -1088,6 +1088,10 @@ int __init mac_platform_init(void)
            macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
                platform_device_register_simple("macsonic", -1, NULL, 0);
 
+       if (macintosh_config->expansion_type == MAC_EXP_PDS ||
+           macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
+               platform_device_register_simple("mac89x0", -1, NULL, 0);
+
        if (macintosh_config->ether_type == MAC_ETHER_MACE)
                platform_device_register_simple("macmace", -1, NULL, 0);
 
index 4f798aa671ddd2f481c35689f03be1ea73318ae8..3817a3e2146cf3e807dad894c3e18ddae31d57a1 100644 (file)
@@ -24,6 +24,7 @@ config MICROBLAZE
        select HAVE_FTRACE_MCOUNT_RECORD
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_TRACER
+       select NO_BOOTMEM
        select HAVE_MEMBLOCK
        select HAVE_MEMBLOCK_NODE_MAP
        select HAVE_OPROFILE
index 6996f397c16c1dbfddcffd69809142ce5d8a8d2b..f7f1739c11b9fef091b1176c0120c4613232cfeb 100644 (file)
@@ -8,7 +8,6 @@ menu "Platform options"
 
 config OPT_LIB_FUNCTION
        bool "Optimalized lib function"
-       depends on CPU_LITTLE_ENDIAN
        default y
        help
          Allows turn on optimalized library function (memcpy and memmove).
@@ -21,6 +20,7 @@ config OPT_LIB_FUNCTION
 config OPT_LIB_ASM
        bool "Optimalized lib function ASM"
        depends on OPT_LIB_FUNCTION && (XILINX_MICROBLAZE0_USE_BARREL = 1)
+       depends on CPU_BIG_ENDIAN
        default n
        help
          Allows turn on optimalized library function (memcpy and memmove).
index be84a4d3917fc1c901bc71c6ca44362c28680257..7c968c1d1729ed00233a716534843175a3c2bc27 100644 (file)
@@ -44,7 +44,6 @@ void machine_shutdown(void);
 void machine_halt(void);
 void machine_power_off(void);
 
-extern void *alloc_maybe_bootmem(size_t size, gfp_t mask);
 extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
 
 # endif /* __ASSEMBLY__ */
index 62021d7e249e0665cd6a1483c12de5b8b0bf5739..fdc48bb065d89fe3b2443ef054d1a6ece3744b54 100644 (file)
  *     between mem locations with size of xfer spec'd in bytes
  */
 
-#ifdef __MICROBLAZEEL__
-#error Microblaze LE not support ASM optimized lib func. Disable OPT_LIB_ASM.
-#endif
-
 #include <linux/linkage.h>
        .text
        .globl  memcpy
index 434639f9a3a6b024b5af2b2319b46376c88c6ada..df6de7ccdc2eb6fad45f93fcb1dbd74c0fff01e8 100644 (file)
@@ -32,9 +32,6 @@ int mem_init_done;
 #ifndef CONFIG_MMU
 unsigned int __page_offset;
 EXPORT_SYMBOL(__page_offset);
-
-#else
-static int init_bootmem_done;
 #endif /* CONFIG_MMU */
 
 char *klimit = _end;
@@ -117,7 +114,6 @@ static void __init paging_init(void)
 
 void __init setup_memory(void)
 {
-       unsigned long map_size;
        struct memblock_region *reg;
 
 #ifndef CONFIG_MMU
@@ -174,17 +170,6 @@ void __init setup_memory(void)
        pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn);
        pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn);
 
-       /*
-        * Find an area to use for the bootmem bitmap.
-        * We look for the first area which is at least
-        * 128kB in length (128kB is enough for a bitmap
-        * for 4GB of memory, using 4kB pages), plus 1 page
-        * (in case the address isn't page-aligned).
-        */
-       map_size = init_bootmem_node(NODE_DATA(0),
-               PFN_UP(TOPHYS((u32)klimit)), min_low_pfn, max_low_pfn);
-       memblock_reserve(PFN_UP(TOPHYS((u32)klimit)) << PAGE_SHIFT, map_size);
-
        /* Add active regions with valid PFNs */
        for_each_memblock(memory, reg) {
                unsigned long start_pfn, end_pfn;
@@ -196,32 +181,9 @@ void __init setup_memory(void)
                                  &memblock.memory, 0);
        }
 
-       /* free bootmem is whole main memory */
-       free_bootmem_with_active_regions(0, max_low_pfn);
-
-       /* reserve allocate blocks */
-       for_each_memblock(reserved, reg) {
-               unsigned long top = reg->base + reg->size - 1;
-
-               pr_debug("reserved - 0x%08x-0x%08x, %lx, %lx\n",
-                        (u32) reg->base, (u32) reg->size, top,
-                                               memory_start + lowmem_size - 1);
-
-               if (top <= (memory_start + lowmem_size - 1)) {
-                       reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
-               } else if (reg->base < (memory_start + lowmem_size - 1)) {
-                       unsigned long trunc_size = memory_start + lowmem_size -
-                                                               reg->base;
-                       reserve_bootmem(reg->base, trunc_size, BOOTMEM_DEFAULT);
-               }
-       }
-
        /* XXX need to clip this if using highmem? */
        sparse_memory_present_with_active_regions(0);
 
-#ifdef CONFIG_MMU
-       init_bootmem_done = 1;
-#endif
        paging_init();
 }
 
@@ -398,30 +360,16 @@ asmlinkage void __init mmu_init(void)
 /* This is only called until mem_init is done. */
 void __init *early_get_page(void)
 {
-       void *p;
-       if (init_bootmem_done) {
-               p = alloc_bootmem_pages(PAGE_SIZE);
-       } else {
-               /*
-                * Mem start + kernel_tlb -> here is limit
-                * because of mem mapping from head.S
-                */
-               p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE,
-                                       memory_start + kernel_tlb));
-       }
-       return p;
+       /*
+        * Mem start + kernel_tlb -> here is limit
+        * because of mem mapping from head.S
+        */
+       return __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE,
+                               memory_start + kernel_tlb));
 }
 
 #endif /* CONFIG_MMU */
 
-void * __ref alloc_maybe_bootmem(size_t size, gfp_t mask)
-{
-       if (mem_init_done)
-               return kmalloc(size, mask);
-       else
-               return alloc_bootmem(size);
-}
-
 void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
 {
        void *p;
index 9ab48ff80c1c8de3a5de0050e2d4d5f22dc0c773..6d11ae581ea775bc2e919e16bdd63cdb6b06d86f 100644 (file)
@@ -135,6 +135,8 @@ int __init ath25_find_config(phys_addr_t base, unsigned long size)
        }
 
        board_data = kzalloc(BOARD_CONFIG_BUFSZ, GFP_KERNEL);
+       if (!board_data)
+               goto error;
        ath25_board.config = (struct ath25_boarddata *)board_data;
        memcpy_fromio(board_data, bcfg, 0x100);
        if (broken_boarddata) {
index 5b3a3f6a9ad31fd845bd18cdd3197d9c75833c84..d99f5242169e7acb31f8cfa71cd6e14d24e94c82 100644 (file)
@@ -2277,6 +2277,8 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
        }
 
        host_data = kzalloc(sizeof(*host_data), GFP_KERNEL);
+       if (!host_data)
+               return -ENOMEM;
        raw_spin_lock_init(&host_data->lock);
 
        addr = of_get_address(ciu_node, 0, NULL, NULL);
index 9d41732a9146a31545b9114812cb12c669196478..159e83add4bb3e6b43f105b521761eb9cb80491b 100644 (file)
@@ -168,11 +168,11 @@ static void bmips_prepare_cpus(unsigned int max_cpus)
                return;
        }
 
-       if (request_irq(IPI0_IRQ, bmips_ipi_interrupt, IRQF_PERCPU,
-                       "smp_ipi0", NULL))
+       if (request_irq(IPI0_IRQ, bmips_ipi_interrupt,
+                       IRQF_PERCPU | IRQF_NO_SUSPEND, "smp_ipi0", NULL))
                panic("Can't request IPI0 interrupt");
-       if (request_irq(IPI1_IRQ, bmips_ipi_interrupt, IRQF_PERCPU,
-                       "smp_ipi1", NULL))
+       if (request_irq(IPI1_IRQ, bmips_ipi_interrupt,
+                       IRQF_PERCPU | IRQF_NO_SUSPEND, "smp_ipi1", NULL))
                panic("Can't request IPI1 interrupt");
 }
 
index bc2fdbfa8223c343e6ed455f44bf6393d8d43d0a..72af0c18396983df62327d5a4dcf000bf557d326 100644 (file)
@@ -7,6 +7,8 @@ choice
 config LEMOTE_FULOONG2E
        bool "Lemote Fuloong(2e) mini-PC"
        select ARCH_SPARSEMEM_ENABLE
+       select ARCH_MIGHT_HAVE_PC_PARPORT
+       select ARCH_MIGHT_HAVE_PC_SERIO
        select CEVT_R4K
        select CSRC_R4K
        select SYS_HAS_CPU_LOONGSON2E
@@ -33,6 +35,8 @@ config LEMOTE_FULOONG2E
 config LEMOTE_MACH2F
        bool "Lemote Loongson 2F family machines"
        select ARCH_SPARSEMEM_ENABLE
+       select ARCH_MIGHT_HAVE_PC_PARPORT
+       select ARCH_MIGHT_HAVE_PC_SERIO
        select BOARD_SCACHE
        select BOOT_ELF32
        select CEVT_R4K if ! MIPS_EXTERNAL_TIMER
@@ -62,6 +66,8 @@ config LEMOTE_MACH2F
 config LOONGSON_MACH3X
        bool "Generic Loongson 3 family machines"
        select ARCH_SPARSEMEM_ENABLE
+       select ARCH_MIGHT_HAVE_PC_PARPORT
+       select ARCH_MIGHT_HAVE_PC_SERIO
        select GENERIC_ISA_DMA_SUPPORT_BROKEN
        select BOOT_ELF32
        select BOARD_SCACHE
index 3742508cc5345882510d907dc5f58b48a08ad9e7..bd5ce31936f5b196c3ce0482ee962eb93df46216 100644 (file)
@@ -26,6 +26,7 @@ void flush_user_icache_range_asm(unsigned long, unsigned long);
 void flush_kernel_icache_range_asm(unsigned long, unsigned long);
 void flush_user_dcache_range_asm(unsigned long, unsigned long);
 void flush_kernel_dcache_range_asm(unsigned long, unsigned long);
+void purge_kernel_dcache_range_asm(unsigned long, unsigned long);
 void flush_kernel_dcache_page_asm(void *);
 void flush_kernel_icache_page(void *);
 
index 0e6ab6e4a4e9f87ba8ffc17f4d1c1e0041b5ab02..2dbe5580a1a4420ba693329a0f2622f68f759276 100644 (file)
@@ -316,6 +316,8 @@ extern int _parisc_requires_coherency;
 #define parisc_requires_coherency()    (0)
 #endif
 
+extern int running_on_qemu;
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __ASM_PARISC_PROCESSOR_H */
index 19c0c141bc3f9f0edd509708f978a2d7ca16c230..e3b45546d589b4e8ec3410d8229a3049bf7b062f 100644 (file)
@@ -465,10 +465,10 @@ EXPORT_SYMBOL(copy_user_page);
 int __flush_tlb_range(unsigned long sid, unsigned long start,
                      unsigned long end)
 {
-       unsigned long flags, size;
+       unsigned long flags;
 
-       size = (end - start);
-       if (size >= parisc_tlb_flush_threshold) {
+       if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
+           end - start >= parisc_tlb_flush_threshold) {
                flush_tlb_all();
                return 1;
        }
@@ -539,13 +539,12 @@ void flush_cache_mm(struct mm_struct *mm)
        struct vm_area_struct *vma;
        pgd_t *pgd;
 
-       /* Flush the TLB to avoid speculation if coherency is required. */
-       if (parisc_requires_coherency())
-               flush_tlb_all();
-
        /* Flushing the whole cache on each cpu takes forever on
           rp3440, etc.  So, avoid it if the mm isn't too big.  */
-       if (mm_total_size(mm) >= parisc_cache_flush_threshold) {
+       if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
+           mm_total_size(mm) >= parisc_cache_flush_threshold) {
+               if (mm->context)
+                       flush_tlb_all();
                flush_cache_all();
                return;
        }
@@ -553,9 +552,9 @@ void flush_cache_mm(struct mm_struct *mm)
        if (mm->context == mfsp(3)) {
                for (vma = mm->mmap; vma; vma = vma->vm_next) {
                        flush_user_dcache_range_asm(vma->vm_start, vma->vm_end);
-                       if ((vma->vm_flags & VM_EXEC) == 0)
-                               continue;
-                       flush_user_icache_range_asm(vma->vm_start, vma->vm_end);
+                       if (vma->vm_flags & VM_EXEC)
+                               flush_user_icache_range_asm(vma->vm_start, vma->vm_end);
+                       flush_tlb_range(vma, vma->vm_start, vma->vm_end);
                }
                return;
        }
@@ -573,6 +572,8 @@ void flush_cache_mm(struct mm_struct *mm)
                        pfn = pte_pfn(*ptep);
                        if (!pfn_valid(pfn))
                                continue;
+                       if (unlikely(mm->context))
+                               flush_tlb_page(vma, addr);
                        __flush_cache_page(vma, addr, PFN_PHYS(pfn));
                }
        }
@@ -581,30 +582,45 @@ void flush_cache_mm(struct mm_struct *mm)
 void flush_cache_range(struct vm_area_struct *vma,
                unsigned long start, unsigned long end)
 {
-       BUG_ON(!vma->vm_mm->context);
-
-       /* Flush the TLB to avoid speculation if coherency is required. */
-       if (parisc_requires_coherency())
-               flush_tlb_range(vma, start, end);
+       pgd_t *pgd;
+       unsigned long addr;
 
-       if ((end - start) >= parisc_cache_flush_threshold
-           || vma->vm_mm->context != mfsp(3)) {
+       if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
+           end - start >= parisc_cache_flush_threshold) {
+               if (vma->vm_mm->context)
+                       flush_tlb_range(vma, start, end);
                flush_cache_all();
                return;
        }
 
-       flush_user_dcache_range_asm(start, end);
-       if (vma->vm_flags & VM_EXEC)
-               flush_user_icache_range_asm(start, end);
+       if (vma->vm_mm->context == mfsp(3)) {
+               flush_user_dcache_range_asm(start, end);
+               if (vma->vm_flags & VM_EXEC)
+                       flush_user_icache_range_asm(start, end);
+               flush_tlb_range(vma, start, end);
+               return;
+       }
+
+       pgd = vma->vm_mm->pgd;
+       for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+               unsigned long pfn;
+               pte_t *ptep = get_ptep(pgd, addr);
+               if (!ptep)
+                       continue;
+               pfn = pte_pfn(*ptep);
+               if (pfn_valid(pfn)) {
+                       if (unlikely(vma->vm_mm->context))
+                               flush_tlb_page(vma, addr);
+                       __flush_cache_page(vma, addr, PFN_PHYS(pfn));
+               }
+       }
 }
 
 void
 flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
 {
-       BUG_ON(!vma->vm_mm->context);
-
        if (pfn_valid(pfn)) {
-               if (parisc_requires_coherency())
+               if (likely(vma->vm_mm->context))
                        flush_tlb_page(vma, vmaddr);
                __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
        }
@@ -613,21 +629,33 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long
 void flush_kernel_vmap_range(void *vaddr, int size)
 {
        unsigned long start = (unsigned long)vaddr;
+       unsigned long end = start + size;
 
-       if ((unsigned long)size > parisc_cache_flush_threshold)
+       if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
+           (unsigned long)size >= parisc_cache_flush_threshold) {
+               flush_tlb_kernel_range(start, end);
                flush_data_cache();
-       else
-               flush_kernel_dcache_range_asm(start, start + size);
+               return;
+       }
+
+       flush_kernel_dcache_range_asm(start, end);
+       flush_tlb_kernel_range(start, end);
 }
 EXPORT_SYMBOL(flush_kernel_vmap_range);
 
 void invalidate_kernel_vmap_range(void *vaddr, int size)
 {
        unsigned long start = (unsigned long)vaddr;
+       unsigned long end = start + size;
 
-       if ((unsigned long)size > parisc_cache_flush_threshold)
+       if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
+           (unsigned long)size >= parisc_cache_flush_threshold) {
+               flush_tlb_kernel_range(start, end);
                flush_data_cache();
-       else
-               flush_kernel_dcache_range_asm(start, start + size);
+               return;
+       }
+
+       purge_kernel_dcache_range_asm(start, end);
+       flush_tlb_kernel_range(start, end);
 }
 EXPORT_SYMBOL(invalidate_kernel_vmap_range);
index bbbe360b458f511c068620db2dd670a770ea8362..fbb4e43fda05332de57bdc135ec3711d4bd5ee77 100644 (file)
@@ -138,6 +138,16 @@ $pgt_fill_loop:
        std             %dp,0x18(%r10)
 #endif
 
+#ifdef CONFIG_64BIT
+       /* Get PDCE_PROC for monarch CPU. */
+#define MEM_PDC_LO 0x388
+#define MEM_PDC_HI 0x35C
+       ldw             MEM_PDC_LO(%r0),%r3
+       ldw             MEM_PDC_HI(%r0),%r10
+       depd            %r10, 31, 32, %r3        /* move to upper word */
+#endif
+
+
 #ifdef CONFIG_SMP
        /* Set the smp rendezvous address into page zero.
        ** It would be safer to do this in init_smp_config() but
@@ -196,12 +206,6 @@ common_stext:
         ** Someday, palo might not do this for the Monarch either.
         */
 2:
-#define MEM_PDC_LO 0x388
-#define MEM_PDC_HI 0x35C
-       ldw             MEM_PDC_LO(%r0),%r3
-       ldw             MEM_PDC_HI(%r0),%r6
-       depd            %r6, 31, 32, %r3        /* move to upper word */
-
        mfctl           %cr30,%r6               /* PCX-W2 firmware bug */
 
        ldo             PDC_PSW(%r0),%arg0              /* 21 */
@@ -268,6 +272,8 @@ $install_iva:
 aligned_rfi:
        pcxt_ssm_bug
 
+       copy            %r3, %arg0      /* PDCE_PROC for smp_callin() */
+
        rsm             PSW_SM_QUIET,%r0        /* off troublesome PSW bits */
        /* Don't need NOPs, have 8 compliant insn before rfi */
 
index 2d40c4ff3f6918ae9b2e2c6af71e20658a9850e1..67b0f7532e835f4db1214c6ccecf62183eb84e50 100644 (file)
@@ -1110,6 +1110,28 @@ ENTRY_CFI(flush_kernel_dcache_range_asm)
        .procend
 ENDPROC_CFI(flush_kernel_dcache_range_asm)
 
+ENTRY_CFI(purge_kernel_dcache_range_asm)
+       .proc
+       .callinfo NO_CALLS
+       .entry
+
+       ldil            L%dcache_stride, %r1
+       ldw             R%dcache_stride(%r1), %r23
+       ldo             -1(%r23), %r21
+       ANDCM           %r26, %r21, %r26
+
+1:      cmpb,COND(<<),n        %r26, %r25,1b
+       pdc,m           %r23(%r26)
+
+       sync
+       syncdma
+       bv              %r0(%r2)
+       nop
+       .exit
+
+       .procend
+ENDPROC_CFI(purge_kernel_dcache_range_asm)
+
 ENTRY_CFI(flush_user_icache_range_asm)
        .proc
        .callinfo NO_CALLS
index 30c28ab145409b5966f7237ec2b6ca07121adc10..4065b5e48c9d68e70b38da3743e219e02934fe5b 100644 (file)
@@ -292,10 +292,15 @@ smp_cpu_init(int cpunum)
  * Slaves start using C here. Indirectly called from smp_slave_stext.
  * Do what start_kernel() and main() do for boot strap processor (aka monarch)
  */
-void __init smp_callin(void)
+void __init smp_callin(unsigned long pdce_proc)
 {
        int slave_id = cpu_now_booting;
 
+#ifdef CONFIG_64BIT
+       WARN_ON(((unsigned long)(PAGE0->mem_pdc_hi) << 32
+                       | PAGE0->mem_pdc) != pdce_proc);
+#endif
+
        smp_cpu_init(slave_id);
        preempt_disable();
 
index 4b8fd6dc22dabebcf1da3b2e32381d2df3197eda..f7e684560186f9c3d5db133b8e66281c0f3c0e12 100644 (file)
@@ -76,10 +76,10 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id)
        next_tick = cpuinfo->it_value;
 
        /* Calculate how many ticks have elapsed. */
+       now = mfctl(16);
        do {
                ++ticks_elapsed;
                next_tick += cpt;
-               now = mfctl(16);
        } while (next_tick - now > cpt);
 
        /* Store (in CR16 cycles) up to when we are accounting right now. */
@@ -103,16 +103,17 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id)
         * if one or the other wrapped. If "now" is "bigger" we'll end up
         * with a very large unsigned number.
         */
-       while (next_tick - mfctl(16) > cpt)
+       now = mfctl(16);
+       while (next_tick - now > cpt)
                next_tick += cpt;
 
        /* Program the IT when to deliver the next interrupt.
         * Only bottom 32-bits of next_tick are writable in CR16!
         * Timer interrupt will be delivered at least a few hundred cycles
-        * after the IT fires, so if we are too close (<= 500 cycles) to the
+        * after the IT fires, so if we are too close (<= 8000 cycles) to the
         * next cycle, simply skip it.
         */
-       if (next_tick - mfctl(16) <= 500)
+       if (next_tick - now <= 8000)
                next_tick += cpt;
        mtctl(next_tick, 16);
 
@@ -248,7 +249,7 @@ static int __init init_cr16_clocksource(void)
         * different sockets, so mark them unstable and lower rating on
         * multi-socket SMP systems.
         */
-       if (num_online_cpus() > 1) {
+       if (num_online_cpus() > 1 && !running_on_qemu) {
                int cpu;
                unsigned long cpu0_loc;
                cpu0_loc = per_cpu(cpu_data, 0).cpu_loc;
index 48f41399fc0b8b63acd84d774a09ad2d0aba5086..cab32ee824d2ac4b7fe9adf4f3bb25533cc043c6 100644 (file)
@@ -629,7 +629,12 @@ void __init mem_init(void)
 #endif
 
        mem_init_print_info(NULL);
-#ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */
+
+#if 0
+       /*
+        * Do not expose the virtual kernel memory layout to userspace.
+        * But keep code for debugging purposes.
+        */
        printk("virtual kernel memory layout:\n"
               "    vmalloc : 0x%px - 0x%px   (%4ld MB)\n"
               "    memory  : 0x%px - 0x%px   (%4ld MB)\n"
index ef6549e5715717003bf3fe9fc3a3c869e1fd5b2f..26d5d2a5b8e99bc923eeb2a2fabd67ce5b3af1ab 100644 (file)
@@ -101,7 +101,8 @@ $(addprefix $(obj)/,$(zlib-y)): \
 libfdt       := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c
 libfdtheader := fdt.h libfdt.h libfdt_internal.h
 
-$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o): \
+$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o \
+       treeboot-akebono.o treeboot-currituck.o treeboot-iss4xx.o): \
        $(addprefix $(obj)/,$(libfdtheader))
 
 src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \
index 511acfd7ab0d3e20066dcd60e22bd3048ad34119..535add3f779133fefcb1422616d0fc3fef23588b 100644 (file)
@@ -52,7 +52,7 @@
 #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000)
 #define FW_FEATURE_PRRN                ASM_CONST(0x0000000200000000)
 #define FW_FEATURE_DRMEM_V2    ASM_CONST(0x0000000400000000)
-#define FW_FEATURE_DRC_INFO    ASM_CONST(0x0000000400000000)
+#define FW_FEATURE_DRC_INFO    ASM_CONST(0x0000000800000000)
 
 #ifndef __ASSEMBLY__
 
index beea2182d754bcd2d1f79184bcef1d390525cbb3..0c0b66fc5bfb32f6c61e8fa454fbfe1a467db89b 100644 (file)
@@ -384,7 +384,8 @@ static void *eeh_report_resume(void *data, void *userdata)
        eeh_pcid_put(dev);
        pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
 #ifdef CONFIG_PCI_IOV
-       eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
+       if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
+               eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
 #endif
        return NULL;
 }
index adf044daafd763a544685b92f60dbe434cc64f90..acf4b2e0530cb671df1e80d56927e8650f0c2f84 100644 (file)
@@ -874,7 +874,6 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
                .mmu = 0,
                .hash_ext = 0,
                .radix_ext = 0,
-               .byte22 = OV5_FEAT(OV5_DRC_INFO),
        },
 
        /* option vector 6: IBM PAPR hints */
index 0c854816e653e25238f87c1cf9c44a8ed911df44..5cb4e4687107e1204667e3314bee2ce49de32de7 100644 (file)
@@ -195,6 +195,12 @@ static void kvmppc_pte_free(pte_t *ptep)
        kmem_cache_free(kvm_pte_cache, ptep);
 }
 
+/* Like pmd_huge() and pmd_large(), but works regardless of config options */
+static inline int pmd_is_leaf(pmd_t pmd)
+{
+       return !!(pmd_val(pmd) & _PAGE_PTE);
+}
+
 static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
                             unsigned int level, unsigned long mmu_seq)
 {
@@ -219,7 +225,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
        else
                new_pmd = pmd_alloc_one(kvm->mm, gpa);
 
-       if (level == 0 && !(pmd && pmd_present(*pmd)))
+       if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
                new_ptep = kvmppc_pte_alloc();
 
        /* Check if we might have been invalidated; let the guest retry if so */
@@ -244,12 +250,30 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
                new_pmd = NULL;
        }
        pmd = pmd_offset(pud, gpa);
-       if (pmd_large(*pmd)) {
-               /* Someone else has instantiated a large page here; retry */
-               ret = -EAGAIN;
-               goto out_unlock;
-       }
-       if (level == 1 && !pmd_none(*pmd)) {
+       if (pmd_is_leaf(*pmd)) {
+               unsigned long lgpa = gpa & PMD_MASK;
+
+               /*
+                * If we raced with another CPU which has just put
+                * a 2MB pte in after we saw a pte page, try again.
+                */
+               if (level == 0 && !new_ptep) {
+                       ret = -EAGAIN;
+                       goto out_unlock;
+               }
+               /* Valid 2MB page here already, remove it */
+               old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
+                                             ~0UL, 0, lgpa, PMD_SHIFT);
+               kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT);
+               if (old & _PAGE_DIRTY) {
+                       unsigned long gfn = lgpa >> PAGE_SHIFT;
+                       struct kvm_memory_slot *memslot;
+                       memslot = gfn_to_memslot(kvm, gfn);
+                       if (memslot && memslot->dirty_bitmap)
+                               kvmppc_update_dirty_map(memslot,
+                                                       gfn, PMD_SIZE);
+               }
+       } else if (level == 1 && !pmd_none(*pmd)) {
                /*
                 * There's a page table page here, but we wanted
                 * to install a large page.  Tell the caller and let
@@ -412,28 +436,24 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        } else {
                page = pages[0];
                pfn = page_to_pfn(page);
-               if (PageHuge(page)) {
-                       page = compound_head(page);
-                       pte_size <<= compound_order(page);
+               if (PageCompound(page)) {
+                       pte_size <<= compound_order(compound_head(page));
                        /* See if we can insert a 2MB large-page PTE here */
                        if (pte_size >= PMD_SIZE &&
-                           (gpa & PMD_MASK & PAGE_MASK) ==
-                           (hva & PMD_MASK & PAGE_MASK)) {
+                           (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+                           (hva & (PMD_SIZE - PAGE_SIZE))) {
                                level = 1;
                                pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
                        }
                }
                /* See if we can provide write access */
                if (writing) {
-                       /*
-                        * We assume gup_fast has set dirty on the host PTE.
-                        */
                        pgflags |= _PAGE_WRITE;
                } else {
                        local_irq_save(flags);
                        ptep = find_current_mm_pte(current->mm->pgd,
                                                   hva, NULL, NULL);
-                       if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
+                       if (ptep && pte_write(*ptep))
                                pgflags |= _PAGE_WRITE;
                        local_irq_restore(flags);
                }
@@ -459,18 +479,15 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                pte = pfn_pte(pfn, __pgprot(pgflags));
                ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
        }
-       if (ret == 0 || ret == -EAGAIN)
-               ret = RESUME_GUEST;
 
        if (page) {
-               /*
-                * We drop pages[0] here, not page because page might
-                * have been set to the head page of a compound, but
-                * we have to drop the reference on the correct tail
-                * page to match the get inside gup()
-                */
-               put_page(pages[0]);
+               if (!ret && (pgflags & _PAGE_WRITE))
+                       set_page_dirty_lock(page);
+               put_page(page);
        }
+
+       if (ret == 0 || ret == -EAGAIN)
+               ret = RESUME_GUEST;
        return ret;
 }
 
@@ -644,7 +661,7 @@ void kvmppc_free_radix(struct kvm *kvm)
                                continue;
                        pmd = pmd_offset(pud, 0);
                        for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
-                               if (pmd_huge(*pmd)) {
+                               if (pmd_is_leaf(*pmd)) {
                                        pmd_clear(pmd);
                                        continue;
                                }
index 89707354c2efd89e95d1d1f861a170e8b6bfe51a..9cb9448163c4bf7021822d6632fb6c94452187ed 100644 (file)
@@ -2885,7 +2885,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
         */
        trace_hardirqs_on();
 
-       guest_enter();
+       guest_enter_irqoff();
 
        srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
@@ -2893,8 +2893,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
        srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
 
-       guest_exit();
-
        trace_hardirqs_off();
        set_irq_happened(trap);
 
@@ -2937,6 +2935,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
        kvmppc_set_host_core(pcpu);
 
        local_irq_enable();
+       guest_exit();
 
        /* Let secondaries go back to the offline loop */
        for (i = 0; i < controlled_threads; ++i) {
@@ -3656,15 +3655,17 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
                goto up_out;
 
        psize = vma_kernel_pagesize(vma);
-       porder = __ilog2(psize);
 
        up_read(&current->mm->mmap_sem);
 
        /* We can handle 4k, 64k or 16M pages in the VRMA */
-       err = -EINVAL;
-       if (!(psize == 0x1000 || psize == 0x10000 ||
-             psize == 0x1000000))
-               goto out_srcu;
+       if (psize >= 0x1000000)
+               psize = 0x1000000;
+       else if (psize >= 0x10000)
+               psize = 0x10000;
+       else
+               psize = 0x1000;
+       porder = __ilog2(psize);
 
        senc = slb_pgsize_encoding(psize);
        kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
index f31f357b8c5ae6657bac7a85fb89f66d86f13268..d33264697a31a13c2e84f2faaa02f055007a4948 100644 (file)
@@ -320,7 +320,6 @@ kvm_novcpu_exit:
        stw     r12, STACK_SLOT_TRAP(r1)
        bl      kvmhv_commence_exit
        nop
-       lwz     r12, STACK_SLOT_TRAP(r1)
        b       kvmhv_switch_to_host
 
 /*
@@ -1220,6 +1219,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
 secondary_too_late:
        li      r12, 0
+       stw     r12, STACK_SLOT_TRAP(r1)
        cmpdi   r4, 0
        beq     11f
        stw     r12, VCPU_TRAP(r4)
@@ -1558,12 +1558,12 @@ mc_cont:
 3:     stw     r5,VCPU_SLB_MAX(r9)
 
 guest_bypass:
+       stw     r12, STACK_SLOT_TRAP(r1)
        mr      r3, r12
        /* Increment exit count, poke other threads to exit */
        bl      kvmhv_commence_exit
        nop
        ld      r9, HSTATE_KVM_VCPU(r13)
-       lwz     r12, VCPU_TRAP(r9)
 
        /* Stop others sending VCPU interrupts to this physical CPU */
        li      r0, -1
@@ -1898,6 +1898,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
         * POWER7/POWER8 guest -> host partition switch code.
         * We don't have to lock against tlbies but we do
         * have to coordinate the hardware threads.
+        * Here STACK_SLOT_TRAP(r1) contains the trap number.
         */
 kvmhv_switch_to_host:
        /* Secondary threads wait for primary to do partition switch */
@@ -1950,12 +1951,12 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
        /* If HMI, call kvmppc_realmode_hmi_handler() */
+       lwz     r12, STACK_SLOT_TRAP(r1)
        cmpwi   r12, BOOK3S_INTERRUPT_HMI
        bne     27f
        bl      kvmppc_realmode_hmi_handler
        nop
        cmpdi   r3, 0
-       li      r12, BOOK3S_INTERRUPT_HMI
        /*
         * At this point kvmppc_realmode_hmi_handler may have resync-ed
         * the TB, and if it has, we must not subtract the guest timebase
@@ -2008,10 +2009,8 @@ BEGIN_FTR_SECTION
        lwz     r8, KVM_SPLIT_DO_RESTORE(r3)
        cmpwi   r8, 0
        beq     47f
-       stw     r12, STACK_SLOT_TRAP(r1)
        bl      kvmhv_p9_restore_lpcr
        nop
-       lwz     r12, STACK_SLOT_TRAP(r1)
        b       48f
 47:
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
@@ -2049,6 +2048,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
        li      r0, KVM_GUEST_MODE_NONE
        stb     r0, HSTATE_IN_GUEST(r13)
 
+       lwz     r12, STACK_SLOT_TRAP(r1)        /* return trap # in r12 */
        ld      r0, SFS+PPC_LR_STKOFF(r1)
        addi    r1, r1, SFS
        mtlr    r0
index f0f5cd4d2fe7cf796336cd56cc3d8c9011be8cab..f9818d7d3381d8c072605534202fb6c91b7d58e4 100644 (file)
@@ -188,7 +188,7 @@ static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio)
        if (!qpage) {
                pr_err("Failed to allocate queue %d for VCPU %d\n",
                       prio, xc->server_num);
-               return -ENOMEM;;
+               return -ENOMEM;
        }
        memset(qpage, 0, 1 << xive->q_order);
 
index 403e642c78f5170b81855ef329e7148f454bfa3b..52c2053739862d2e7c53210458b0fbc9943f510b 100644 (file)
@@ -1345,7 +1345,7 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
 int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
                unsigned int rt, int is_default_endian)
 {
-       enum emulation_result emulated;
+       enum emulation_result emulated = EMULATE_DONE;
 
        while (vcpu->arch.mmio_vmx_copy_nums) {
                emulated = __kvmppc_handle_load(run, vcpu, rt, 8,
@@ -1608,7 +1608,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
        kvm_sigset_deactivate(vcpu);
 
+#ifdef CONFIG_ALTIVEC
 out:
+#endif
        vcpu_put(vcpu);
        return r;
 }
index 916844f99c64e59655d3372ac4e69c731f0751e9..3f1803672c9bb1062da0238999a82338f7c3c387 100644 (file)
@@ -98,7 +98,7 @@ static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
        dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
        dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
        dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
-       dr_cell->flags = cpu_to_be32(lmb->flags);
+       dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb));
 }
 
 static int drmem_update_dt_v2(struct device_node *memory,
@@ -121,7 +121,7 @@ static int drmem_update_dt_v2(struct device_node *memory,
                }
 
                if (prev_lmb->aa_index != lmb->aa_index ||
-                   prev_lmb->flags != lmb->flags)
+                   drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb))
                        lmb_sets++;
 
                prev_lmb = lmb;
@@ -150,7 +150,7 @@ static int drmem_update_dt_v2(struct device_node *memory,
                }
 
                if (prev_lmb->aa_index != lmb->aa_index ||
-                   prev_lmb->flags != lmb->flags) {
+                   drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb)) {
                        /* end of one set, start of another */
                        dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
                        dr_cell++;
index 872d1f6dd11e179793c4394e48313649028820e1..a9636d8cba153a1fb43469c1b8070b59ae4ba210 100644 (file)
@@ -327,6 +327,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
                        BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
                        PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
                        break;
+               case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */
+                       PPC_LWZ_OFFS(r_A, r_skb, K);
+                       break;
                case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
                        PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
                        break;
index 0a34b0cec7b7c87fd6bbc08a608fb80b7e13d958..0ef3d9580e98ca7c4f747edb115b16c56ccf6d43 100644 (file)
@@ -240,6 +240,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
         *   goto out;
         */
        PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries));
+       PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31);
        PPC_CMPLW(b2p_index, b2p[TMP_REG_1]);
        PPC_BCC(COND_GE, out);
 
index 496e47696ed0c57c413b6ce4c920c7c3beda745f..a6c92c78c9b20b9cc2507cfb5c3e749e05a6e19d 100644 (file)
@@ -1854,7 +1854,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
        s64 rc;
 
        if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
-               return -ENODEV;;
+               return -ENODEV;
 
        pe = &phb->ioda.pe_array[pdn->pe_number];
        if (pe->tce_bypass_enabled) {
index 4fb21e17504aad72295a9e1cdffe54c5547379bd..092715b9674bb93309b3793ed4ccd2719407e5db 100644 (file)
@@ -80,6 +80,10 @@ static void pnv_setup_rfi_flush(void)
                if (np && of_property_read_bool(np, "disabled"))
                        enable--;
 
+               np = of_get_child_by_name(fw_features, "speculation-policy-favor-security");
+               if (np && of_property_read_bool(np, "disabled"))
+                       enable = 0;
+
                of_node_put(np);
                of_node_put(fw_features);
        }
index 372d7ada1a0c115aa9078cb1ba0d01d69a80a73d..1a527625acf78dea27f7bde430e23f02f12f5baf 100644 (file)
@@ -482,7 +482,8 @@ static void pseries_setup_rfi_flush(void)
                if (types == L1D_FLUSH_NONE)
                        types = L1D_FLUSH_FALLBACK;
 
-               if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+               if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) ||
+                   (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)))
                        enable = false;
        } else {
                /* Default to fallback if case hcall is not available */
index c0319cbf1eec58d7ea8960259838b865c23f49a1..5510366d169aea821e9f09ffe613363a2009db93 100644 (file)
@@ -34,9 +34,9 @@
 #define wmb()          RISCV_FENCE(ow,ow)
 
 /* These barriers do not need to enforce ordering on devices, just memory. */
-#define smp_mb()       RISCV_FENCE(rw,rw)
-#define smp_rmb()      RISCV_FENCE(r,r)
-#define smp_wmb()      RISCV_FENCE(w,w)
+#define __smp_mb()     RISCV_FENCE(rw,rw)
+#define __smp_rmb()    RISCV_FENCE(r,r)
+#define __smp_wmb()    RISCV_FENCE(w,w)
 
 /*
  * This is a very specific barrier: it's currently only used in two places in
index 65154eaa3714a4e9182cb87654e7b896e7be3e2f..6c8ce15cde7b349d45c27c6d2cfbd7436e13322a 100644 (file)
@@ -63,6 +63,7 @@ static inline int init_new_context(struct task_struct *tsk,
                                   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
                /* pgd_alloc() did not account this pmd */
                mm_inc_nr_pmds(mm);
+               mm_inc_nr_puds(mm);
        }
        crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
        return 0;
index 13a133a6015c9adbe1c821269490199bec2cf438..a5621ea6d1234f1c3b6c8aec8612bd933f5cc6f3 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/processor.h>
 #include <asm/cache.h>
 #include <asm/ctl_reg.h>
+#include <asm/dwarf.h>
 #include <asm/errno.h>
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
@@ -230,7 +231,7 @@ _PIF_WORK   = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
        .hidden \name
        .type \name,@function
 \name:
-       .cfi_startproc
+       CFI_STARTPROC
 #ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
        exrl    0,0f
 #else
@@ -239,7 +240,7 @@ _PIF_WORK   = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
 #endif
        j       .
 0:     br      \reg
-       .cfi_endproc
+       CFI_ENDPROC
        .endm
 
        GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1
@@ -426,13 +427,13 @@ ENTRY(system_call)
        UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER
        BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
        stmg    %r0,%r7,__PT_R0(%r11)
-       # clear user controlled register to prevent speculative use
-       xgr     %r0,%r0
        mvc     __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
        mvc     __PT_PSW(16,%r11),__LC_SVC_OLD_PSW
        mvc     __PT_INT_CODE(4,%r11),__LC_SVC_ILC
        stg     %r14,__PT_FLAGS(%r11)
 .Lsysc_do_svc:
+       # clear user controlled register to prevent speculative use
+       xgr     %r0,%r0
        # load address of system call table
        lg      %r10,__THREAD_sysc_table(%r13,%r12)
        llgh    %r8,__PT_INT_CODE+2(%r11)
@@ -1439,6 +1440,7 @@ cleanup_critical:
        stg     %r15,__LC_SYSTEM_TIMER
 0:     # update accounting time stamp
        mvc     __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
+       BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
        # set up saved register r11
        lg      %r15,__LC_KERNEL_STACK
        la      %r9,STACK_FRAME_OVERHEAD(%r15)
index 69d7fcf48158892e6e5aac74bb6b4ab82e7516b9..9aff72d3abda3148a0a6decb358713e6369a43a7 100644 (file)
@@ -2,8 +2,8 @@
 #include <linux/module.h>
 #include <asm/nospec-branch.h>
 
-int nospec_call_disable = IS_ENABLED(EXPOLINE_OFF);
-int nospec_return_disable = !IS_ENABLED(EXPOLINE_FULL);
+int nospec_call_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF);
+int nospec_return_disable = !IS_ENABLED(CONFIG_EXPOLINE_FULL);
 
 static int __init nospectre_v2_setup_early(char *str)
 {
index 9c7d707158622e7f0743570db60599fa95a9de3b..07c6e81163bf5e248c1b744f69b75548aeae5d44 100644 (file)
 #include "trace.h"
 #include "trace-s390.h"
 
-
-static const intercept_handler_t instruction_handlers[256] = {
-       [0x01] = kvm_s390_handle_01,
-       [0x82] = kvm_s390_handle_lpsw,
-       [0x83] = kvm_s390_handle_diag,
-       [0xaa] = kvm_s390_handle_aa,
-       [0xae] = kvm_s390_handle_sigp,
-       [0xb2] = kvm_s390_handle_b2,
-       [0xb6] = kvm_s390_handle_stctl,
-       [0xb7] = kvm_s390_handle_lctl,
-       [0xb9] = kvm_s390_handle_b9,
-       [0xe3] = kvm_s390_handle_e3,
-       [0xe5] = kvm_s390_handle_e5,
-       [0xeb] = kvm_s390_handle_eb,
-};
-
 u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
 {
        struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
@@ -129,16 +113,39 @@ static int handle_validity(struct kvm_vcpu *vcpu)
 
 static int handle_instruction(struct kvm_vcpu *vcpu)
 {
-       intercept_handler_t handler;
-
        vcpu->stat.exit_instruction++;
        trace_kvm_s390_intercept_instruction(vcpu,
                                             vcpu->arch.sie_block->ipa,
                                             vcpu->arch.sie_block->ipb);
-       handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8];
-       if (handler)
-               return handler(vcpu);
-       return -EOPNOTSUPP;
+
+       switch (vcpu->arch.sie_block->ipa >> 8) {
+       case 0x01:
+               return kvm_s390_handle_01(vcpu);
+       case 0x82:
+               return kvm_s390_handle_lpsw(vcpu);
+       case 0x83:
+               return kvm_s390_handle_diag(vcpu);
+       case 0xaa:
+               return kvm_s390_handle_aa(vcpu);
+       case 0xae:
+               return kvm_s390_handle_sigp(vcpu);
+       case 0xb2:
+               return kvm_s390_handle_b2(vcpu);
+       case 0xb6:
+               return kvm_s390_handle_stctl(vcpu);
+       case 0xb7:
+               return kvm_s390_handle_lctl(vcpu);
+       case 0xb9:
+               return kvm_s390_handle_b9(vcpu);
+       case 0xe3:
+               return kvm_s390_handle_e3(vcpu);
+       case 0xe5:
+               return kvm_s390_handle_e5(vcpu);
+       case 0xeb:
+               return kvm_s390_handle_eb(vcpu);
+       default:
+               return -EOPNOTSUPP;
+       }
 }
 
 static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu)
index aabf46f5f883d44d71cddc88b5ef28ec677ff200..b04616b57a94713a24ed19b142adb94b8c96e748 100644 (file)
@@ -169,8 +169,15 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
 
 static int ckc_irq_pending(struct kvm_vcpu *vcpu)
 {
-       if (vcpu->arch.sie_block->ckc >= kvm_s390_get_tod_clock_fast(vcpu->kvm))
+       const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
+       const u64 ckc = vcpu->arch.sie_block->ckc;
+
+       if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) {
+               if ((s64)ckc >= (s64)now)
+                       return 0;
+       } else if (ckc >= now) {
                return 0;
+       }
        return ckc_interrupts_enabled(vcpu);
 }
 
@@ -187,12 +194,6 @@ static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
        return kvm_s390_get_cpu_timer(vcpu) >> 63;
 }
 
-static inline int is_ioirq(unsigned long irq_type)
-{
-       return ((irq_type >= IRQ_PEND_IO_ISC_7) &&
-               (irq_type <= IRQ_PEND_IO_ISC_0));
-}
-
 static uint64_t isc_to_isc_bits(int isc)
 {
        return (0x80 >> isc) << 24;
@@ -236,10 +237,15 @@ static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gis
        return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
 }
 
-static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
+static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu)
 {
        return vcpu->kvm->arch.float_int.pending_irqs |
-               vcpu->arch.local_int.pending_irqs |
+               vcpu->arch.local_int.pending_irqs;
+}
+
+static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
+{
+       return pending_irqs_no_gisa(vcpu) |
                kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7;
 }
 
@@ -337,7 +343,7 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
 
 static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
 {
-       if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK))
+       if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK))
                return;
        else if (psw_ioint_disabled(vcpu))
                kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT);
@@ -1011,24 +1017,6 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
        return rc;
 }
 
-typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
-
-static const deliver_irq_t deliver_irq_funcs[] = {
-       [IRQ_PEND_MCHK_EX]        = __deliver_machine_check,
-       [IRQ_PEND_MCHK_REP]       = __deliver_machine_check,
-       [IRQ_PEND_PROG]           = __deliver_prog,
-       [IRQ_PEND_EXT_EMERGENCY]  = __deliver_emergency_signal,
-       [IRQ_PEND_EXT_EXTERNAL]   = __deliver_external_call,
-       [IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc,
-       [IRQ_PEND_EXT_CPU_TIMER]  = __deliver_cpu_timer,
-       [IRQ_PEND_RESTART]        = __deliver_restart,
-       [IRQ_PEND_SET_PREFIX]     = __deliver_set_prefix,
-       [IRQ_PEND_PFAULT_INIT]    = __deliver_pfault_init,
-       [IRQ_PEND_EXT_SERVICE]    = __deliver_service,
-       [IRQ_PEND_PFAULT_DONE]    = __deliver_pfault_done,
-       [IRQ_PEND_VIRTIO]         = __deliver_virtio,
-};
-
 /* Check whether an external call is pending (deliverable or not) */
 int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
 {
@@ -1066,13 +1054,19 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 
 static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
 {
-       u64 now, cputm, sltime = 0;
+       const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
+       const u64 ckc = vcpu->arch.sie_block->ckc;
+       u64 cputm, sltime = 0;
 
        if (ckc_interrupts_enabled(vcpu)) {
-               now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
-               sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
-               /* already expired or overflow? */
-               if (!sltime || vcpu->arch.sie_block->ckc <= now)
+               if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) {
+                       if ((s64)now < (s64)ckc)
+                               sltime = tod_to_ns((s64)ckc - (s64)now);
+               } else if (now < ckc) {
+                       sltime = tod_to_ns(ckc - now);
+               }
+               /* already expired */
+               if (!sltime)
                        return 0;
                if (cpu_timer_interrupts_enabled(vcpu)) {
                        cputm = kvm_s390_get_cpu_timer(vcpu);
@@ -1192,7 +1186,6 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
 int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-       deliver_irq_t func;
        int rc = 0;
        unsigned long irq_type;
        unsigned long irqs;
@@ -1212,16 +1205,57 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
        while ((irqs = deliverable_irqs(vcpu)) && !rc) {
                /* bits are in the reverse order of interrupt priority */
                irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT);
-               if (is_ioirq(irq_type)) {
+               switch (irq_type) {
+               case IRQ_PEND_IO_ISC_0:
+               case IRQ_PEND_IO_ISC_1:
+               case IRQ_PEND_IO_ISC_2:
+               case IRQ_PEND_IO_ISC_3:
+               case IRQ_PEND_IO_ISC_4:
+               case IRQ_PEND_IO_ISC_5:
+               case IRQ_PEND_IO_ISC_6:
+               case IRQ_PEND_IO_ISC_7:
                        rc = __deliver_io(vcpu, irq_type);
-               } else {
-                       func = deliver_irq_funcs[irq_type];
-                       if (!func) {
-                               WARN_ON_ONCE(func == NULL);
-                               clear_bit(irq_type, &li->pending_irqs);
-                               continue;
-                       }
-                       rc = func(vcpu);
+                       break;
+               case IRQ_PEND_MCHK_EX:
+               case IRQ_PEND_MCHK_REP:
+                       rc = __deliver_machine_check(vcpu);
+                       break;
+               case IRQ_PEND_PROG:
+                       rc = __deliver_prog(vcpu);
+                       break;
+               case IRQ_PEND_EXT_EMERGENCY:
+                       rc = __deliver_emergency_signal(vcpu);
+                       break;
+               case IRQ_PEND_EXT_EXTERNAL:
+                       rc = __deliver_external_call(vcpu);
+                       break;
+               case IRQ_PEND_EXT_CLOCK_COMP:
+                       rc = __deliver_ckc(vcpu);
+                       break;
+               case IRQ_PEND_EXT_CPU_TIMER:
+                       rc = __deliver_cpu_timer(vcpu);
+                       break;
+               case IRQ_PEND_RESTART:
+                       rc = __deliver_restart(vcpu);
+                       break;
+               case IRQ_PEND_SET_PREFIX:
+                       rc = __deliver_set_prefix(vcpu);
+                       break;
+               case IRQ_PEND_PFAULT_INIT:
+                       rc = __deliver_pfault_init(vcpu);
+                       break;
+               case IRQ_PEND_EXT_SERVICE:
+                       rc = __deliver_service(vcpu);
+                       break;
+               case IRQ_PEND_PFAULT_DONE:
+                       rc = __deliver_pfault_done(vcpu);
+                       break;
+               case IRQ_PEND_VIRTIO:
+                       rc = __deliver_virtio(vcpu);
+                       break;
+               default:
+                       WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
+                       clear_bit(irq_type, &li->pending_irqs);
                }
        }
 
@@ -1701,7 +1735,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
                kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT);
                break;
        case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-               kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
+               if (!(type & KVM_S390_INT_IO_AI_MASK && kvm->arch.gisa))
+                       kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
                break;
        default:
                kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT);
index ba4c7092335ad254fe0d385b99aa231dbecd2d53..339ac0964590a1337935ea55ee0cbc7d7c2e441e 100644 (file)
@@ -86,6 +86,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
+       { "deliver_io_interrupt", VCPU_STAT(deliver_io_int) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "instruction_epsw", VCPU_STAT(instruction_epsw) },
        { "instruction_gs", VCPU_STAT(instruction_gs) },
@@ -179,6 +180,28 @@ int kvm_arch_hardware_enable(void)
 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);
 
+static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
+{
+       u8 delta_idx = 0;
+
+       /*
+        * The TOD jumps by delta, we have to compensate this by adding
+        * -delta to the epoch.
+        */
+       delta = -delta;
+
+       /* sign-extension - we're adding to signed values below */
+       if ((s64)delta < 0)
+               delta_idx = -1;
+
+       scb->epoch += delta;
+       if (scb->ecd & ECD_MEF) {
+               scb->epdx += delta_idx;
+               if (scb->epoch < delta)
+                       scb->epdx += 1;
+       }
+}
+
 /*
  * This callback is executed during stop_machine(). All CPUs are therefore
  * temporarily stopped. In order not to change guest behavior, we have to
@@ -194,13 +217,17 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
        unsigned long long *delta = v;
 
        list_for_each_entry(kvm, &vm_list, vm_list) {
-               kvm->arch.epoch -= *delta;
                kvm_for_each_vcpu(i, vcpu, kvm) {
-                       vcpu->arch.sie_block->epoch -= *delta;
+                       kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
+                       if (i == 0) {
+                               kvm->arch.epoch = vcpu->arch.sie_block->epoch;
+                               kvm->arch.epdx = vcpu->arch.sie_block->epdx;
+                       }
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
-                               vcpu->arch.vsie_block->epoch -= *delta;
+                               kvm_clock_sync_scb(vcpu->arch.vsie_block,
+                                                  *delta);
                }
        }
        return NOTIFY_OK;
@@ -902,12 +929,9 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;
 
-       if (test_kvm_facility(kvm, 139))
-               kvm_s390_set_tod_clock_ext(kvm, &gtod);
-       else if (gtod.epoch_idx == 0)
-               kvm_s390_set_tod_clock(kvm, gtod.tod);
-       else
+       if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
                return -EINVAL;
+       kvm_s390_set_tod_clock(kvm, &gtod);
 
        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);
@@ -932,13 +956,14 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 
 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 {
-       u64 gtod;
+       struct kvm_s390_vm_tod_clock gtod = { 0 };
 
-       if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
+       if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
+                          sizeof(gtod.tod)))
                return -EFAULT;
 
-       kvm_s390_set_tod_clock(kvm, gtod);
-       VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
+       kvm_s390_set_tod_clock(kvm, &gtod);
+       VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
        return 0;
 }
 
@@ -2122,6 +2147,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu)
                /* we still need the basic sca for the ipte control */
                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
+               return;
        }
        read_lock(&vcpu->kvm->arch.sca_lock);
        if (vcpu->kvm->arch.use_esca) {
@@ -2389,6 +2415,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
        mutex_lock(&vcpu->kvm->lock);
        preempt_disable();
        vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
+       vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
        preempt_enable();
        mutex_unlock(&vcpu->kvm->lock);
        if (!kvm_is_ucontrol(vcpu->kvm)) {
@@ -3021,8 +3048,8 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
-                                const struct kvm_s390_vm_tod_clock *gtod)
+void kvm_s390_set_tod_clock(struct kvm *kvm,
+                           const struct kvm_s390_vm_tod_clock *gtod)
 {
        struct kvm_vcpu *vcpu;
        struct kvm_s390_tod_clock_ext htod;
@@ -3034,10 +3061,12 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
        get_tod_clock_ext((char *)&htod);
 
        kvm->arch.epoch = gtod->tod - htod.tod;
-       kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
-
-       if (kvm->arch.epoch > gtod->tod)
-               kvm->arch.epdx -= 1;
+       kvm->arch.epdx = 0;
+       if (test_kvm_facility(kvm, 139)) {
+               kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
+               if (kvm->arch.epoch > gtod->tod)
+                       kvm->arch.epdx -= 1;
+       }
 
        kvm_s390_vcpu_block_all(kvm);
        kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -3050,22 +3079,6 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
        mutex_unlock(&kvm->lock);
 }
 
-void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
-{
-       struct kvm_vcpu *vcpu;
-       int i;
-
-       mutex_lock(&kvm->lock);
-       preempt_disable();
-       kvm->arch.epoch = tod - get_tod_clock();
-       kvm_s390_vcpu_block_all(kvm);
-       kvm_for_each_vcpu(i, vcpu, kvm)
-               vcpu->arch.sie_block->epoch = kvm->arch.epoch;
-       kvm_s390_vcpu_unblock_all(kvm);
-       preempt_enable();
-       mutex_unlock(&kvm->lock);
-}
-
 /**
  * kvm_arch_fault_in_page - fault-in guest page if necessary
  * @vcpu: The corresponding virtual cpu
index bd31b37b0e6f83905e7204b2eb439050aaeb1187..f55ac0ef99ea70bf3fb1d8f7eb7e303eb405b2b4 100644 (file)
@@ -19,8 +19,6 @@
 #include <asm/processor.h>
 #include <asm/sclp.h>
 
-typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
-
 /* Transactional Memory Execution related macros */
 #define IS_TE_ENABLED(vcpu)    ((vcpu->arch.sie_block->ecb & ECB_TE))
 #define TDB_FORMAT1            1
@@ -283,9 +281,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
 
 /* implemented in kvm-s390.c */
-void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
-                                const struct kvm_s390_vm_tod_clock *gtod);
-void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
+void kvm_s390_set_tod_clock(struct kvm *kvm,
+                           const struct kvm_s390_vm_tod_clock *gtod);
 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
index c4c4e157c03631a1d745ccafdfec111a6a08a49e..f0b4185158afcb229cac5b06f23f5664afffebec 100644 (file)
@@ -85,9 +85,10 @@ int kvm_s390_handle_e3(struct kvm_vcpu *vcpu)
 /* Handle SCK (SET CLOCK) interception */
 static int handle_set_clock(struct kvm_vcpu *vcpu)
 {
+       struct kvm_s390_vm_tod_clock gtod = { 0 };
        int rc;
        u8 ar;
-       u64 op2, val;
+       u64 op2;
 
        vcpu->stat.instruction_sck++;
 
@@ -97,12 +98,12 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
        op2 = kvm_s390_get_base_disp_s(vcpu, &ar);
        if (op2 & 7)    /* Operand must be on a doubleword boundary */
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-       rc = read_guest(vcpu, op2, ar, &val, sizeof(val));
+       rc = read_guest(vcpu, op2, ar, &gtod.tod, sizeof(gtod.tod));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
 
-       VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val);
-       kvm_s390_set_tod_clock(vcpu->kvm, val);
+       VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod);
+       kvm_s390_set_tod_clock(vcpu->kvm, &gtod);
 
        kvm_s390_set_psw_cc(vcpu, 0);
        return 0;
@@ -795,55 +796,60 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
        return rc;
 }
 
-static const intercept_handler_t b2_handlers[256] = {
-       [0x02] = handle_stidp,
-       [0x04] = handle_set_clock,
-       [0x10] = handle_set_prefix,
-       [0x11] = handle_store_prefix,
-       [0x12] = handle_store_cpu_address,
-       [0x14] = kvm_s390_handle_vsie,
-       [0x21] = handle_ipte_interlock,
-       [0x29] = handle_iske,
-       [0x2a] = handle_rrbe,
-       [0x2b] = handle_sske,
-       [0x2c] = handle_test_block,
-       [0x30] = handle_io_inst,
-       [0x31] = handle_io_inst,
-       [0x32] = handle_io_inst,
-       [0x33] = handle_io_inst,
-       [0x34] = handle_io_inst,
-       [0x35] = handle_io_inst,
-       [0x36] = handle_io_inst,
-       [0x37] = handle_io_inst,
-       [0x38] = handle_io_inst,
-       [0x39] = handle_io_inst,
-       [0x3a] = handle_io_inst,
-       [0x3b] = handle_io_inst,
-       [0x3c] = handle_io_inst,
-       [0x50] = handle_ipte_interlock,
-       [0x56] = handle_sthyi,
-       [0x5f] = handle_io_inst,
-       [0x74] = handle_io_inst,
-       [0x76] = handle_io_inst,
-       [0x7d] = handle_stsi,
-       [0xb1] = handle_stfl,
-       [0xb2] = handle_lpswe,
-};
-
 int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
 {
-       intercept_handler_t handler;
-
-       /*
-        * A lot of B2 instructions are priviledged. Here we check for
-        * the privileged ones, that we can handle in the kernel.
-        * Anything else goes to userspace.
-        */
-       handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
-       if (handler)
-               return handler(vcpu);
-
-       return -EOPNOTSUPP;
+       switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+       case 0x02:
+               return handle_stidp(vcpu);
+       case 0x04:
+               return handle_set_clock(vcpu);
+       case 0x10:
+               return handle_set_prefix(vcpu);
+       case 0x11:
+               return handle_store_prefix(vcpu);
+       case 0x12:
+               return handle_store_cpu_address(vcpu);
+       case 0x14:
+               return kvm_s390_handle_vsie(vcpu);
+       case 0x21:
+       case 0x50:
+               return handle_ipte_interlock(vcpu);
+       case 0x29:
+               return handle_iske(vcpu);
+       case 0x2a:
+               return handle_rrbe(vcpu);
+       case 0x2b:
+               return handle_sske(vcpu);
+       case 0x2c:
+               return handle_test_block(vcpu);
+       case 0x30:
+       case 0x31:
+       case 0x32:
+       case 0x33:
+       case 0x34:
+       case 0x35:
+       case 0x36:
+       case 0x37:
+       case 0x38:
+       case 0x39:
+       case 0x3a:
+       case 0x3b:
+       case 0x3c:
+       case 0x5f:
+       case 0x74:
+       case 0x76:
+               return handle_io_inst(vcpu);
+       case 0x56:
+               return handle_sthyi(vcpu);
+       case 0x7d:
+               return handle_stsi(vcpu);
+       case 0xb1:
+               return handle_stfl(vcpu);
+       case 0xb2:
+               return handle_lpswe(vcpu);
+       default:
+               return -EOPNOTSUPP;
+       }
 }
 
 static int handle_epsw(struct kvm_vcpu *vcpu)
@@ -1105,25 +1111,22 @@ static int handle_essa(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-static const intercept_handler_t b9_handlers[256] = {
-       [0x8a] = handle_ipte_interlock,
-       [0x8d] = handle_epsw,
-       [0x8e] = handle_ipte_interlock,
-       [0x8f] = handle_ipte_interlock,
-       [0xab] = handle_essa,
-       [0xaf] = handle_pfmf,
-};
-
 int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
 {
-       intercept_handler_t handler;
-
-       /* This is handled just as for the B2 instructions. */
-       handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
-       if (handler)
-               return handler(vcpu);
-
-       return -EOPNOTSUPP;
+       switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+       case 0x8a:
+       case 0x8e:
+       case 0x8f:
+               return handle_ipte_interlock(vcpu);
+       case 0x8d:
+               return handle_epsw(vcpu);
+       case 0xab:
+               return handle_essa(vcpu);
+       case 0xaf:
+               return handle_pfmf(vcpu);
+       default:
+               return -EOPNOTSUPP;
+       }
 }
 
 int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
@@ -1271,22 +1274,20 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
        return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
-static const intercept_handler_t eb_handlers[256] = {
-       [0x2f] = handle_lctlg,
-       [0x25] = handle_stctg,
-       [0x60] = handle_ri,
-       [0x61] = handle_ri,
-       [0x62] = handle_ri,
-};
-
 int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
 {
-       intercept_handler_t handler;
-
-       handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
-       if (handler)
-               return handler(vcpu);
-       return -EOPNOTSUPP;
+       switch (vcpu->arch.sie_block->ipb & 0x000000ff) {
+       case 0x25:
+               return handle_stctg(vcpu);
+       case 0x2f:
+               return handle_lctlg(vcpu);
+       case 0x60:
+       case 0x61:
+       case 0x62:
+               return handle_ri(vcpu);
+       default:
+               return -EOPNOTSUPP;
+       }
 }
 
 static int handle_tprot(struct kvm_vcpu *vcpu)
@@ -1346,10 +1347,12 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
 
 int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)
 {
-       /* For e5xx... instructions we only handle TPROT */
-       if ((vcpu->arch.sie_block->ipa & 0x00ff) == 0x01)
+       switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+       case 0x01:
                return handle_tprot(vcpu);
-       return -EOPNOTSUPP;
+       default:
+               return -EOPNOTSUPP;
+       }
 }
 
 static int handle_sckpf(struct kvm_vcpu *vcpu)
@@ -1380,17 +1383,14 @@ static int handle_ptff(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-static const intercept_handler_t x01_handlers[256] = {
-       [0x04] = handle_ptff,
-       [0x07] = handle_sckpf,
-};
-
 int kvm_s390_handle_01(struct kvm_vcpu *vcpu)
 {
-       intercept_handler_t handler;
-
-       handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
-       if (handler)
-               return handler(vcpu);
-       return -EOPNOTSUPP;
+       switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+       case 0x04:
+               return handle_ptff(vcpu);
+       case 0x07:
+               return handle_sckpf(vcpu);
+       default:
+               return -EOPNOTSUPP;
+       }
 }
index ec772700ff9659350543534272a92eb531bbd181..8961e3970901d4b06c87b20f147115b683ad5170 100644 (file)
@@ -821,6 +821,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 {
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+       int guest_bp_isolation;
        int rc;
 
        handle_last_fault(vcpu, vsie_page);
@@ -831,6 +832,20 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
                s390_handle_mcck();
 
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+
+       /* save current guest state of bp isolation override */
+       guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);
+
+       /*
+        * The guest is running with BPBC, so we have to force it on for our
+        * nested guest. This is done by enabling BPBC globally, so the BPBC
+        * control in the SCB (which the nested guest can modify) is simply
+        * ignored.
+        */
+       if (test_kvm_facility(vcpu->kvm, 82) &&
+           vcpu->arch.sie_block->fpf & FPF_BPBC)
+               set_thread_flag(TIF_ISOLATE_BP_GUEST);
+
        local_irq_disable();
        guest_enter_irqoff();
        local_irq_enable();
@@ -840,6 +855,11 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
        local_irq_disable();
        guest_exit_irqoff();
        local_irq_enable();
+
+       /* restore guest state for bp isolation override */
+       if (!guest_bp_isolation)
+               clear_thread_flag(TIF_ISOLATE_BP_GUEST);
+
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 
        if (rc == -EINTR) {
index 715def00a436c21a1822e18251b83f6afe86941a..01d0f7fb14cce7b8e0afc17798d6cc8a1d2938d1 100644 (file)
@@ -1 +1,3 @@
-obj-$(CONFIG_USE_BUILTIN_DTB) += $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o
+ifneq ($(CONFIG_BUILTIN_DTB_SOURCE),"")
+obj-y += $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o
+endif
index 847ddffbf38ad797afbdef3777cdfea552f282d3..b5cfab7116514814cd244fb89f485d75c104e946 100644 (file)
@@ -163,13 +163,10 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
        pte_unmap(pte);
 }
 
-void set_pmd_at(struct mm_struct *mm, unsigned long addr,
-               pmd_t *pmdp, pmd_t pmd)
-{
-       pmd_t orig = *pmdp;
-
-       *pmdp = pmd;
 
+static void __set_pmd_acct(struct mm_struct *mm, unsigned long addr,
+                          pmd_t orig, pmd_t pmd)
+{
        if (mm == &init_mm)
                return;
 
@@ -219,6 +216,15 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
        }
 }
 
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+               pmd_t *pmdp, pmd_t pmd)
+{
+       pmd_t orig = *pmdp;
+
+       *pmdp = pmd;
+       __set_pmd_acct(mm, addr, orig, pmd);
+}
+
 static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
                unsigned long address, pmd_t *pmdp, pmd_t pmd)
 {
@@ -227,6 +233,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
        do {
                old = *pmdp;
        } while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd);
+       __set_pmd_acct(vma->vm_mm, address, old, pmd);
 
        return old;
 }
index c1236b187824e222a2c7fddd417272369767b06b..0fa71a78ec99a9ae2e4dcbbadfa74773a5ebc031 100644 (file)
@@ -430,6 +430,7 @@ config GOLDFISH
 config RETPOLINE
        bool "Avoid speculative indirect branches in kernel"
        default y
+       select STACK_VALIDATION if HAVE_STACK_VALIDATION
        help
          Compile kernel with the retpoline compiler options to guard against
          kernel-to-user data leaks by avoiding speculative indirect
@@ -2306,7 +2307,7 @@ choice
          it can be used to assist security vulnerability exploitation.
 
          This setting can be changed at boot time via the kernel command
-         line parameter vsyscall=[native|emulate|none].
+         line parameter vsyscall=[emulate|none].
 
          On a system with recent enough glibc (2.14 or newer) and no
          static binaries, you can say None without a performance penalty
@@ -2314,15 +2315,6 @@ choice
 
          If unsure, select "Emulate".
 
-       config LEGACY_VSYSCALL_NATIVE
-               bool "Native"
-               help
-                 Actual executable code is located in the fixed vsyscall
-                 address mapping, implementing time() efficiently. Since
-                 this makes the mapping executable, it can be used during
-                 security vulnerability exploitation (traditionally as
-                 ROP gadgets). This configuration is not recommended.
-
        config LEGACY_VSYSCALL_EMULATE
                bool "Emulate"
                help
index fad55160dcb94a28e60d537d3d69d471a1e10e2e..498c1b8123006add6ad685a1fea6239208509810 100644 (file)
@@ -232,10 +232,9 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
 # Avoid indirect branches in kernel to deal with Spectre
 ifdef CONFIG_RETPOLINE
-    RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
-    ifneq ($(RETPOLINE_CFLAGS),)
-        KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
-    endif
+ifneq ($(RETPOLINE_CFLAGS),)
+  KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+endif
 endif
 
 archscripts: scripts_basic
index 353e20c3f114f3132ef18dbc03e7a8110f4e22df..886a9115af6229d058cab4f0600ebd666fc9e6d3 100644 (file)
@@ -439,7 +439,7 @@ setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height)
        struct efi_uga_draw_protocol *uga = NULL, *first_uga;
        efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
        unsigned long nr_ugas;
-       u32 *handles = (u32 *)uga_handle;;
+       u32 *handles = (u32 *)uga_handle;
        efi_status_t status = EFI_INVALID_PARAMETER;
        int i;
 
@@ -484,7 +484,7 @@ setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height)
        struct efi_uga_draw_protocol *uga = NULL, *first_uga;
        efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
        unsigned long nr_ugas;
-       u64 *handles = (u64 *)uga_handle;;
+       u64 *handles = (u64 *)uga_handle;
        efi_status_t status = EFI_INVALID_PARAMETER;
        int i;
 
index dce7092ab24a247c1165f80b17c687d255023a05..be63330c551129cb6bf0fcf07d8bf6d4600cb098 100644 (file)
@@ -97,7 +97,7 @@ For 32-bit we have the following conventions - kernel is built with
 
 #define SIZEOF_PTREGS  21*8
 
-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
        /*
         * Push registers and sanitize registers of values that a
         * speculation attack might otherwise want to exploit. The
@@ -105,32 +105,41 @@ For 32-bit we have the following conventions - kernel is built with
         * could be put to use in a speculative execution gadget.
         * Interleave XOR with PUSH for better uop scheduling:
         */
+       .if \save_ret
+       pushq   %rsi            /* pt_regs->si */
+       movq    8(%rsp), %rsi   /* temporarily store the return address in %rsi */
+       movq    %rdi, 8(%rsp)   /* pt_regs->di (overwriting original return address) */
+       .else
        pushq   %rdi            /* pt_regs->di */
        pushq   %rsi            /* pt_regs->si */
+       .endif
        pushq   \rdx            /* pt_regs->dx */
        pushq   %rcx            /* pt_regs->cx */
        pushq   \rax            /* pt_regs->ax */
        pushq   %r8             /* pt_regs->r8 */
-       xorq    %r8, %r8        /* nospec   r8 */
+       xorl    %r8d, %r8d      /* nospec   r8 */
        pushq   %r9             /* pt_regs->r9 */
-       xorq    %r9, %r9        /* nospec   r9 */
+       xorl    %r9d, %r9d      /* nospec   r9 */
        pushq   %r10            /* pt_regs->r10 */
-       xorq    %r10, %r10      /* nospec   r10 */
+       xorl    %r10d, %r10d    /* nospec   r10 */
        pushq   %r11            /* pt_regs->r11 */
-       xorq    %r11, %r11      /* nospec   r11*/
+       xorl    %r11d, %r11d    /* nospec   r11*/
        pushq   %rbx            /* pt_regs->rbx */
        xorl    %ebx, %ebx      /* nospec   rbx*/
        pushq   %rbp            /* pt_regs->rbp */
        xorl    %ebp, %ebp      /* nospec   rbp*/
        pushq   %r12            /* pt_regs->r12 */
-       xorq    %r12, %r12      /* nospec   r12*/
+       xorl    %r12d, %r12d    /* nospec   r12*/
        pushq   %r13            /* pt_regs->r13 */
-       xorq    %r13, %r13      /* nospec   r13*/
+       xorl    %r13d, %r13d    /* nospec   r13*/
        pushq   %r14            /* pt_regs->r14 */
-       xorq    %r14, %r14      /* nospec   r14*/
+       xorl    %r14d, %r14d    /* nospec   r14*/
        pushq   %r15            /* pt_regs->r15 */
-       xorq    %r15, %r15      /* nospec   r15*/
+       xorl    %r15d, %r15d    /* nospec   r15*/
        UNWIND_HINT_REGS
+       .if \save_ret
+       pushq   %rsi            /* return address on top of stack */
+       .endif
 .endm
 
 .macro POP_REGS pop_rdi=1 skip_r11rcx=0
@@ -172,12 +181,7 @@ For 32-bit we have the following conventions - kernel is built with
  */
 .macro ENCODE_FRAME_POINTER ptregs_offset=0
 #ifdef CONFIG_FRAME_POINTER
-       .if \ptregs_offset
-               leaq \ptregs_offset(%rsp), %rbp
-       .else
-               mov %rsp, %rbp
-       .endif
-       orq     $0x1, %rbp
+       leaq 1+\ptregs_offset(%rsp), %rbp
 #endif
 .endm
 
index 16c2c022540d42b9fc51ee7489383775bd319ac1..6ad064c8cf35e6fdfc9c384212a6f76c780ffd69 100644 (file)
@@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
         * exist, overwrite the RSB with entries which capture
         * speculative execution to prevent attack.
         */
-       /* Clobbers %ebx */
-       FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+       FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
        /* restore callee-saved registers */
index 8971bd64d515c5bb4a9b95108fd802b8418764f2..805f52703ee3377f88180eb3467a34bf2bc40e87 100644 (file)
@@ -55,7 +55,7 @@ END(native_usergs_sysret64)
 
 .macro TRACE_IRQS_FLAGS flags:req
 #ifdef CONFIG_TRACE_IRQFLAGS
-       bt      $9, \flags              /* interrupts off? */
+       btl     $9, \flags              /* interrupts off? */
        jnc     1f
        TRACE_IRQS_ON
 1:
@@ -364,8 +364,7 @@ ENTRY(__switch_to_asm)
         * exist, overwrite the RSB with entries which capture
         * speculative execution to prevent attack.
         */
-       /* Clobbers %rbx */
-       FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+       FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
        /* restore callee-saved registers */
@@ -449,9 +448,19 @@ END(irq_entries_start)
  *
  * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
  */
-.macro ENTER_IRQ_STACK regs=1 old_rsp
+.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
        DEBUG_ENTRY_ASSERT_IRQS_OFF
+
+       .if \save_ret
+       /*
+        * If save_ret is set, the original stack contains one additional
+        * entry -- the return address. Therefore, skip over it and store
+        * %rsp + 8 (the address just past the return address) in \old_rsp.
+        */
+       leaq    8(%rsp), \old_rsp
+       .else
        movq    %rsp, \old_rsp
+       .endif
 
        .if \regs
        UNWIND_HINT_REGS base=\old_rsp
@@ -497,6 +506,15 @@ END(irq_entries_start)
        .if \regs
        UNWIND_HINT_REGS indirect=1
        .endif
+
+       .if \save_ret
+       /*
+        * Push the return address to the stack. This return address can
+        * be found at the "real" original RSP, which was offset by 8 at
+        * the beginning of this macro.
+        */
+       pushq   -8(\old_rsp)
+       .endif
 .endm
 
 /*
@@ -520,27 +538,65 @@ END(irq_entries_start)
 .endm
 
 /*
- * Interrupt entry/exit.
- *
- * Interrupt entry points save only callee clobbered registers in fast path.
+ * Interrupt entry helper function.
  *
- * Entry runs with interrupts off.
+ * Entry runs with interrupts off. Stack layout at entry:
+ * +----------------------------------------------------+
+ * | regs->ss                                          |
+ * | regs->rsp                                         |
+ * | regs->eflags                                      |
+ * | regs->cs                                          |
+ * | regs->ip                                          |
+ * +----------------------------------------------------+
+ * | regs->orig_ax = ~(interrupt number)               |
+ * +----------------------------------------------------+
+ * | return address                                    |
+ * +----------------------------------------------------+
  */
-
-/* 0(%rsp): ~(interrupt number) */
-       .macro interrupt func
+ENTRY(interrupt_entry)
+       UNWIND_HINT_FUNC
+       ASM_CLAC
        cld
 
-       testb   $3, CS-ORIG_RAX(%rsp)
+       testb   $3, CS-ORIG_RAX+8(%rsp)
        jz      1f
        SWAPGS
-       call    switch_to_thread_stack
+
+       /*
+        * Switch to the thread stack. The IRET frame and orig_ax are
+        * on the stack, as well as the return address. RDI..R12 are
+        * not (yet) on the stack and space has not (yet) been
+        * allocated for them.
+        */
+       pushq   %rdi
+
+       /* Need to switch before accessing the thread stack. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
+        /*
+         * We have RDI, return address, and orig_ax on the stack on
+         * top of the IRET frame. That means offset=24
+         */
+       UNWIND_HINT_IRET_REGS base=%rdi offset=24
+
+       pushq   7*8(%rdi)               /* regs->ss */
+       pushq   6*8(%rdi)               /* regs->rsp */
+       pushq   5*8(%rdi)               /* regs->eflags */
+       pushq   4*8(%rdi)               /* regs->cs */
+       pushq   3*8(%rdi)               /* regs->ip */
+       pushq   2*8(%rdi)               /* regs->orig_ax */
+       pushq   8(%rdi)                 /* return address */
+       UNWIND_HINT_FUNC
+
+       movq    (%rdi), %rdi
 1:
 
-       PUSH_AND_CLEAR_REGS
-       ENCODE_FRAME_POINTER
+       PUSH_AND_CLEAR_REGS save_ret=1
+       ENCODE_FRAME_POINTER 8
 
-       testb   $3, CS(%rsp)
+       testb   $3, CS+8(%rsp)
        jz      1f
 
        /*
@@ -548,7 +604,7 @@ END(irq_entries_start)
         *
         * We need to tell lockdep that IRQs are off.  We can't do this until
         * we fix gsbase, and we should do it before enter_from_user_mode
-        * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
+        * (which can take locks).  Since TRACE_IRQS_OFF is idempotent,
         * the simplest way to handle it is to just call it twice if
         * we enter from user mode.  There's no reason to optimize this since
         * TRACE_IRQS_OFF is a no-op if lockdep is off.
@@ -558,12 +614,15 @@ END(irq_entries_start)
        CALL_enter_from_user_mode
 
 1:
-       ENTER_IRQ_STACK old_rsp=%rdi
+       ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
        /* We entered an interrupt context - irqs are off: */
        TRACE_IRQS_OFF
 
-       call    \func   /* rdi points to pt_regs */
-       .endm
+       ret
+END(interrupt_entry)
+
+
+/* Interrupt entry/exit. */
 
        /*
         * The interrupt stubs push (~vector+0x80) onto the stack and
@@ -571,9 +630,10 @@ END(irq_entries_start)
         */
        .p2align CONFIG_X86_L1_CACHE_SHIFT
 common_interrupt:
-       ASM_CLAC
        addq    $-0x80, (%rsp)                  /* Adjust vector to [-256, -1] range */
-       interrupt do_IRQ
+       call    interrupt_entry
+       UNWIND_HINT_REGS indirect=1
+       call    do_IRQ  /* rdi points to pt_regs */
        /* 0(%rsp): old RSP */
 ret_from_intr:
        DISABLE_INTERRUPTS(CLBR_ANY)
@@ -766,10 +826,11 @@ END(common_interrupt)
 .macro apicinterrupt3 num sym do_sym
 ENTRY(\sym)
        UNWIND_HINT_IRET_REGS
-       ASM_CLAC
        pushq   $~(\num)
 .Lcommon_\sym:
-       interrupt \do_sym
+       call    interrupt_entry
+       UNWIND_HINT_REGS indirect=1
+       call    \do_sym /* rdi points to pt_regs */
        jmp     ret_from_intr
 END(\sym)
 .endm
@@ -832,34 +893,6 @@ apicinterrupt IRQ_WORK_VECTOR                      irq_work_interrupt              smp_irq_work_interrupt
  */
 #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
 
-/*
- * Switch to the thread stack.  This is called with the IRET frame and
- * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
- * space has not been allocated for them.)
- */
-ENTRY(switch_to_thread_stack)
-       UNWIND_HINT_FUNC
-
-       pushq   %rdi
-       /* Need to switch before accessing the thread stack. */
-       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
-       movq    %rsp, %rdi
-       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-       UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
-
-       pushq   7*8(%rdi)               /* regs->ss */
-       pushq   6*8(%rdi)               /* regs->rsp */
-       pushq   5*8(%rdi)               /* regs->eflags */
-       pushq   4*8(%rdi)               /* regs->cs */
-       pushq   3*8(%rdi)               /* regs->ip */
-       pushq   2*8(%rdi)               /* regs->orig_ax */
-       pushq   8(%rdi)                 /* return address */
-       UNWIND_HINT_FUNC
-
-       movq    (%rdi), %rdi
-       ret
-END(switch_to_thread_stack)
-
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
        UNWIND_HINT_IRET_REGS offset=\has_error_code*8
@@ -875,12 +908,8 @@ ENTRY(\sym)
        pushq   $-1                             /* ORIG_RAX: no syscall to restart */
        .endif
 
-       /* Save all registers in pt_regs */
-       PUSH_AND_CLEAR_REGS
-       ENCODE_FRAME_POINTER
-
        .if \paranoid < 2
-       testb   $3, CS(%rsp)                    /* If coming from userspace, switch stacks */
+       testb   $3, CS-ORIG_RAX(%rsp)           /* If coming from userspace, switch stacks */
        jnz     .Lfrom_usermode_switch_stack_\@
        .endif
 
@@ -1130,13 +1159,15 @@ idtentry machine_check          do_mce                  has_error_code=0        paranoid=1
 #endif
 
 /*
- * Switch gs if needed.
+ * Save all registers in pt_regs, and switch gs if needed.
  * Use slow, but surefire "are we in kernel?" check.
  * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
  */
 ENTRY(paranoid_entry)
        UNWIND_HINT_FUNC
        cld
+       PUSH_AND_CLEAR_REGS save_ret=1
+       ENCODE_FRAME_POINTER 8
        movl    $1, %ebx
        movl    $MSR_GS_BASE, %ecx
        rdmsr
@@ -1181,12 +1212,14 @@ ENTRY(paranoid_exit)
 END(paranoid_exit)
 
 /*
- * Switch gs if needed.
+ * Save all registers in pt_regs, and switch GS if needed.
  * Return: EBX=0: came from user mode; EBX=1: otherwise
  */
 ENTRY(error_entry)
-       UNWIND_HINT_REGS offset=8
+       UNWIND_HINT_FUNC
        cld
+       PUSH_AND_CLEAR_REGS save_ret=1
+       ENCODE_FRAME_POINTER 8
        testb   $3, CS+8(%rsp)
        jz      .Lerror_kernelspace
 
@@ -1577,8 +1610,6 @@ end_repeat_nmi:
         * frame to point back to repeat_nmi.
         */
        pushq   $-1                             /* ORIG_RAX: no syscall to restart */
-       PUSH_AND_CLEAR_REGS
-       ENCODE_FRAME_POINTER
 
        /*
         * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
index fd65e016e4133f5634545062d098fc25fb5c0b1d..08425c42f8b7c726e0daa70bb4e112582a513c20 100644 (file)
@@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat)
        pushq   %rcx                    /* pt_regs->cx */
        pushq   $-ENOSYS                /* pt_regs->ax */
        pushq   $0                      /* pt_regs->r8  = 0 */
-       xorq    %r8, %r8                /* nospec   r8 */
+       xorl    %r8d, %r8d              /* nospec   r8 */
        pushq   $0                      /* pt_regs->r9  = 0 */
-       xorq    %r9, %r9                /* nospec   r9 */
+       xorl    %r9d, %r9d              /* nospec   r9 */
        pushq   $0                      /* pt_regs->r10 = 0 */
-       xorq    %r10, %r10              /* nospec   r10 */
+       xorl    %r10d, %r10d            /* nospec   r10 */
        pushq   $0                      /* pt_regs->r11 = 0 */
-       xorq    %r11, %r11              /* nospec   r11 */
+       xorl    %r11d, %r11d            /* nospec   r11 */
        pushq   %rbx                    /* pt_regs->rbx */
        xorl    %ebx, %ebx              /* nospec   rbx */
        pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
        xorl    %ebp, %ebp              /* nospec   rbp */
        pushq   $0                      /* pt_regs->r12 = 0 */
-       xorq    %r12, %r12              /* nospec   r12 */
+       xorl    %r12d, %r12d            /* nospec   r12 */
        pushq   $0                      /* pt_regs->r13 = 0 */
-       xorq    %r13, %r13              /* nospec   r13 */
+       xorl    %r13d, %r13d            /* nospec   r13 */
        pushq   $0                      /* pt_regs->r14 = 0 */
-       xorq    %r14, %r14              /* nospec   r14 */
+       xorl    %r14d, %r14d            /* nospec   r14 */
        pushq   $0                      /* pt_regs->r15 = 0 */
-       xorq    %r15, %r15              /* nospec   r15 */
+       xorl    %r15d, %r15d            /* nospec   r15 */
        cld
 
        /*
@@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
        pushq   %rbp                    /* pt_regs->cx (stashed in bp) */
        pushq   $-ENOSYS                /* pt_regs->ax */
        pushq   $0                      /* pt_regs->r8  = 0 */
-       xorq    %r8, %r8                /* nospec   r8 */
+       xorl    %r8d, %r8d              /* nospec   r8 */
        pushq   $0                      /* pt_regs->r9  = 0 */
-       xorq    %r9, %r9                /* nospec   r9 */
+       xorl    %r9d, %r9d              /* nospec   r9 */
        pushq   $0                      /* pt_regs->r10 = 0 */
-       xorq    %r10, %r10              /* nospec   r10 */
+       xorl    %r10d, %r10d            /* nospec   r10 */
        pushq   $0                      /* pt_regs->r11 = 0 */
-       xorq    %r11, %r11              /* nospec   r11 */
+       xorl    %r11d, %r11d            /* nospec   r11 */
        pushq   %rbx                    /* pt_regs->rbx */
        xorl    %ebx, %ebx              /* nospec   rbx */
        pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
        xorl    %ebp, %ebp              /* nospec   rbp */
        pushq   $0                      /* pt_regs->r12 = 0 */
-       xorq    %r12, %r12              /* nospec   r12 */
+       xorl    %r12d, %r12d            /* nospec   r12 */
        pushq   $0                      /* pt_regs->r13 = 0 */
-       xorq    %r13, %r13              /* nospec   r13 */
+       xorl    %r13d, %r13d            /* nospec   r13 */
        pushq   $0                      /* pt_regs->r14 = 0 */
-       xorq    %r14, %r14              /* nospec   r14 */
+       xorl    %r14d, %r14d            /* nospec   r14 */
        pushq   $0                      /* pt_regs->r15 = 0 */
-       xorq    %r15, %r15              /* nospec   r15 */
+       xorl    %r15d, %r15d            /* nospec   r15 */
 
        /*
         * User mode is traced as though IRQs are on, and SYSENTER
@@ -298,9 +298,9 @@ sysret32_from_system_call:
         */
        SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
 
-       xorq    %r8, %r8
-       xorq    %r9, %r9
-       xorq    %r10, %r10
+       xorl    %r8d, %r8d
+       xorl    %r9d, %r9d
+       xorl    %r10d, %r10d
        swapgs
        sysretl
 END(entry_SYSCALL_compat)
@@ -347,36 +347,47 @@ ENTRY(entry_INT80_compat)
         */
        movl    %eax, %eax
 
+       /* switch to thread stack expects orig_ax and rdi to be pushed */
        pushq   %rax                    /* pt_regs->orig_ax */
+       pushq   %rdi                    /* pt_regs->di */
 
-       /* switch to thread stack expects orig_ax to be pushed */
-       call    switch_to_thread_stack
+       /* Need to switch before accessing the thread stack. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
-       pushq   %rdi                    /* pt_regs->di */
+       pushq   6*8(%rdi)               /* regs->ss */
+       pushq   5*8(%rdi)               /* regs->rsp */
+       pushq   4*8(%rdi)               /* regs->eflags */
+       pushq   3*8(%rdi)               /* regs->cs */
+       pushq   2*8(%rdi)               /* regs->ip */
+       pushq   1*8(%rdi)               /* regs->orig_ax */
+
+       pushq   (%rdi)                  /* pt_regs->di */
        pushq   %rsi                    /* pt_regs->si */
        pushq   %rdx                    /* pt_regs->dx */
        pushq   %rcx                    /* pt_regs->cx */
        pushq   $-ENOSYS                /* pt_regs->ax */
        pushq   $0                      /* pt_regs->r8  = 0 */
-       xorq    %r8, %r8                /* nospec   r8 */
+       xorl    %r8d, %r8d              /* nospec   r8 */
        pushq   $0                      /* pt_regs->r9  = 0 */
-       xorq    %r9, %r9                /* nospec   r9 */
+       xorl    %r9d, %r9d              /* nospec   r9 */
        pushq   $0                      /* pt_regs->r10 = 0 */
-       xorq    %r10, %r10              /* nospec   r10 */
+       xorl    %r10d, %r10d            /* nospec   r10 */
        pushq   $0                      /* pt_regs->r11 = 0 */
-       xorq    %r11, %r11              /* nospec   r11 */
+       xorl    %r11d, %r11d            /* nospec   r11 */
        pushq   %rbx                    /* pt_regs->rbx */
        xorl    %ebx, %ebx              /* nospec   rbx */
        pushq   %rbp                    /* pt_regs->rbp */
        xorl    %ebp, %ebp              /* nospec   rbp */
        pushq   %r12                    /* pt_regs->r12 */
-       xorq    %r12, %r12              /* nospec   r12 */
+       xorl    %r12d, %r12d            /* nospec   r12 */
        pushq   %r13                    /* pt_regs->r13 */
-       xorq    %r13, %r13              /* nospec   r13 */
+       xorl    %r13d, %r13d            /* nospec   r13 */
        pushq   %r14                    /* pt_regs->r14 */
-       xorq    %r14, %r14              /* nospec   r14 */
+       xorl    %r14d, %r14d            /* nospec   r14 */
        pushq   %r15                    /* pt_regs->r15 */
-       xorq    %r15, %r15              /* nospec   r15 */
+       xorl    %r15d, %r15d            /* nospec   r15 */
        cld
 
        /*
@@ -393,15 +404,3 @@ ENTRY(entry_INT80_compat)
        TRACE_IRQS_ON
        jmp     swapgs_restore_regs_and_return_to_usermode
 END(entry_INT80_compat)
-
-ENTRY(stub32_clone)
-       /*
-        * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr).
-        * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val).
-        *
-        * The native 64-bit kernel's sys_clone() implements the latter,
-        * so we need to swap arguments here before calling it:
-        */
-       xchg    %r8, %rcx
-       jmp     sys_clone
-ENDPROC(stub32_clone)
index 448ac2161112b7fd05c4388ff1327294499884f5..2a5e99cff8597278412685867f512858254f2b0c 100644 (file)
@@ -8,12 +8,12 @@
 #
 0      i386    restart_syscall         sys_restart_syscall
 1      i386    exit                    sys_exit
-2      i386    fork                    sys_fork                        sys_fork
+2      i386    fork                    sys_fork
 3      i386    read                    sys_read
 4      i386    write                   sys_write
 5      i386    open                    sys_open                        compat_sys_open
 6      i386    close                   sys_close
-7      i386    waitpid                 sys_waitpid                     sys32_waitpid
+7      i386    waitpid                 sys_waitpid                     compat_sys_x86_waitpid
 8      i386    creat                   sys_creat
 9      i386    link                    sys_link
 10     i386    unlink                  sys_unlink
@@ -78,7 +78,7 @@
 69     i386    ssetmask                sys_ssetmask
 70     i386    setreuid                sys_setreuid16
 71     i386    setregid                sys_setregid16
-72     i386    sigsuspend              sys_sigsuspend                  sys_sigsuspend
+72     i386    sigsuspend              sys_sigsuspend
 73     i386    sigpending              sys_sigpending                  compat_sys_sigpending
 74     i386    sethostname             sys_sethostname
 75     i386    setrlimit               sys_setrlimit                   compat_sys_setrlimit
@@ -96,7 +96,7 @@
 87     i386    swapon                  sys_swapon
 88     i386    reboot                  sys_reboot
 89     i386    readdir                 sys_old_readdir                 compat_sys_old_readdir
-90     i386    mmap                    sys_old_mmap                    sys32_mmap
+90     i386    mmap                    sys_old_mmap                    compat_sys_x86_mmap
 91     i386    munmap                  sys_munmap
 92     i386    truncate                sys_truncate                    compat_sys_truncate
 93     i386    ftruncate               sys_ftruncate                   compat_sys_ftruncate
 117    i386    ipc                     sys_ipc                         compat_sys_ipc
 118    i386    fsync                   sys_fsync
 119    i386    sigreturn               sys_sigreturn                   sys32_sigreturn
-120    i386    clone                   sys_clone                       stub32_clone
+120    i386    clone                   sys_clone                       compat_sys_x86_clone
 121    i386    setdomainname           sys_setdomainname
 122    i386    uname                   sys_newuname
 123    i386    modify_ldt              sys_modify_ldt
 177    i386    rt_sigtimedwait         sys_rt_sigtimedwait             compat_sys_rt_sigtimedwait
 178    i386    rt_sigqueueinfo         sys_rt_sigqueueinfo             compat_sys_rt_sigqueueinfo
 179    i386    rt_sigsuspend           sys_rt_sigsuspend
-180    i386    pread64                 sys_pread64                     sys32_pread
-181    i386    pwrite64                sys_pwrite64                    sys32_pwrite
+180    i386    pread64                 sys_pread64                     compat_sys_x86_pread
+181    i386    pwrite64                sys_pwrite64                    compat_sys_x86_pwrite
 182    i386    chown                   sys_chown16
 183    i386    getcwd                  sys_getcwd
 184    i386    capget                  sys_capget
 187    i386    sendfile                sys_sendfile                    compat_sys_sendfile
 188    i386    getpmsg
 189    i386    putpmsg
-190    i386    vfork                   sys_vfork                       sys_vfork
+190    i386    vfork                   sys_vfork
 191    i386    ugetrlimit              sys_getrlimit                   compat_sys_getrlimit
 192    i386    mmap2                   sys_mmap_pgoff
-193    i386    truncate64              sys_truncate64                  sys32_truncate64
-194    i386    ftruncate64             sys_ftruncate64                 sys32_ftruncate64
-195    i386    stat64                  sys_stat64                      sys32_stat64
-196    i386    lstat64                 sys_lstat64                     sys32_lstat64
-197    i386    fstat64                 sys_fstat64                     sys32_fstat64
+193    i386    truncate64              sys_truncate64                  compat_sys_x86_truncate64
+194    i386    ftruncate64             sys_ftruncate64                 compat_sys_x86_ftruncate64
+195    i386    stat64                  sys_stat64                      compat_sys_x86_stat64
+196    i386    lstat64                 sys_lstat64                     compat_sys_x86_lstat64
+197    i386    fstat64                 sys_fstat64                     compat_sys_x86_fstat64
 198    i386    lchown32                sys_lchown
 199    i386    getuid32                sys_getuid
 200    i386    getgid32                sys_getgid
 # 222 is unused
 # 223 is unused
 224    i386    gettid                  sys_gettid
-225    i386    readahead               sys_readahead                   sys32_readahead
+225    i386    readahead               sys_readahead                   compat_sys_x86_readahead
 226    i386    setxattr                sys_setxattr
 227    i386    lsetxattr               sys_lsetxattr
 228    i386    fsetxattr               sys_fsetxattr
 247    i386    io_getevents            sys_io_getevents                compat_sys_io_getevents
 248    i386    io_submit               sys_io_submit                   compat_sys_io_submit
 249    i386    io_cancel               sys_io_cancel
-250    i386    fadvise64               sys_fadvise64                   sys32_fadvise64
+250    i386    fadvise64               sys_fadvise64                   compat_sys_x86_fadvise64
 # 251 is available for reuse (was briefly sys_set_zone_reclaim)
 252    i386    exit_group              sys_exit_group
 253    i386    lookup_dcookie          sys_lookup_dcookie              compat_sys_lookup_dcookie
 269    i386    fstatfs64               sys_fstatfs64                   compat_sys_fstatfs64
 270    i386    tgkill                  sys_tgkill
 271    i386    utimes                  sys_utimes                      compat_sys_utimes
-272    i386    fadvise64_64            sys_fadvise64_64                sys32_fadvise64_64
+272    i386    fadvise64_64            sys_fadvise64_64                compat_sys_x86_fadvise64_64
 273    i386    vserver
 274    i386    mbind                   sys_mbind
 275    i386    get_mempolicy           sys_get_mempolicy               compat_sys_get_mempolicy
 297    i386    mknodat                 sys_mknodat
 298    i386    fchownat                sys_fchownat
 299    i386    futimesat               sys_futimesat                   compat_sys_futimesat
-300    i386    fstatat64               sys_fstatat64                   sys32_fstatat
+300    i386    fstatat64               sys_fstatat64                   compat_sys_x86_fstatat
 301    i386    unlinkat                sys_unlinkat
 302    i386    renameat                sys_renameat
 303    i386    linkat                  sys_linkat
 311    i386    set_robust_list         sys_set_robust_list             compat_sys_set_robust_list
 312    i386    get_robust_list         sys_get_robust_list             compat_sys_get_robust_list
 313    i386    splice                  sys_splice
-314    i386    sync_file_range         sys_sync_file_range             sys32_sync_file_range
+314    i386    sync_file_range         sys_sync_file_range             compat_sys_x86_sync_file_range
 315    i386    tee                     sys_tee
 316    i386    vmsplice                sys_vmsplice                    compat_sys_vmsplice
 317    i386    move_pages              sys_move_pages                  compat_sys_move_pages
 321    i386    signalfd                sys_signalfd                    compat_sys_signalfd
 322    i386    timerfd_create          sys_timerfd_create
 323    i386    eventfd                 sys_eventfd
-324    i386    fallocate               sys_fallocate                   sys32_fallocate
+324    i386    fallocate               sys_fallocate                   compat_sys_x86_fallocate
 325    i386    timerfd_settime         sys_timerfd_settime             compat_sys_timerfd_settime
 326    i386    timerfd_gettime         sys_timerfd_gettime             compat_sys_timerfd_gettime
 327    i386    signalfd4               sys_signalfd4                   compat_sys_signalfd4
index 577fa8adb785baf5ea1c993a2bbc88adf43fbbcc..8560ef68a9d631163934a40415df65d2a495f2bc 100644 (file)
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
 
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode =
-#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE)
-       NATIVE;
-#elif defined(CONFIG_LEGACY_VSYSCALL_NONE)
+static enum { EMULATE, NONE } vsyscall_mode =
+#ifdef CONFIG_LEGACY_VSYSCALL_NONE
        NONE;
 #else
        EMULATE;
@@ -56,8 +54,6 @@ static int __init vsyscall_setup(char *str)
        if (str) {
                if (!strcmp("emulate", str))
                        vsyscall_mode = EMULATE;
-               else if (!strcmp("native", str))
-                       vsyscall_mode = NATIVE;
                else if (!strcmp("none", str))
                        vsyscall_mode = NONE;
                else
@@ -139,10 +135,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
        WARN_ON_ONCE(address != regs->ip);
 
-       /* This should be unreachable in NATIVE mode. */
-       if (WARN_ON(vsyscall_mode == NATIVE))
-               return false;
-
        if (vsyscall_mode == NONE) {
                warn_bad_vsyscall(KERN_INFO, regs,
                                  "vsyscall attempted with vsyscall=none");
@@ -370,9 +362,7 @@ void __init map_vsyscall(void)
 
        if (vsyscall_mode != NONE) {
                __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
-                            vsyscall_mode == NATIVE
-                            ? PAGE_KERNEL_VSYSCALL
-                            : PAGE_KERNEL_VVAR);
+                            PAGE_KERNEL_VVAR);
                set_vsyscall_pgtable_user_bits(swapper_pg_dir);
        }
 
index 6d8044ab10607b6c668bfee0d6366266401e7e2f..22ec65bc033a93c0acd850b76bbb59d202f8c452 100644 (file)
@@ -3606,7 +3606,7 @@ static struct intel_uncore_type skx_uncore_imc = {
 };
 
 static struct attribute *skx_upi_uncore_formats_attr[] = {
-       &format_attr_event_ext.attr,
+       &format_attr_event.attr,
        &format_attr_umask_ext.attr,
        &format_attr_edge.attr,
        &format_attr_inv.attr,
index 96cd33bbfc85494f52e4131f50b37129393d3b8a..6512498bbef69ced1f98c72afb5b11ade91ef4a5 100644 (file)
 #define AA(__x)                ((unsigned long)(__x))
 
 
-asmlinkage long sys32_truncate64(const char __user *filename,
-                                unsigned long offset_low,
-                                unsigned long offset_high)
+COMPAT_SYSCALL_DEFINE3(x86_truncate64, const char __user *, filename,
+                      unsigned long, offset_low, unsigned long, offset_high)
 {
        return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low);
 }
 
-asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low,
-                                 unsigned long offset_high)
+COMPAT_SYSCALL_DEFINE3(x86_ftruncate64, unsigned int, fd,
+                      unsigned long, offset_low, unsigned long, offset_high)
 {
        return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
 }
@@ -96,8 +95,8 @@ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
        return 0;
 }
 
-asmlinkage long sys32_stat64(const char __user *filename,
-                            struct stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(x86_stat64, const char __user *, filename,
+                      struct stat64 __user *, statbuf)
 {
        struct kstat stat;
        int ret = vfs_stat(filename, &stat);
@@ -107,8 +106,8 @@ asmlinkage long sys32_stat64(const char __user *filename,
        return ret;
 }
 
-asmlinkage long sys32_lstat64(const char __user *filename,
-                             struct stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(x86_lstat64, const char __user *, filename,
+                      struct stat64 __user *, statbuf)
 {
        struct kstat stat;
        int ret = vfs_lstat(filename, &stat);
@@ -117,7 +116,8 @@ asmlinkage long sys32_lstat64(const char __user *filename,
        return ret;
 }
 
-asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(x86_fstat64, unsigned int, fd,
+                      struct stat64 __user *, statbuf)
 {
        struct kstat stat;
        int ret = vfs_fstat(fd, &stat);
@@ -126,8 +126,9 @@ asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
        return ret;
 }
 
-asmlinkage long sys32_fstatat(unsigned int dfd, const char __user *filename,
-                             struct stat64 __user *statbuf, int flag)
+COMPAT_SYSCALL_DEFINE4(x86_fstatat, unsigned int, dfd,
+                      const char __user *, filename,
+                      struct stat64 __user *, statbuf, int, flag)
 {
        struct kstat stat;
        int error;
@@ -153,7 +154,7 @@ struct mmap_arg_struct32 {
        unsigned int offset;
 };
 
-asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
+COMPAT_SYSCALL_DEFINE1(x86_mmap, struct mmap_arg_struct32 __user *, arg)
 {
        struct mmap_arg_struct32 a;
 
@@ -167,22 +168,22 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
                               a.offset>>PAGE_SHIFT);
 }
 
-asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr,
-                             int options)
+COMPAT_SYSCALL_DEFINE3(x86_waitpid, compat_pid_t, pid, unsigned int __user *,
+                      stat_addr, int, options)
 {
        return compat_sys_wait4(pid, stat_addr, options, NULL);
 }
 
 /* warning: next two assume little endian */
-asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count,
-                           u32 poslo, u32 poshi)
+COMPAT_SYSCALL_DEFINE5(x86_pread, unsigned int, fd, char __user *, ubuf,
+                      u32, count, u32, poslo, u32, poshi)
 {
        return sys_pread64(fd, ubuf, count,
                         ((loff_t)AA(poshi) << 32) | AA(poslo));
 }
 
-asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
-                            u32 count, u32 poslo, u32 poshi)
+COMPAT_SYSCALL_DEFINE5(x86_pwrite, unsigned int, fd, const char __user *, ubuf,
+                      u32, count, u32, poslo, u32, poshi)
 {
        return sys_pwrite64(fd, ubuf, count,
                          ((loff_t)AA(poshi) << 32) | AA(poslo));
@@ -193,8 +194,9 @@ asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
  * Some system calls that need sign extended arguments. This could be
  * done by a generic wrapper.
  */
-long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
-                       __u32 len_low, __u32 len_high, int advice)
+COMPAT_SYSCALL_DEFINE6(x86_fadvise64_64, int, fd, __u32, offset_low,
+                      __u32, offset_high, __u32, len_low, __u32, len_high,
+                      int, advice)
 {
        return sys_fadvise64_64(fd,
                               (((u64)offset_high)<<32) | offset_low,
@@ -202,31 +204,43 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
                                advice);
 }
 
-asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
-                                  size_t count)
+COMPAT_SYSCALL_DEFINE4(x86_readahead, int, fd, unsigned int, off_lo,
+                      unsigned int, off_hi, size_t, count)
 {
        return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count);
 }
 
-asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi,
-                                     unsigned n_low, unsigned n_hi,  int flags)
+COMPAT_SYSCALL_DEFINE6(x86_sync_file_range, int, fd, unsigned int, off_low,
+                      unsigned int, off_hi, unsigned int, n_low,
+                      unsigned int, n_hi, int, flags)
 {
        return sys_sync_file_range(fd,
                                   ((u64)off_hi << 32) | off_low,
                                   ((u64)n_hi << 32) | n_low, flags);
 }
 
-asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi,
-                               size_t len, int advice)
+COMPAT_SYSCALL_DEFINE5(x86_fadvise64, int, fd, unsigned int, offset_lo,
+                      unsigned int, offset_hi, size_t, len, int, advice)
 {
        return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
                                len, advice);
 }
 
-asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo,
-                               unsigned offset_hi, unsigned len_lo,
-                               unsigned len_hi)
+COMPAT_SYSCALL_DEFINE6(x86_fallocate, int, fd, int, mode,
+                      unsigned int, offset_lo, unsigned int, offset_hi,
+                      unsigned int, len_lo, unsigned int, len_hi)
 {
        return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
                             ((u64)len_hi << 32) | len_lo);
 }
+
+/*
+ * The 32-bit clone ABI is CONFIG_CLONE_BACKWARDS
+ */
+COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags,
+                      unsigned long, newsp, int __user *, parent_tidptr,
+                      unsigned long, tls_val, int __user *, child_tidptr)
+{
+       return sys_clone(clone_flags, newsp, parent_tidptr, child_tidptr,
+                       tls_val);
+}
index 4d4015ddcf2633e9e8388216f9e9c8639e2eced8..c356098b6fb92b8ff7d42b2fd813c2a8551d3db1 100644 (file)
@@ -7,6 +7,8 @@
 #ifndef _ASM_X86_MACH_DEFAULT_APM_H
 #define _ASM_X86_MACH_DEFAULT_APM_H
 
+#include <asm/nospec-branch.h>
+
 #ifdef APM_ZERO_SEGS
 #      define APM_DO_ZERO_SEGS \
                "pushl %%ds\n\t" \
@@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
         * N.B. We do NOT need a cld after the BIOS call
         * because we always save and restore the flags.
         */
+       firmware_restrict_branch_speculation_start();
        __asm__ __volatile__(APM_DO_ZERO_SEGS
                "pushl %%edi\n\t"
                "pushl %%ebp\n\t"
@@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
                  "=S" (*esi)
                : "a" (func), "b" (ebx_in), "c" (ecx_in)
                : "memory", "cc");
+       firmware_restrict_branch_speculation_end();
 }
 
 static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
@@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
         * N.B. We do NOT need a cld after the BIOS call
         * because we always save and restore the flags.
         */
+       firmware_restrict_branch_speculation_start();
        __asm__ __volatile__(APM_DO_ZERO_SEGS
                "pushl %%edi\n\t"
                "pushl %%ebp\n\t"
@@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
                  "=S" (si)
                : "a" (func), "b" (ebx_in), "c" (ecx_in)
                : "memory", "cc");
+       firmware_restrict_branch_speculation_end();
        return error;
 }
 
index 4d111616524b2ee3c8d929ec98dd329b332811aa..1908214b91257f1d2442e2e6fc08b2f4c4f5bf65 100644 (file)
@@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)
 INDIRECT_THUNK(si)
 INDIRECT_THUNK(di)
 INDIRECT_THUNK(bp)
-asmlinkage void __fill_rsb(void);
-asmlinkage void __clear_rsb(void);
-
 #endif /* CONFIG_RETPOLINE */
index 3fa039855b8f70aa2738468e33bef882b0a398cb..9f645ba57dbb263822600aae5d82138316c8f6e3 100644 (file)
@@ -78,7 +78,7 @@ set_bit(long nr, volatile unsigned long *addr)
                        : "iq" ((u8)CONST_MASK(nr))
                        : "memory");
        } else {
-               asm volatile(LOCK_PREFIX "bts %1,%0"
+               asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
                        : BITOP_ADDR(addr) : "Ir" (nr) : "memory");
        }
 }
@@ -94,7 +94,7 @@ set_bit(long nr, volatile unsigned long *addr)
  */
 static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
 {
-       asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
+       asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
 }
 
 /**
@@ -115,7 +115,7 @@ clear_bit(long nr, volatile unsigned long *addr)
                        : CONST_MASK_ADDR(nr, addr)
                        : "iq" ((u8)~CONST_MASK(nr)));
        } else {
-               asm volatile(LOCK_PREFIX "btr %1,%0"
+               asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
                        : BITOP_ADDR(addr)
                        : "Ir" (nr));
        }
@@ -137,7 +137,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad
 
 static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
 {
-       asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
+       asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
 }
 
 static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
@@ -182,7 +182,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *
  */
 static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
 {
-       asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
+       asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
 }
 
 /**
@@ -201,7 +201,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
                        : CONST_MASK_ADDR(nr, addr)
                        : "iq" ((u8)CONST_MASK(nr)));
        } else {
-               asm volatile(LOCK_PREFIX "btc %1,%0"
+               asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
                        : BITOP_ADDR(addr)
                        : "Ir" (nr));
        }
@@ -217,7 +217,8 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
  */
 static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-       GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c);
+       GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts),
+                        *addr, "Ir", nr, "%0", c);
 }
 
 /**
@@ -246,7 +247,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
 {
        bool oldbit;
 
-       asm("bts %2,%1"
+       asm(__ASM_SIZE(bts) " %2,%1"
            CC_SET(c)
            : CC_OUT(c) (oldbit), ADDR
            : "Ir" (nr));
@@ -263,7 +264,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
  */
 static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-       GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c);
+       GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr),
+                        *addr, "Ir", nr, "%0", c);
 }
 
 /**
@@ -286,7 +288,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
 {
        bool oldbit;
 
-       asm volatile("btr %2,%1"
+       asm volatile(__ASM_SIZE(btr) " %2,%1"
                     CC_SET(c)
                     : CC_OUT(c) (oldbit), ADDR
                     : "Ir" (nr));
@@ -298,7 +300,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
 {
        bool oldbit;
 
-       asm volatile("btc %2,%1"
+       asm volatile(__ASM_SIZE(btc) " %2,%1"
                     CC_SET(c)
                     : CC_OUT(c) (oldbit), ADDR
                     : "Ir" (nr) : "memory");
@@ -316,7 +318,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
  */
 static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-       GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c);
+       GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc),
+                        *addr, "Ir", nr, "%0", c);
 }
 
 static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
@@ -329,7 +332,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
 {
        bool oldbit;
 
-       asm volatile("bt %2,%1"
+       asm volatile(__ASM_SIZE(bt) " %2,%1"
                     CC_SET(c)
                     : CC_OUT(c) (oldbit)
                     : "m" (*(unsigned long *)addr), "Ir" (nr));
index 0dfe4d3f74e24d6655fc40f0460b9e489fb9ef69..d554c11e01ff46742d53148df0ffb9c3476e8d6e 100644 (file)
 #define X86_FEATURE_SEV                        ( 7*32+20) /* AMD Secure Encrypted Virtualization */
 
 #define X86_FEATURE_USE_IBPB           ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW                ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW         ( 8*32+ 0) /* Intel TPR Shadow */
 #define X86_FEATURE_VPCLMULQDQ         (16*32+10) /* Carry-Less Multiplication Double Quadword */
 #define X86_FEATURE_AVX512_VNNI                (16*32+11) /* Vector Neural Network Instructions */
 #define X86_FEATURE_AVX512_BITALG      (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+#define X86_FEATURE_TME                        (16*32+13) /* Intel Total Memory Encryption */
 #define X86_FEATURE_AVX512_VPOPCNTDQ   (16*32+14) /* POPCNT for vectors of DW/QW */
 #define X86_FEATURE_LA57               (16*32+16) /* 5-level page tables */
 #define X86_FEATURE_RDPID              (16*32+22) /* RDPID instruction */
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
 #define X86_FEATURE_AVX512_4VNNIW      (18*32+ 2) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS      (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_PCONFIG            (18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_SPEC_CTRL          (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP                (18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_ARCH_CAPABILITIES  (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
index 85f6ccb80b91771029f347f1875f7a6d923ef184..a399c1ebf6f0e6d974da0b773248b9c0cfe811a9 100644 (file)
@@ -6,6 +6,7 @@
 #include <asm/pgtable.h>
 #include <asm/processor-flags.h>
 #include <asm/tlb.h>
+#include <asm/nospec-branch.h>
 
 /*
  * We map the EFI regions needed for runtime services non-contiguously,
 
 extern asmlinkage unsigned long efi_call_phys(void *, ...);
 
-#define arch_efi_call_virt_setup()     kernel_fpu_begin()
-#define arch_efi_call_virt_teardown()  kernel_fpu_end()
+#define arch_efi_call_virt_setup()                                     \
+({                                                                     \
+       kernel_fpu_begin();                                             \
+       firmware_restrict_branch_speculation_start();                   \
+})
+
+#define arch_efi_call_virt_teardown()                                  \
+({                                                                     \
+       firmware_restrict_branch_speculation_end();                     \
+       kernel_fpu_end();                                               \
+})
+
 
 /*
  * Wrap all the virtual calls in a way that forces the parameters on the stack.
@@ -73,6 +84,7 @@ struct efi_scratch {
        efi_sync_low_kernel_mappings();                                 \
        preempt_disable();                                              \
        __kernel_fpu_begin();                                           \
+       firmware_restrict_branch_speculation_start();                   \
                                                                        \
        if (efi_scratch.use_pgd) {                                      \
                efi_scratch.prev_cr3 = __read_cr3();                    \
@@ -91,6 +103,7 @@ struct efi_scratch {
                __flush_tlb_all();                                      \
        }                                                               \
                                                                        \
+       firmware_restrict_branch_speculation_end();                     \
        __kernel_fpu_end();                                             \
        preempt_enable();                                               \
 })
index dd6f57a54a2626c080c8505cd670ef8c54c7bf56..b605a5b6a30c38f1bdcf647be241516b5e1310b8 100644 (file)
@@ -507,6 +507,7 @@ struct kvm_vcpu_arch {
        u64 smi_count;
        bool tpr_access_reporting;
        u64 ia32_xss;
+       u64 microcode_version;
 
        /*
         * Paging state of the vcpu
@@ -1095,6 +1096,8 @@ struct kvm_x86_ops {
        int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
        int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
        int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+
+       int (*get_msr_feature)(struct kvm_msr_entry *entry);
 };
 
 struct kvm_arch_async_pf {
@@ -1464,7 +1467,4 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
 #define put_smstate(type, buf, offset, val)                      \
        *(type *)((buf) + (offset) - 0x7e00) = val
 
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-               unsigned long start, unsigned long end);
-
 #endif /* _ASM_X86_KVM_HOST_H */
index 55520cec8b27d69727092e6fd81d3a0c0f4db252..6cf0e4cb7b9763a7d4d10438017a73aac737720b 100644 (file)
@@ -37,7 +37,13 @@ struct cpu_signature {
 
 struct device;
 
-enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
+enum ucode_state {
+       UCODE_OK        = 0,
+       UCODE_NEW,
+       UCODE_UPDATED,
+       UCODE_NFOUND,
+       UCODE_ERROR,
+};
 
 struct microcode_ops {
        enum ucode_state (*request_microcode_user) (int cpu,
@@ -54,7 +60,7 @@ struct microcode_ops {
         * are being called.
         * See also the "Synchronization" section in microcode_core.c.
         */
-       int (*apply_microcode) (int cpu);
+       enum ucode_state (*apply_microcode) (int cpu);
        int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
 };
 
index c931b88982a0ff59e3b67947cc606e452f327dc0..1de72ce514cd5561dbafea43996d909f449f8766 100644 (file)
@@ -74,6 +74,7 @@ static inline void *ldt_slot_va(int slot)
        return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
 #else
        BUG();
+       return (void *)fix_to_virt(FIX_HOLE);
 #endif
 }
 
index 81a1be3265711acea06a2538b6b1fdd332b0ab19..f928ad9b143fedea1085dedc508658fa745b4ceb 100644 (file)
@@ -8,6 +8,50 @@
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
 
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS                32      /* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS         16      /* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version — two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp)      \
+       mov     $(nr/2), reg;                   \
+771:                                           \
+       call    772f;                           \
+773:   /* speculation trap */                  \
+       pause;                                  \
+       lfence;                                 \
+       jmp     773b;                           \
+772:                                           \
+       call    774f;                           \
+775:   /* speculation trap */                  \
+       pause;                                  \
+       lfence;                                 \
+       jmp     775b;                           \
+774:                                           \
+       dec     reg;                            \
+       jnz     771b;                           \
+       add     $(BITS_PER_LONG/8) * nr, sp;
+
 #ifdef __ASSEMBLY__
 
 /*
        .popsection
 .endm
 
+/*
+ * This should be used immediately before an indirect jump/call. It tells
+ * objtool the subsequent indirect jump/call is vouched safe for retpoline
+ * builds.
+ */
+.macro ANNOTATE_RETPOLINE_SAFE
+       .Lannotate_\@:
+       .pushsection .discard.retpoline_safe
+       _ASM_PTR .Lannotate_\@
+       .popsection
+.endm
+
 /*
  * These are the bare retpoline primitives for indirect jmp and call.
  * Do not use these directly; they only exist to make the ALTERNATIVE
 .macro JMP_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
        ANNOTATE_NOSPEC_ALTERNATIVE
-       ALTERNATIVE_2 __stringify(jmp *\reg),                           \
+       ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg),  \
                __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
-               __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+               __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
 #else
        jmp     *\reg
 #endif
 .macro CALL_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
        ANNOTATE_NOSPEC_ALTERNATIVE
-       ALTERNATIVE_2 __stringify(call *\reg),                          \
+       ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \
                __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
-               __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+               __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
 #else
        call    *\reg
 #endif
 .endm
 
-/* This clobbers the BX register */
-.macro FILL_RETURN_BUFFER nr:req ftr:req
+ /*
+  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+  * monstrosity above, manually.
+  */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
 #ifdef CONFIG_RETPOLINE
-       ALTERNATIVE "", "call __clear_rsb", \ftr
+       ANNOTATE_NOSPEC_ALTERNATIVE
+       ALTERNATIVE "jmp .Lskip_rsb_\@",                                \
+               __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))    \
+               \ftr
+.Lskip_rsb_\@:
 #endif
 .endm
 
        ".long 999b - .\n\t"                                    \
        ".popsection\n\t"
 
+#define ANNOTATE_RETPOLINE_SAFE                                        \
+       "999:\n\t"                                              \
+       ".pushsection .discard.retpoline_safe\n\t"              \
+       _ASM_PTR " 999b\n\t"                                    \
+       ".popsection\n\t"
+
 #if defined(CONFIG_X86_64) && defined(RETPOLINE)
 
 /*
 # define CALL_NOSPEC                                           \
        ANNOTATE_NOSPEC_ALTERNATIVE                             \
        ALTERNATIVE(                                            \
+       ANNOTATE_RETPOLINE_SAFE                                 \
        "call *%[thunk_target]\n",                              \
        "call __x86_indirect_thunk_%V[thunk_target]\n",         \
        X86_FEATURE_RETPOLINE)
  * otherwise we'll run out of registers. We don't care about CET
  * here, anyway.
  */
-# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n",    \
+# define CALL_NOSPEC                                           \
+       ALTERNATIVE(                                            \
+       ANNOTATE_RETPOLINE_SAFE                                 \
+       "call *%[thunk_target]\n",                              \
        "       jmp    904f;\n"                                 \
        "       .align 16\n"                                    \
        "901:   call   903f;\n"                                 \
@@ -156,26 +229,54 @@ extern char __indirect_thunk_end[];
 static inline void vmexit_fill_RSB(void)
 {
 #ifdef CONFIG_RETPOLINE
-       alternative_input("",
-                         "call __fill_rsb",
-                         X86_FEATURE_RETPOLINE,
-                         ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
+       unsigned long loops;
+
+       asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+                     ALTERNATIVE("jmp 910f",
+                                 __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+                                 X86_FEATURE_RETPOLINE)
+                     "910:"
+                     : "=r" (loops), ASM_CALL_CONSTRAINT
+                     : : "memory" );
 #endif
 }
 
+#define alternative_msr_write(_msr, _val, _feature)            \
+       asm volatile(ALTERNATIVE("",                            \
+                                "movl %[msr], %%ecx\n\t"       \
+                                "movl %[val], %%eax\n\t"       \
+                                "movl $0, %%edx\n\t"           \
+                                "wrmsr",                       \
+                                _feature)                      \
+                    : : [msr] "i" (_msr), [val] "i" (_val)     \
+                    : "eax", "ecx", "edx", "memory")
+
 static inline void indirect_branch_prediction_barrier(void)
 {
-       asm volatile(ALTERNATIVE("",
-                                "movl %[msr], %%ecx\n\t"
-                                "movl %[val], %%eax\n\t"
-                                "movl $0, %%edx\n\t"
-                                "wrmsr",
-                                X86_FEATURE_USE_IBPB)
-                    : : [msr] "i" (MSR_IA32_PRED_CMD),
-                        [val] "i" (PRED_CMD_IBPB)
-                    : "eax", "ecx", "edx", "memory");
+       alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
+                             X86_FEATURE_USE_IBPB);
 }
 
+/*
+ * With retpoline, we must use IBRS to restrict branch prediction
+ * before calling into firmware.
+ *
+ * (Implemented as CPP macros due to header hell.)
+ */
+#define firmware_restrict_branch_speculation_start()                   \
+do {                                                                   \
+       preempt_disable();                                              \
+       alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS,       \
+                             X86_FEATURE_USE_IBRS_FW);                 \
+} while (0)
+
+#define firmware_restrict_branch_speculation_end()                     \
+do {                                                                   \
+       alternative_msr_write(MSR_IA32_SPEC_CTRL, 0,                    \
+                             X86_FEATURE_USE_IBRS_FW);                 \
+       preempt_enable();                                               \
+} while (0)
+
 #endif /* __ASSEMBLY__ */
 
 /*
index 554841fab717aef09d2b5cc57410a6eed8c2df0c..c83a2f418cea097bb2c5a9545409c5e6efcbbc50 100644 (file)
@@ -7,6 +7,7 @@
 #ifdef CONFIG_PARAVIRT
 #include <asm/pgtable_types.h>
 #include <asm/asm.h>
+#include <asm/nospec-branch.h>
 
 #include <asm/paravirt_types.h>
 
@@ -879,23 +880,27 @@ extern void default_banner(void);
 
 #define INTERRUPT_RETURN                                               \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,       \
-                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
+                 ANNOTATE_RETPOLINE_SAFE;                                      \
+                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);)
 
 #define DISABLE_INTERRUPTS(clobbers)                                   \
        PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
                  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);            \
+                 ANNOTATE_RETPOLINE_SAFE;                                      \
                  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);    \
                  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 
 #define ENABLE_INTERRUPTS(clobbers)                                    \
        PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,  \
                  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);            \
+                 ANNOTATE_RETPOLINE_SAFE;                                      \
                  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);     \
                  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 
 #ifdef CONFIG_X86_32
 #define GET_CR0_INTO_EAX                               \
        push %ecx; push %edx;                           \
+       ANNOTATE_RETPOLINE_SAFE;                                \
        call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
        pop %edx; pop %ecx
 #else  /* !CONFIG_X86_32 */
@@ -917,21 +922,25 @@ extern void default_banner(void);
  */
 #define SWAPGS                                                         \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,     \
-                 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs)          \
+                 ANNOTATE_RETPOLINE_SAFE;                                      \
+                 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs);         \
                 )
 
 #define GET_CR2_INTO_RAX                               \
-       call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
+       ANNOTATE_RETPOLINE_SAFE;                                \
+       call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2);
 
 #define USERGS_SYSRET64                                                        \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),       \
                  CLBR_NONE,                                            \
-                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+                 ANNOTATE_RETPOLINE_SAFE;                                      \
+                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);)
 
 #ifdef CONFIG_DEBUG_ENTRY
 #define SAVE_FLAGS(clobbers)                                        \
        PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
                  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \
+                 ANNOTATE_RETPOLINE_SAFE;                                  \
                  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl);    \
                  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 #endif
index f624f1f10316c248911585f757ea5bd257e98434..180bc0bff0fbd98195b8e81ce9aa7232dde06ee2 100644 (file)
@@ -43,6 +43,7 @@
 #include <asm/desc_defs.h>
 #include <asm/kmap_types.h>
 #include <asm/pgtable_types.h>
+#include <asm/nospec-branch.h>
 
 struct page;
 struct thread_struct;
@@ -392,7 +393,9 @@ int paravirt_disable_iospace(void);
  * offset into the paravirt_patch_template structure, and can therefore be
  * freely converted back into a structure offset.
  */
-#define PARAVIRT_CALL  "call *%c[paravirt_opptr];"
+#define PARAVIRT_CALL                                  \
+       ANNOTATE_RETPOLINE_SAFE                         \
+       "call *%c[paravirt_opptr];"
 
 /*
  * These macros are intended to wrap calls through one of the paravirt
index ba3c523aaf1618fdf6dfb35ade4c643799deec29..a06b07399d172c06b428936dc14e033ae9697298 100644 (file)
@@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
 {
        bool oldbit;
 
-       asm volatile("bt "__percpu_arg(2)",%1"
+       asm volatile("btl "__percpu_arg(2)",%1"
                        CC_SET(c)
                        : CC_OUT(c) (oldbit)
                        : "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
index 63c2552b6b6547b71bd7aa0934bc3c8c2cb54dc1..b444d83cfc952fc121d77599a938bbcc6c0d864b 100644 (file)
@@ -350,14 +350,14 @@ static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
 {
        pmdval_t v = native_pmd_val(pmd);
 
-       return __pmd(v | set);
+       return native_make_pmd(v | set);
 }
 
 static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
 {
        pmdval_t v = native_pmd_val(pmd);
 
-       return __pmd(v & ~clear);
+       return native_make_pmd(v & ~clear);
 }
 
 static inline pmd_t pmd_mkold(pmd_t pmd)
@@ -409,14 +409,14 @@ static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
 {
        pudval_t v = native_pud_val(pud);
 
-       return __pud(v | set);
+       return native_make_pud(v | set);
 }
 
 static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
 {
        pudval_t v = native_pud_val(pud);
 
-       return __pud(v & ~clear);
+       return native_make_pud(v & ~clear);
 }
 
 static inline pud_t pud_mkold(pud_t pud)
index e55466760ff8e031433132eab676535e614e224f..b3ec519e39827e58eaeb8a567303e37a6bc2e919 100644 (file)
@@ -32,6 +32,7 @@ extern pmd_t initial_pg_pmd[];
 static inline void pgtable_cache_init(void) { }
 static inline void check_pgt_cache(void) { }
 void paging_init(void);
+void sync_initial_page_table(void);
 
 /*
  * Define this if things work differently on an i386 and an i486:
index 81462e9a34f6af49645a08f55c7d67e0144dbb77..1149d2112b2e17347e8f85c4cae355f4522cc40a 100644 (file)
@@ -28,6 +28,7 @@ extern pgd_t init_top_pgt[];
 #define swapper_pg_dir init_top_pgt
 
 extern void paging_init(void);
+static inline void sync_initial_page_table(void) { }
 
 #define pte_ERROR(e)                                   \
        pr_err("%s:%d: bad pte %p(%016lx)\n",           \
index 3696398a9475fe78500c8c0a62922620d975453c..acfe755562a6aa85ecd74294fa1d063093223976 100644 (file)
@@ -174,7 +174,6 @@ enum page_cache_mode {
 #define __PAGE_KERNEL_RO               (__PAGE_KERNEL & ~_PAGE_RW)
 #define __PAGE_KERNEL_RX               (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
 #define __PAGE_KERNEL_NOCACHE          (__PAGE_KERNEL | _PAGE_NOCACHE)
-#define __PAGE_KERNEL_VSYSCALL         (__PAGE_KERNEL_RX | _PAGE_USER)
 #define __PAGE_KERNEL_VVAR             (__PAGE_KERNEL_RO | _PAGE_USER)
 #define __PAGE_KERNEL_LARGE            (__PAGE_KERNEL | _PAGE_PSE)
 #define __PAGE_KERNEL_LARGE_EXEC       (__PAGE_KERNEL_EXEC | _PAGE_PSE)
@@ -206,7 +205,6 @@ enum page_cache_mode {
 #define PAGE_KERNEL_NOCACHE    __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
 #define PAGE_KERNEL_LARGE      __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
 #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_VSYSCALL   __pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC)
 #define PAGE_KERNEL_VVAR       __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
 
 #define PAGE_KERNEL_IO         __pgprot(__PAGE_KERNEL_IO)
@@ -323,6 +321,11 @@ static inline pudval_t native_pud_val(pud_t pud)
 #else
 #include <asm-generic/pgtable-nopud.h>
 
+static inline pud_t native_make_pud(pudval_t val)
+{
+       return (pud_t) { .p4d.pgd = native_make_pgd(val) };
+}
+
 static inline pudval_t native_pud_val(pud_t pud)
 {
        return native_pgd_val(pud.p4d.pgd);
@@ -344,6 +347,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
 #else
 #include <asm-generic/pgtable-nopmd.h>
 
+static inline pmd_t native_make_pmd(pmdval_t val)
+{
+       return (pmd_t) { .pud.p4d.pgd = native_make_pgd(val) };
+}
+
 static inline pmdval_t native_pmd_val(pmd_t pmd)
 {
        return native_pgd_val(pmd.pud.p4d.pgd);
index 1bd9ed87606f45f5a22f2510bde9ba34029a0551..b0ccd4847a58ab671476f412905a1df912029f1a 100644 (file)
@@ -977,4 +977,5 @@ bool xen_set_default_idle(void);
 
 void stop_this_cpu(void *dummy);
 void df_debug(struct pt_regs *regs, long error_code);
+void microcode_check(void);
 #endif /* _ASM_X86_PROCESSOR_H */
index 4e44250e7d0d75c6db385dbf151583b31d3e8c19..4cf11d88d3b35f48b89fc0413d6db70ec9877075 100644 (file)
@@ -17,7 +17,7 @@
 #define _REFCOUNT_EXCEPTION                            \
        ".pushsection .text..refcount\n"                \
        "111:\tlea %[counter], %%" _ASM_CX "\n"         \
-       "112:\t" ASM_UD0 "\n"                           \
+       "112:\t" ASM_UD2 "\n"                           \
        ASM_UNREACHABLE                                 \
        ".popsection\n"                                 \
        "113:\n"                                        \
@@ -67,13 +67,13 @@ static __always_inline __must_check
 bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 {
        GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
-                                 r->refs.counter, "er", i, "%0", e);
+                                 r->refs.counter, "er", i, "%0", e, "cx");
 }
 
 static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
 {
        GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
-                                r->refs.counter, "%0", e);
+                                r->refs.counter, "%0", e, "cx");
 }
 
 static __always_inline __must_check
index f91c365e57c36d2454806ff21a2d336dae5c6863..4914a3e7c8035538a167c0dc23a2a33afd4a1ca2 100644 (file)
@@ -2,8 +2,7 @@
 #ifndef _ASM_X86_RMWcc
 #define _ASM_X86_RMWcc
 
-#define __CLOBBERS_MEM         "memory"
-#define __CLOBBERS_MEM_CC_CX   "memory", "cc", "cx"
+#define __CLOBBERS_MEM(clb...) "memory", ## clb
 
 #if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO)
 
@@ -40,18 +39,19 @@ do {                                                                        \
 #endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
 
 #define GEN_UNARY_RMWcc(op, var, arg0, cc)                             \
-       __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM)
+       __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM())
 
-#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc)            \
+#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\
        __GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc,                 \
-                   __CLOBBERS_MEM_CC_CX)
+                   __CLOBBERS_MEM(clobbers))
 
 #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)                 \
        __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc,                \
-                   __CLOBBERS_MEM, vcon (val))
+                   __CLOBBERS_MEM(), vcon (val))
 
-#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc)        \
+#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc,        \
+                                 clobbers...)                          \
        __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc,  \
-                   __CLOBBERS_MEM_CC_CX, vcon (val))
+                   __CLOBBERS_MEM(clobbers), vcon (val))
 
 #endif /* _ASM_X86_RMWcc */
index d6baf23782bcc23811c12bac8a6a181c2a6cdb5b..5c019d23d06b1168da0ea965d7c35bebd4d02307 100644 (file)
@@ -10,6 +10,7 @@ extern struct exception_table_entry __stop___ex_table[];
 
 #if defined(CONFIG_X86_64)
 extern char __end_rodata_hpage_align[];
+extern char __entry_trampoline_start[], __entry_trampoline_end[];
 #endif
 
 #endif /* _ASM_X86_SECTIONS_H */
index 82c34ee25a651760c9950ce6c54625896fd9ea2f..906794aa034e732ec57d32a8be0ef77085a553a6 100644 (file)
 #include <asm/ia32.h>
 
 /* ia32/sys_ia32.c */
-asmlinkage long sys32_truncate64(const char __user *, unsigned long, unsigned long);
-asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long);
+asmlinkage long compat_sys_x86_truncate64(const char __user *, unsigned long,
+                                         unsigned long);
+asmlinkage long compat_sys_x86_ftruncate64(unsigned int, unsigned long,
+                                          unsigned long);
 
-asmlinkage long sys32_stat64(const char __user *, struct stat64 __user *);
-asmlinkage long sys32_lstat64(const char __user *, struct stat64 __user *);
-asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *);
-asmlinkage long sys32_fstatat(unsigned int, const char __user *,
+asmlinkage long compat_sys_x86_stat64(const char __user *,
+                                     struct stat64 __user *);
+asmlinkage long compat_sys_x86_lstat64(const char __user *,
+                                      struct stat64 __user *);
+asmlinkage long compat_sys_x86_fstat64(unsigned int, struct stat64 __user *);
+asmlinkage long compat_sys_x86_fstatat(unsigned int, const char __user *,
                              struct stat64 __user *, int);
 struct mmap_arg_struct32;
-asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *);
+asmlinkage long compat_sys_x86_mmap(struct mmap_arg_struct32 __user *);
 
-asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int);
+asmlinkage long compat_sys_x86_waitpid(compat_pid_t, unsigned int __user *,
+                                      int);
 
-asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
-asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32);
+asmlinkage long compat_sys_x86_pread(unsigned int, char __user *, u32, u32,
+                                    u32);
+asmlinkage long compat_sys_x86_pwrite(unsigned int, const char __user *, u32,
+                                     u32, u32);
 
-long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
-long sys32_vm86_warning(void);
+asmlinkage long compat_sys_x86_fadvise64_64(int, __u32, __u32, __u32, __u32,
+                                           int);
 
-asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t);
-asmlinkage long sys32_sync_file_range(int, unsigned, unsigned,
-                                     unsigned, unsigned, int);
-asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int);
-asmlinkage long sys32_fallocate(int, int, unsigned,
-                               unsigned, unsigned, unsigned);
+asmlinkage ssize_t compat_sys_x86_readahead(int, unsigned int, unsigned int,
+                                           size_t);
+asmlinkage long compat_sys_x86_sync_file_range(int, unsigned int, unsigned int,
+                                              unsigned int, unsigned int,
+                                              int);
+asmlinkage long compat_sys_x86_fadvise64(int, unsigned int, unsigned int,
+                                        size_t, int);
+asmlinkage long compat_sys_x86_fallocate(int, int, unsigned int, unsigned int,
+                                        unsigned int, unsigned int);
+asmlinkage long compat_sys_x86_clone(unsigned long, unsigned long, int __user *,
+                                    unsigned long, int __user *);
 
 /* ia32/ia32_signal.c */
 asmlinkage long sys32_sigreturn(void);
index 8b67807511329eae2eff2ced3733f68fcf8392c1..5db8b0b1076649fa287ad8ebfe2ba9853cd949d8 100644 (file)
@@ -352,6 +352,7 @@ enum vmcs_field {
 #define INTR_TYPE_NMI_INTR             (2 << 8) /* NMI */
 #define INTR_TYPE_HARD_EXCEPTION       (3 << 8) /* processor exception */
 #define INTR_TYPE_SOFT_INTR             (4 << 8) /* software interrupt */
+#define INTR_TYPE_PRIV_SW_EXCEPTION    (5 << 8) /* ICE breakpoint - undocumented */
 #define INTR_TYPE_SOFT_EXCEPTION       (6 << 8) /* software exception */
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
index 197c2e6c73765c364e519d8c70a1fa953d7d5053..099414345865d588e6c55b63078f8b168fc8e907 100644 (file)
 #define HV_X64_MSR_REENLIGHTENMENT_CONTROL     0x40000106
 
 struct hv_reenlightenment_control {
-       u64 vector:8;
-       u64 reserved1:8;
-       u64 enabled:1;
-       u64 reserved2:15;
-       u64 target_vp:32;
+       __u64 vector:8;
+       __u64 reserved1:8;
+       __u64 enabled:1;
+       __u64 reserved2:15;
+       __u64 target_vp:32;
 };
 
 #define HV_X64_MSR_TSC_EMULATION_CONTROL       0x40000107
 #define HV_X64_MSR_TSC_EMULATION_STATUS                0x40000108
 
 struct hv_tsc_emulation_control {
-       u64 enabled:1;
-       u64 reserved:63;
+       __u64 enabled:1;
+       __u64 reserved:63;
 };
 
 struct hv_tsc_emulation_status {
-       u64 inprogress:1;
-       u64 reserved:63;
+       __u64 inprogress:1;
+       __u64 reserved:63;
 };
 
 #define HV_X64_MSR_HYPERCALL_ENABLE            0x00000001
index 7a2ade4aa235380a8c28af6934d30566bb24de73..6cfa9c8cb7d650bef22010de0eca622a86364a73 100644 (file)
@@ -26,6 +26,7 @@
 #define KVM_FEATURE_PV_EOI             6
 #define KVM_FEATURE_PV_UNHALT          7
 #define KVM_FEATURE_PV_TLB_FLUSH       9
+#define KVM_FEATURE_ASYNC_PF_VMEXIT    10
 
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
index 91723461dc1feb0d63352c653fe0dafe625af379..435db58a7badec77e38e3d9e7f6ead5954efc4cc 100644 (file)
@@ -30,6 +30,7 @@ struct mce {
        __u64 synd;     /* MCA_SYND MSR: only valid on SMCA systems */
        __u64 ipid;     /* MCA_IPID MSR: only valid on SMCA systems */
        __u64 ppin;     /* Protected Processor Inventory Number */
+       __u32 microcode;/* Microcode revision */
 };
 
 #define MCE_GET_RECORD_LEN   _IOR('M', 1, int)
index 8ad2e410974f2d4b71a44f033e8c046671b89622..7c5538769f7e43f7bac9c8ff9a30b6a26ac89433 100644 (file)
@@ -1603,7 +1603,7 @@ static void __init delay_with_tsc(void)
        do {
                rep_nop();
                now = rdtsc();
-       } while ((now - start) < 40000000000UL / HZ &&
+       } while ((now - start) < 40000000000ULL / HZ &&
                time_before_eq(jiffies, end));
 }
 
index 3cc471beb50b499d89148bbdee37c697c596f743..bb6f7a2148d7781f64836a14f6f9884bd7955740 100644 (file)
@@ -134,21 +134,40 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
 {
        struct apic_chip_data *apicd = apic_chip_data(irqd);
        struct irq_desc *desc = irq_data_to_desc(irqd);
+       bool managed = irqd_affinity_is_managed(irqd);
 
        lockdep_assert_held(&vector_lock);
 
        trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector,
                            apicd->cpu);
 
-       /* Setup the vector move, if required  */
-       if (apicd->vector && cpu_online(apicd->cpu)) {
+       /*
+        * If there is no vector associated or if the associated vector is
+        * the shutdown vector, which is associated to make PCI/MSI
+        * shutdown mode work, then there is nothing to release. Clear out
+        * prev_vector for this and the offlined target case.
+        */
+       apicd->prev_vector = 0;
+       if (!apicd->vector || apicd->vector == MANAGED_IRQ_SHUTDOWN_VECTOR)
+               goto setnew;
+       /*
+        * If the target CPU of the previous vector is online, then mark
+        * the vector as move in progress and store it for cleanup when the
+        * first interrupt on the new vector arrives. If the target CPU is
+        * offline then the regular release mechanism via the cleanup
+        * vector is not possible and the vector can be immediately freed
+        * in the underlying matrix allocator.
+        */
+       if (cpu_online(apicd->cpu)) {
                apicd->move_in_progress = true;
                apicd->prev_vector = apicd->vector;
                apicd->prev_cpu = apicd->cpu;
        } else {
-               apicd->prev_vector = 0;
+               irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector,
+                               managed);
        }
 
+setnew:
        apicd->vector = newvec;
        apicd->cpu = newcpu;
        BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec]));
index d71c8b54b696d4593ffb15ff894468ad3e524a50..bfca937bdcc36ce8d9523f03dcc92e93d3c39d5c 100644 (file)
@@ -300,6 +300,15 @@ static void __init spectre_v2_select_mitigation(void)
                setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
                pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
        }
+
+       /*
+        * Retpoline means the kernel is safe because it has no indirect
+        * branches. But firmware isn't, so use IBRS to protect that.
+        */
+       if (boot_cpu_has(X86_FEATURE_IBRS)) {
+               setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
+               pr_info("Enabling Restricted Speculation for firmware calls\n");
+       }
 }
 
 #undef pr_fmt
@@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
        if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
                return sprintf(buf, "Not affected\n");
 
-       return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+       return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
                       boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+                      boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
                       spectre_v2_module_string());
 }
 #endif
index 824aee0117bb5402d52fb5c8958e98b0bebfdf0c..348cf48212405077bd731e25cf7a41de585ba61d 100644 (file)
@@ -1749,3 +1749,33 @@ static int __init init_cpu_syscore(void)
        return 0;
 }
 core_initcall(init_cpu_syscore);
+
+/*
+ * The microcode loader calls this upon late microcode load to recheck features,
+ * only when microcode has been updated. Caller holds microcode_mutex and CPU
+ * hotplug lock.
+ */
+void microcode_check(void)
+{
+       struct cpuinfo_x86 info;
+
+       perf_check_microcode();
+
+       /* Reload CPUID max function as it might've changed. */
+       info.cpuid_level = cpuid_eax(0);
+
+       /*
+        * Copy all capability leafs to pick up the synthetic ones so that
+        * memcmp() below doesn't fail on that. The ones coming from CPUID will
+        * get overwritten in get_cpu_cap().
+        */
+       memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability));
+
+       get_cpu_cap(&info);
+
+       if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)))
+               return;
+
+       pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
+       pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
+}
index d19e903214b403289aaf304eba85cc585c100c5e..c3af167d0a70c8e0220d3ae81383b8f8dee046de 100644 (file)
@@ -105,7 +105,7 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
 /*
  * Early microcode releases for the Spectre v2 mitigation were broken.
  * Information taken from;
- * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf
+ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf
  * - https://kb.vmware.com/s/article/52345
  * - Microcode revisions observed in the wild
  * - Release note from 20180108 microcode release
@@ -123,7 +123,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = {
        { INTEL_FAM6_KABYLAKE_MOBILE,   0x09,   0x80 },
        { INTEL_FAM6_SKYLAKE_X,         0x03,   0x0100013e },
        { INTEL_FAM6_SKYLAKE_X,         0x04,   0x0200003c },
-       { INTEL_FAM6_SKYLAKE_DESKTOP,   0x03,   0xc2 },
        { INTEL_FAM6_BROADWELL_CORE,    0x04,   0x28 },
        { INTEL_FAM6_BROADWELL_GT3E,    0x01,   0x1b },
        { INTEL_FAM6_BROADWELL_XEON_D,  0x02,   0x14 },
@@ -144,6 +143,13 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
 {
        int i;
 
+       /*
+        * We know that the hypervisor lies to us about the microcode version, so
+        * we may as well hope that it is running the correct version.
+        */
+       if (cpu_has(c, X86_FEATURE_HYPERVISOR))
+               return false;
+
        for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
                if (c->x86_model == spectre_bad_microcodes[i].model &&
                    c->x86_stepping == spectre_bad_microcodes[i].stepping)
index bdab7d2f51af4c2a32d18d891f54b5bc60f60bd4..fca759d272a1783e76d86dfa38e213d167f39ade 100644 (file)
@@ -1804,6 +1804,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
                goto out_common_fail;
        }
        closid = ret;
+       ret = 0;
 
        rdtgrp->closid = closid;
        list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
index 8ff94d1e2dce54e87cc72c63812365d610476ec8..466f47301334ba0c8e64c17fd8fa5b7903cca44f 100644 (file)
@@ -56,6 +56,9 @@
 
 static DEFINE_MUTEX(mce_log_mutex);
 
+/* sysfs synchronization */
+static DEFINE_MUTEX(mce_sysfs_mutex);
+
 #define CREATE_TRACE_POINTS
 #include <trace/events/mce.h>
 
@@ -130,6 +133,8 @@ void mce_setup(struct mce *m)
 
        if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
                rdmsrl(MSR_PPIN, m->ppin);
+
+       m->microcode = boot_cpu_data.microcode;
 }
 
 DEFINE_PER_CPU(struct mce, injectm);
@@ -262,7 +267,7 @@ static void __print_mce(struct mce *m)
         */
        pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
                m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
-               cpu_data(m->extcpu).microcode);
+               m->microcode);
 }
 
 static void print_mce(struct mce *m)
@@ -2086,6 +2091,7 @@ static ssize_t set_ignore_ce(struct device *s,
        if (kstrtou64(buf, 0, &new) < 0)
                return -EINVAL;
 
+       mutex_lock(&mce_sysfs_mutex);
        if (mca_cfg.ignore_ce ^ !!new) {
                if (new) {
                        /* disable ce features */
@@ -2098,6 +2104,8 @@ static ssize_t set_ignore_ce(struct device *s,
                        on_each_cpu(mce_enable_ce, (void *)1, 1);
                }
        }
+       mutex_unlock(&mce_sysfs_mutex);
+
        return size;
 }
 
@@ -2110,6 +2118,7 @@ static ssize_t set_cmci_disabled(struct device *s,
        if (kstrtou64(buf, 0, &new) < 0)
                return -EINVAL;
 
+       mutex_lock(&mce_sysfs_mutex);
        if (mca_cfg.cmci_disabled ^ !!new) {
                if (new) {
                        /* disable cmci */
@@ -2121,6 +2130,8 @@ static ssize_t set_cmci_disabled(struct device *s,
                        on_each_cpu(mce_enable_ce, NULL, 1);
                }
        }
+       mutex_unlock(&mce_sysfs_mutex);
+
        return size;
 }
 
@@ -2128,8 +2139,19 @@ static ssize_t store_int_with_restart(struct device *s,
                                      struct device_attribute *attr,
                                      const char *buf, size_t size)
 {
-       ssize_t ret = device_store_int(s, attr, buf, size);
+       unsigned long old_check_interval = check_interval;
+       ssize_t ret = device_store_ulong(s, attr, buf, size);
+
+       if (check_interval == old_check_interval)
+               return ret;
+
+       if (check_interval < 1)
+               check_interval = 1;
+
+       mutex_lock(&mce_sysfs_mutex);
        mce_restart();
+       mutex_unlock(&mce_sysfs_mutex);
+
        return ret;
 }
 
index 330b8462d426faad0dccdc480f34eec34cd8b92f..48179928ff38cf12476ce27cd096383aee1feb93 100644 (file)
@@ -339,7 +339,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
                return -EINVAL;
 
        ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size);
-       if (ret != UCODE_OK)
+       if (ret > UCODE_UPDATED)
                return -EINVAL;
 
        return 0;
@@ -498,7 +498,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
        return patch_size;
 }
 
-static int apply_microcode_amd(int cpu)
+static enum ucode_state apply_microcode_amd(int cpu)
 {
        struct cpuinfo_x86 *c = &cpu_data(cpu);
        struct microcode_amd *mc_amd;
@@ -512,7 +512,7 @@ static int apply_microcode_amd(int cpu)
 
        p = find_patch(cpu);
        if (!p)
-               return 0;
+               return UCODE_NFOUND;
 
        mc_amd  = p->data;
        uci->mc = p->data;
@@ -523,13 +523,13 @@ static int apply_microcode_amd(int cpu)
        if (rev >= mc_amd->hdr.patch_id) {
                c->microcode = rev;
                uci->cpu_sig.rev = rev;
-               return 0;
+               return UCODE_OK;
        }
 
        if (__apply_microcode_amd(mc_amd)) {
                pr_err("CPU%d: update failed for patch_level=0x%08x\n",
                        cpu, mc_amd->hdr.patch_id);
-               return -1;
+               return UCODE_ERROR;
        }
        pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
                mc_amd->hdr.patch_id);
@@ -537,7 +537,7 @@ static int apply_microcode_amd(int cpu)
        uci->cpu_sig.rev = mc_amd->hdr.patch_id;
        c->microcode = mc_amd->hdr.patch_id;
 
-       return 0;
+       return UCODE_UPDATED;
 }
 
 static int install_equiv_cpu_table(const u8 *buf)
@@ -683,27 +683,35 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
 static enum ucode_state
 load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
 {
+       struct ucode_patch *p;
        enum ucode_state ret;
 
        /* free old equiv table */
        free_equiv_cpu_table();
 
        ret = __load_microcode_amd(family, data, size);
-
-       if (ret != UCODE_OK)
+       if (ret != UCODE_OK) {
                cleanup();
+               return ret;
+       }
 
-#ifdef CONFIG_X86_32
-       /* save BSP's matching patch for early load */
-       if (save) {
-               struct ucode_patch *p = find_patch(0);
-               if (p) {
-                       memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
-                       memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data),
-                                                              PATCH_MAX_SIZE));
-               }
+       p = find_patch(0);
+       if (!p) {
+               return ret;
+       } else {
+               if (boot_cpu_data.microcode == p->patch_id)
+                       return ret;
+
+               ret = UCODE_NEW;
        }
-#endif
+
+       /* save BSP's matching patch for early load */
+       if (!save)
+               return ret;
+
+       memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
+       memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), PATCH_MAX_SIZE));
+
        return ret;
 }
 
index 319dd65f98a25530d3a55cb6ea1cf61f6c2004bc..10c4fc2c91f8ed1dd6879c8b0b8aa24d7275cd3b 100644 (file)
 #define pr_fmt(fmt) "microcode: " fmt
 
 #include <linux/platform_device.h>
+#include <linux/stop_machine.h>
 #include <linux/syscore_ops.h>
 #include <linux/miscdevice.h>
 #include <linux/capability.h>
 #include <linux/firmware.h>
 #include <linux/kernel.h>
+#include <linux/delay.h>
 #include <linux/mutex.h>
 #include <linux/cpu.h>
+#include <linux/nmi.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 
@@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache);
  */
 static DEFINE_MUTEX(microcode_mutex);
 
+/*
+ * Serialize late loading so that CPUs get updated one-by-one.
+ */
+static DEFINE_SPINLOCK(update_lock);
+
 struct ucode_cpu_info          ucode_cpu_info[NR_CPUS];
 
 struct cpu_info_ctx {
@@ -373,26 +381,23 @@ static int collect_cpu_info(int cpu)
        return ret;
 }
 
-struct apply_microcode_ctx {
-       int err;
-};
-
 static void apply_microcode_local(void *arg)
 {
-       struct apply_microcode_ctx *ctx = arg;
+       enum ucode_state *err = arg;
 
-       ctx->err = microcode_ops->apply_microcode(smp_processor_id());
+       *err = microcode_ops->apply_microcode(smp_processor_id());
 }
 
 static int apply_microcode_on_target(int cpu)
 {
-       struct apply_microcode_ctx ctx = { .err = 0 };
+       enum ucode_state err;
        int ret;
 
-       ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1);
-       if (!ret)
-               ret = ctx.err;
-
+       ret = smp_call_function_single(cpu, apply_microcode_local, &err, 1);
+       if (!ret) {
+               if (err == UCODE_ERROR)
+                       ret = 1;
+       }
        return ret;
 }
 
@@ -489,31 +494,124 @@ static void __exit microcode_dev_exit(void)
 /* fake device for request_firmware */
 static struct platform_device  *microcode_pdev;
 
-static int reload_for_cpu(int cpu)
+/*
+ * Late loading dance. Why the heavy-handed stomp_machine effort?
+ *
+ * - HT siblings must be idle and not execute other code while the other sibling
+ *   is loading microcode in order to avoid any negative interactions caused by
+ *   the loading.
+ *
+ * - In addition, microcode update on the cores must be serialized until this
+ *   requirement can be relaxed in the future. Right now, this is conservative
+ *   and good.
+ */
+#define SPINUNIT 100 /* 100 nsec */
+
+static int check_online_cpus(void)
 {
-       struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-       enum ucode_state ustate;
-       int err = 0;
+       if (num_online_cpus() == num_present_cpus())
+               return 0;
 
-       if (!uci->valid)
-               return err;
+       pr_err("Not all CPUs online, aborting microcode update.\n");
 
-       ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
-       if (ustate == UCODE_OK)
-               apply_microcode_on_target(cpu);
-       else
-               if (ustate == UCODE_ERROR)
-                       err = -EINVAL;
-       return err;
+       return -EINVAL;
+}
+
+static atomic_t late_cpus_in;
+static atomic_t late_cpus_out;
+
+static int __wait_for_cpus(atomic_t *t, long long timeout)
+{
+       int all_cpus = num_online_cpus();
+
+       atomic_inc(t);
+
+       while (atomic_read(t) < all_cpus) {
+               if (timeout < SPINUNIT) {
+                       pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
+                               all_cpus - atomic_read(t));
+                       return 1;
+               }
+
+               ndelay(SPINUNIT);
+               timeout -= SPINUNIT;
+
+               touch_nmi_watchdog();
+       }
+       return 0;
+}
+
+/*
+ * Returns:
+ * < 0 - on error
+ *   0 - no update done
+ *   1 - microcode was updated
+ */
+static int __reload_late(void *info)
+{
+       int cpu = smp_processor_id();
+       enum ucode_state err;
+       int ret = 0;
+
+       /*
+        * Wait for all CPUs to arrive. A load will not be attempted unless all
+        * CPUs show up.
+        */
+       if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC))
+               return -1;
+
+       spin_lock(&update_lock);
+       apply_microcode_local(&err);
+       spin_unlock(&update_lock);
+
+       if (err > UCODE_NFOUND) {
+               pr_warn("Error reloading microcode on CPU %d\n", cpu);
+               return -1;
+       /* siblings return UCODE_OK because their engine got updated already */
+       } else if (err == UCODE_UPDATED || err == UCODE_OK) {
+               ret = 1;
+       } else {
+               return ret;
+       }
+
+       /*
+        * Increase the wait timeout to a safe value here since we're
+        * serializing the microcode update and that could take a while on a
+        * large number of CPUs. And that is fine as the *actual* timeout will
+        * be determined by the last CPU finished updating and thus cut short.
+        */
+       if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus()))
+               panic("Timeout during microcode update!\n");
+
+       return ret;
+}
+
+/*
+ * Reload microcode late on all CPUs. Wait for a sec until they
+ * all gather together.
+ */
+static int microcode_reload_late(void)
+{
+       int ret;
+
+       atomic_set(&late_cpus_in,  0);
+       atomic_set(&late_cpus_out, 0);
+
+       ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
+       if (ret > 0)
+               microcode_check();
+
+       return ret;
 }
 
 static ssize_t reload_store(struct device *dev,
                            struct device_attribute *attr,
                            const char *buf, size_t size)
 {
+       enum ucode_state tmp_ret = UCODE_OK;
+       int bsp = boot_cpu_data.cpu_index;
        unsigned long val;
-       int cpu;
-       ssize_t ret = 0, tmp_ret;
+       ssize_t ret = 0;
 
        ret = kstrtoul(buf, 0, &val);
        if (ret)
@@ -522,23 +620,24 @@ static ssize_t reload_store(struct device *dev,
        if (val != 1)
                return size;
 
+       tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev, true);
+       if (tmp_ret != UCODE_NEW)
+               return size;
+
        get_online_cpus();
-       mutex_lock(&microcode_mutex);
-       for_each_online_cpu(cpu) {
-               tmp_ret = reload_for_cpu(cpu);
-               if (tmp_ret != 0)
-                       pr_warn("Error reloading microcode on CPU %d\n", cpu);
 
-               /* save retval of the first encountered reload error */
-               if (!ret)
-                       ret = tmp_ret;
-       }
-       if (!ret)
-               perf_check_microcode();
+       ret = check_online_cpus();
+       if (ret)
+               goto put;
+
+       mutex_lock(&microcode_mutex);
+       ret = microcode_reload_late();
        mutex_unlock(&microcode_mutex);
+
+put:
        put_online_cpus();
 
-       if (!ret)
+       if (ret >= 0)
                ret = size;
 
        return ret;
@@ -606,10 +705,8 @@ static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
        if (system_state != SYSTEM_RUNNING)
                return UCODE_NFOUND;
 
-       ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev,
-                                                    refresh_fw);
-
-       if (ustate == UCODE_OK) {
+       ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, refresh_fw);
+       if (ustate == UCODE_NEW) {
                pr_debug("CPU%d updated upon init\n", cpu);
                apply_microcode_on_target(cpu);
        }
index a15db2b4e0d66a8b5c4d2468359eeafb85401151..32b8e5724f966abbc67153065dd17b5ddcfd6d70 100644 (file)
@@ -589,6 +589,23 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
        if (!mc)
                return 0;
 
+       /*
+        * Save us the MSR write below - which is a particularly expensive
+        * operation - when the other hyperthread has updated the microcode
+        * already.
+        */
+       rev = intel_get_microcode_revision();
+       if (rev >= mc->hdr.rev) {
+               uci->cpu_sig.rev = rev;
+               return UCODE_OK;
+       }
+
+       /*
+        * Writeback and invalidate caches before updating microcode to avoid
+        * internal issues depending on what the microcode is updating.
+        */
+       native_wbinvd();
+
        /* write microcode via MSR 0x79 */
        native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
 
@@ -772,27 +789,44 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
        return 0;
 }
 
-static int apply_microcode_intel(int cpu)
+static enum ucode_state apply_microcode_intel(int cpu)
 {
+       struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+       struct cpuinfo_x86 *c = &cpu_data(cpu);
        struct microcode_intel *mc;
-       struct ucode_cpu_info *uci;
-       struct cpuinfo_x86 *c;
        static int prev_rev;
        u32 rev;
 
        /* We should bind the task to the CPU */
        if (WARN_ON(raw_smp_processor_id() != cpu))
-               return -1;
+               return UCODE_ERROR;
 
-       uci = ucode_cpu_info + cpu;
-       mc = uci->mc;
+       /* Look for a newer patch in our cache: */
+       mc = find_patch(uci);
        if (!mc) {
-               /* Look for a newer patch in our cache: */
-               mc = find_patch(uci);
+               mc = uci->mc;
                if (!mc)
-                       return 0;
+                       return UCODE_NFOUND;
        }
 
+       /*
+        * Save us the MSR write below - which is a particular expensive
+        * operation - when the other hyperthread has updated the microcode
+        * already.
+        */
+       rev = intel_get_microcode_revision();
+       if (rev >= mc->hdr.rev) {
+               uci->cpu_sig.rev = rev;
+               c->microcode = rev;
+               return UCODE_OK;
+       }
+
+       /*
+        * Writeback and invalidate caches before updating microcode to avoid
+        * internal issues depending on what the microcode is updating.
+        */
+       native_wbinvd();
+
        /* write microcode via MSR 0x79 */
        wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
 
@@ -801,7 +835,7 @@ static int apply_microcode_intel(int cpu)
        if (rev != mc->hdr.rev) {
                pr_err("CPU%d update to revision 0x%x failed\n",
                       cpu, mc->hdr.rev);
-               return -1;
+               return UCODE_ERROR;
        }
 
        if (rev != prev_rev) {
@@ -813,12 +847,10 @@ static int apply_microcode_intel(int cpu)
                prev_rev = rev;
        }
 
-       c = &cpu_data(cpu);
-
        uci->cpu_sig.rev = rev;
        c->microcode = rev;
 
-       return 0;
+       return UCODE_UPDATED;
 }
 
 static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
@@ -830,6 +862,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
        unsigned int leftover = size;
        unsigned int curr_mc_size = 0, new_mc_size = 0;
        unsigned int csig, cpf;
+       enum ucode_state ret = UCODE_OK;
 
        while (leftover) {
                struct microcode_header_intel mc_header;
@@ -871,6 +904,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
                        new_mc  = mc;
                        new_mc_size = mc_size;
                        mc = NULL;      /* trigger new vmalloc */
+                       ret = UCODE_NEW;
                }
 
                ucode_ptr += mc_size;
@@ -900,7 +934,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
        pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
                 cpu, new_rev, uci->cpu_sig.rev);
 
-       return UCODE_OK;
+       return ret;
 }
 
 static int get_ucode_fw(void *to, const void *from, size_t n)
index 04a625f0fcda322dab7c9d8459a19ee56b71c936..0f545b3cf926787bd986d763c843498540ef530d 100644 (file)
@@ -23,6 +23,7 @@
 #include <asm/nops.h>
 #include "../entry/calling.h"
 #include <asm/export.h>
+#include <asm/nospec-branch.h>
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/asm-offsets.h>
@@ -134,6 +135,7 @@ ENTRY(secondary_startup_64)
 
        /* Ensure I am executing from virtual addresses */
        movq    $1f, %rax
+       ANNOTATE_RETPOLINE_SAFE
        jmp     *%rax
 1:
        UNWIND_HINT_EMPTY
index 2f723301eb58fc5ad0d6796b342446ae2ee0c9e6..38deafebb21b726227fb2a12a7386f49603189fe 100644 (file)
@@ -23,7 +23,7 @@
 /*
  * this changes the io permissions bitmap in the current task.
  */
-asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on)
 {
        struct thread_struct *t = &current->thread;
        struct tss_struct *tss;
index bd36f3c33cd0f96f47b61034b02c205301fe87d1..0715f827607c4a2742e140f8d9a656ed4514d226 100644 (file)
@@ -1168,10 +1168,18 @@ NOKPROBE_SYMBOL(longjmp_break_handler);
 
 bool arch_within_kprobe_blacklist(unsigned long addr)
 {
+       bool is_in_entry_trampoline_section = false;
+
+#ifdef CONFIG_X86_64
+       is_in_entry_trampoline_section =
+               (addr >= (unsigned long)__entry_trampoline_start &&
+                addr < (unsigned long)__entry_trampoline_end);
+#endif
        return  (addr >= (unsigned long)__kprobes_text_start &&
                 addr < (unsigned long)__kprobes_text_end) ||
                (addr >= (unsigned long)__entry_text_start &&
-                addr < (unsigned long)__entry_text_end);
+                addr < (unsigned long)__entry_text_end) ||
+               is_in_entry_trampoline_section;
 }
 
 int __init arch_init_kprobes(void)
index 4e37d1a851a62df3f9f841f3bbd66827af0c1920..bc1a27280c4bf77899afad4b85bf53212d385cab 100644 (file)
@@ -49,7 +49,7 @@
 
 static int kvmapf = 1;
 
-static int parse_no_kvmapf(char *arg)
+static int __init parse_no_kvmapf(char *arg)
 {
         kvmapf = 0;
         return 0;
@@ -58,7 +58,7 @@ static int parse_no_kvmapf(char *arg)
 early_param("no-kvmapf", parse_no_kvmapf);
 
 static int steal_acc = 1;
-static int parse_no_stealacc(char *arg)
+static int __init parse_no_stealacc(char *arg)
 {
         steal_acc = 0;
         return 0;
@@ -67,7 +67,7 @@ static int parse_no_stealacc(char *arg)
 early_param("no-steal-acc", parse_no_stealacc);
 
 static int kvmclock_vsyscall = 1;
-static int parse_no_kvmclock_vsyscall(char *arg)
+static int __init parse_no_kvmclock_vsyscall(char *arg)
 {
         kvmclock_vsyscall = 0;
         return 0;
@@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void)
 #endif
                pa |= KVM_ASYNC_PF_ENABLED;
 
-               /* Async page fault support for L1 hypervisor is optional */
-               if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
-                       (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0)
-                       wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
+               if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
+                       pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
+
+               wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
                __this_cpu_write(apf_reason.enabled, 1);
                printk(KERN_INFO"KVM setup async PF for cpu %d\n",
                       smp_processor_id());
@@ -545,7 +545,8 @@ static void __init kvm_guest_init(void)
                pv_time_ops.steal_clock = kvm_steal_clock;
        }
 
-       if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH))
+       if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+           !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
                pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
 
        if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
@@ -633,7 +634,8 @@ static __init int kvm_setup_pv_tlb_flush(void)
 {
        int cpu;
 
-       if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) {
+       if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+           !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
                for_each_possible_cpu(cpu) {
                        zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
                                GFP_KERNEL, cpu_to_node(cpu));
index 1ae67e982af70b193498c8186f65ff27788acd9f..4c616be28506fe100f88092474e193521fa81f78 100644 (file)
@@ -1204,20 +1204,13 @@ void __init setup_arch(char **cmdline_p)
 
        kasan_init();
 
-#ifdef CONFIG_X86_32
-       /* sync back kernel address range */
-       clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
-                       swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
-                       KERNEL_PGD_PTRS);
-
        /*
-        * sync back low identity map too.  It is used for example
-        * in the 32-bit EFI stub.
+        * Sync back kernel address range.
+        *
+        * FIXME: Can the later sync in setup_cpu_entry_areas() replace
+        * this call?
         */
-       clone_pgd_range(initial_page_table,
-                       swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
-                       min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
-#endif
+       sync_initial_page_table();
 
        tboot_probe();
 
index 497aa766fab38e21e5d1c24048e65a1e9c5b1e22..ea554f812ee18e46289bb1fc9b65cc7408189a74 100644 (file)
@@ -287,24 +287,15 @@ void __init setup_per_cpu_areas(void)
        /* Setup cpu initialized, callin, callout masks */
        setup_cpu_local_masks();
 
-#ifdef CONFIG_X86_32
        /*
         * Sync back kernel address range again.  We already did this in
         * setup_arch(), but percpu data also needs to be available in
         * the smpboot asm.  We can't reliably pick up percpu mappings
         * using vmalloc_fault(), because exception dispatch needs
         * percpu data.
+        *
+        * FIXME: Can the later sync in setup_cpu_entry_areas() replace
+        * this call?
         */
-       clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
-                       swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
-                       KERNEL_PGD_PTRS);
-
-       /*
-        * sync back low identity map too.  It is used for example
-        * in the 32-bit EFI stub.
-        */
-       clone_pgd_range(initial_page_table,
-                       swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
-                       min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
-#endif
+       sync_initial_page_table();
 }
index ac057f9b076360168704438f9f808d152a2dce8d..0d930d8987cc7c88454ff96acc671222583ac1c5 100644 (file)
@@ -43,6 +43,13 @@ static inline void signal_compat_build_tests(void)
        BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields) != 3 * sizeof(int));
 #define CHECK_CSI_OFFSET(name)   BUILD_BUG_ON(_sifields_offset != offsetof(compat_siginfo_t, _sifields.name))
 
+       BUILD_BUG_ON(offsetof(siginfo_t, si_signo) != 0);
+       BUILD_BUG_ON(offsetof(siginfo_t, si_errno) != 4);
+       BUILD_BUG_ON(offsetof(siginfo_t, si_code)  != 8);
+
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_signo) != 0);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_errno) != 4);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_code)  != 8);
         /*
         * Ensure that the size of each si_field never changes.
         * If it does, it is a sign that the
@@ -63,36 +70,94 @@ static inline void signal_compat_build_tests(void)
        CHECK_CSI_SIZE  (_kill, 2*sizeof(int));
        CHECK_SI_SIZE   (_kill, 2*sizeof(int));
 
+       BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10);
+       BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0xC);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10);
+
        CHECK_CSI_OFFSET(_timer);
        CHECK_CSI_SIZE  (_timer, 3*sizeof(int));
        CHECK_SI_SIZE   (_timer, 6*sizeof(int));
 
+       BUILD_BUG_ON(offsetof(siginfo_t, si_tid)     != 0x10);
+       BUILD_BUG_ON(offsetof(siginfo_t, si_overrun) != 0x14);
+       BUILD_BUG_ON(offsetof(siginfo_t, si_value)   != 0x18);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_tid)     != 0x0C);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_overrun) != 0x10);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value)   != 0x14);
+
        CHECK_CSI_OFFSET(_rt);
        CHECK_CSI_SIZE  (_rt, 3*sizeof(int));
        CHECK_SI_SIZE   (_rt, 4*sizeof(int));
 
+       BUILD_BUG_ON(offsetof(siginfo_t, si_pid)   != 0x10);
+       BUILD_BUG_ON(offsetof(siginfo_t, si_uid)   != 0x14);
+       BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x18);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid)   != 0x0C);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid)   != 0x10);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value) != 0x14);
+
        CHECK_CSI_OFFSET(_sigchld);
        CHECK_CSI_SIZE  (_sigchld, 5*sizeof(int));
        CHECK_SI_SIZE   (_sigchld, 8*sizeof(int));
 
+       BUILD_BUG_ON(offsetof(siginfo_t, si_pid)    != 0x10);
+       BUILD_BUG_ON(offsetof(siginfo_t, si_uid)    != 0x14);
+       BUILD_BUG_ON(offsetof(siginfo_t, si_status) != 0x18);
+       BUILD_BUG_ON(offsetof(siginfo_t, si_utime)  != 0x20);
+       BUILD_BUG_ON(offsetof(siginfo_t, si_stime)  != 0x28);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid)    != 0x0C);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid)    != 0x10);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_status) != 0x14);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_utime)  != 0x18);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_stime)  != 0x1C);
+
 #ifdef CONFIG_X86_X32_ABI
        CHECK_CSI_OFFSET(_sigchld_x32);
        CHECK_CSI_SIZE  (_sigchld_x32, 7*sizeof(int));
        /* no _sigchld_x32 in the generic siginfo_t */
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._utime)  != 0x18);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._stime)  != 0x20);
 #endif
 
        CHECK_CSI_OFFSET(_sigfault);
        CHECK_CSI_SIZE  (_sigfault, 4*sizeof(int));
        CHECK_SI_SIZE   (_sigfault, 8*sizeof(int));
 
+       BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x10);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr) != 0x0C);
+
+       BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x18);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr_lsb) != 0x10);
+
+       BUILD_BUG_ON(offsetof(siginfo_t, si_lower) != 0x20);
+       BUILD_BUG_ON(offsetof(siginfo_t, si_upper) != 0x28);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_lower) != 0x14);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_upper) != 0x18);
+
+       BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14);
+
        CHECK_CSI_OFFSET(_sigpoll);
        CHECK_CSI_SIZE  (_sigpoll, 2*sizeof(int));
        CHECK_SI_SIZE   (_sigpoll, 4*sizeof(int));
 
+       BUILD_BUG_ON(offsetof(siginfo_t, si_band)   != 0x10);
+       BUILD_BUG_ON(offsetof(siginfo_t, si_fd)     != 0x18);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_band) != 0x0C);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_fd)   != 0x10);
+
        CHECK_CSI_OFFSET(_sigsys);
        CHECK_CSI_SIZE  (_sigsys, 3*sizeof(int));
        CHECK_SI_SIZE   (_sigsys, 4*sizeof(int));
 
+       BUILD_BUG_ON(offsetof(siginfo_t, si_call_addr) != 0x10);
+       BUILD_BUG_ON(offsetof(siginfo_t, si_syscall)   != 0x18);
+       BUILD_BUG_ON(offsetof(siginfo_t, si_arch)      != 0x1C);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_call_addr) != 0x0C);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_syscall)   != 0x10);
+       BUILD_BUG_ON(offsetof(compat_siginfo_t, si_arch)      != 0x14);
+
        /* any new si_fields should be added here */
 }
 
index 9eee25d07586c6a310b16e3471efc02e501f93c7..ff99e2b6fc541a0faf8afaa17328679533b7838a 100644 (file)
@@ -1437,6 +1437,7 @@ static void remove_siblinginfo(int cpu)
        cpumask_clear(topology_sibling_cpumask(cpu));
        cpumask_clear(topology_core_cpumask(cpu));
        c->cpu_core_id = 0;
+       c->booted_cores = 0;
        cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
        recompute_smt_state();
 }
index 1f9188f5357cb38e45295c07ba5f2e902563257e..feb28fee6cea7f9fbad1a4b06ee9932ac8690d75 100644 (file)
@@ -5,7 +5,6 @@
 #include <asm/unwind.h>
 #include <asm/orc_types.h>
 #include <asm/orc_lookup.h>
-#include <asm/sections.h>
 
 #define orc_warn(fmt, ...) \
        printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
@@ -148,7 +147,7 @@ static struct orc_entry *orc_find(unsigned long ip)
        }
 
        /* vmlinux .init slow lookup: */
-       if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext)
+       if (init_kernel_text(ip))
                return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
                                  __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
 
index 5edb27f1a2c407ff8173161fb87cbd9130b76e8e..9d0b5af7db915c60adf23389ac47c3312dd3683a 100644 (file)
@@ -727,7 +727,8 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
        return;
 
 check_vip:
-       if (VEFLAGS & X86_EFLAGS_VIP) {
+       if ((VEFLAGS & (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) ==
+           (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) {
                save_v86_state(regs, VM86_STI);
                return;
        }
index 9b138a06c1a468e6a6d3fe41748abef3a436ace3..b854ebf5851b7c8fb6225b53e7d3a81b16ec43db 100644 (file)
@@ -118,9 +118,11 @@ SECTIONS
 
 #ifdef CONFIG_X86_64
                . = ALIGN(PAGE_SIZE);
+               VMLINUX_SYMBOL(__entry_trampoline_start) = .;
                _entry_trampoline = .;
                *(.entry_trampoline)
                . = ALIGN(PAGE_SIZE);
+               VMLINUX_SYMBOL(__entry_trampoline_end) = .;
                ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
 #endif
 
index a0c5a69bc7c4a324078db14ad27753443d65aa85..b671fc2d0422717f06ca6291b2a76742ee45537a 100644 (file)
@@ -607,7 +607,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                             (1 << KVM_FEATURE_PV_EOI) |
                             (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
                             (1 << KVM_FEATURE_PV_UNHALT) |
-                            (1 << KVM_FEATURE_PV_TLB_FLUSH);
+                            (1 << KVM_FEATURE_PV_TLB_FLUSH) |
+                            (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
 
                if (sched_info_on())
                        entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
index 924ac8ce9d5004f9db4126a81f178c2bf0e6ff40..391dda8d43b7a44d3cedd0b41edbb51ba7171bf4 100644 (file)
@@ -2002,14 +2002,13 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 
 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 {
-       struct kvm_lapic *apic;
+       struct kvm_lapic *apic = vcpu->arch.apic;
        int i;
 
-       apic_debug("%s\n", __func__);
+       if (!apic)
+               return;
 
-       ASSERT(vcpu);
-       apic = vcpu->arch.apic;
-       ASSERT(apic != NULL);
+       apic_debug("%s\n", __func__);
 
        /* Stop the timer in case it's a reset to an active apic */
        hrtimer_cancel(&apic->lapic_timer.timer);
@@ -2165,7 +2164,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
         */
        vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
        static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
-       kvm_lapic_reset(vcpu, false);
        kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
 
        return 0;
@@ -2569,7 +2567,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 
        pe = xchg(&apic->pending_events, 0);
        if (test_bit(KVM_APIC_INIT, &pe)) {
-               kvm_lapic_reset(vcpu, true);
                kvm_vcpu_reset(vcpu, true);
                if (kvm_vcpu_is_bsp(apic->vcpu))
                        vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
index 46ff304140c71fad1fa818324c0cda017257d2a5..763bb3bade63f38df3f3f92cc2727499a3ed160f 100644 (file)
@@ -2770,8 +2770,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
        else
                pte_access &= ~ACC_WRITE_MASK;
 
+       if (!kvm_is_mmio_pfn(pfn))
+               spte |= shadow_me_mask;
+
        spte |= (u64)pfn << PAGE_SHIFT;
-       spte |= shadow_me_mask;
 
        if (pte_access & ACC_WRITE_MASK) {
 
@@ -3029,7 +3031,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
                return RET_PF_RETRY;
        }
 
-       return -EFAULT;
+       return RET_PF_EMULATE;
 }
 
 static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
index b3e488a748281aa5f3d319861ae2ab4bfca3e65c..be9c839e2c89967d689485dbb8e51763980dd151 100644 (file)
@@ -49,6 +49,7 @@
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
 #include <asm/irq_remapping.h>
+#include <asm/microcode.h>
 #include <asm/nospec-branch.h>
 
 #include <asm/virtext.h>
@@ -178,6 +179,8 @@ struct vcpu_svm {
        uint64_t sysenter_eip;
        uint64_t tsc_aux;
 
+       u64 msr_decfg;
+
        u64 next_rip;
 
        u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
@@ -300,6 +303,8 @@ module_param(vgif, int, 0444);
 static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
 module_param(sev, int, 0444);
 
+static u8 rsm_ins_bytes[] = "\x0f\xaa";
+
 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
 static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -1383,6 +1388,7 @@ static void init_vmcb(struct vcpu_svm *svm)
        set_intercept(svm, INTERCEPT_SKINIT);
        set_intercept(svm, INTERCEPT_WBINVD);
        set_intercept(svm, INTERCEPT_XSETBV);
+       set_intercept(svm, INTERCEPT_RSM);
 
        if (!kvm_mwait_in_guest()) {
                set_intercept(svm, INTERCEPT_MONITOR);
@@ -1902,6 +1908,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
        u32 dummy;
        u32 eax = 1;
 
+       vcpu->arch.microcode_version = 0x01000065;
        svm->spec_ctrl = 0;
 
        if (!init_event) {
@@ -3699,6 +3706,12 @@ static int emulate_on_interception(struct vcpu_svm *svm)
        return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
 }
 
+static int rsm_interception(struct vcpu_svm *svm)
+{
+       return x86_emulate_instruction(&svm->vcpu, 0, 0,
+                                      rsm_ins_bytes, 2) == EMULATE_DONE;
+}
+
 static int rdpmc_interception(struct vcpu_svm *svm)
 {
        int err;
@@ -3860,6 +3873,22 @@ static int cr8_write_interception(struct vcpu_svm *svm)
        return 0;
 }
 
+static int svm_get_msr_feature(struct kvm_msr_entry *msr)
+{
+       msr->data = 0;
+
+       switch (msr->index) {
+       case MSR_F10H_DECFG:
+               if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
+                       msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
+               break;
+       default:
+               return 1;
+       }
+
+       return 0;
+}
+
 static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
@@ -3935,9 +3964,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
                msr_info->data = svm->spec_ctrl;
                break;
-       case MSR_IA32_UCODE_REV:
-               msr_info->data = 0x01000065;
-               break;
        case MSR_F15H_IC_CFG: {
 
                int family, model;
@@ -3955,6 +3981,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        msr_info->data = 0x1E;
                }
                break;
+       case MSR_F10H_DECFG:
+               msr_info->data = svm->msr_decfg;
+               break;
        default:
                return kvm_get_msr_common(vcpu, msr_info);
        }
@@ -4133,6 +4162,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
        case MSR_VM_IGNNE:
                vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
                break;
+       case MSR_F10H_DECFG: {
+               struct kvm_msr_entry msr_entry;
+
+               msr_entry.index = msr->index;
+               if (svm_get_msr_feature(&msr_entry))
+                       return 1;
+
+               /* Check the supported bits */
+               if (data & ~msr_entry.data)
+                       return 1;
+
+               /* Don't allow the guest to change a bit, #GP */
+               if (!msr->host_initiated && (data ^ msr_entry.data))
+                       return 1;
+
+               svm->msr_decfg = data;
+               break;
+       }
        case MSR_IA32_APICBASE:
                if (kvm_vcpu_apicv_active(vcpu))
                        avic_update_vapic_bar(to_svm(vcpu), data);
@@ -4541,7 +4588,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_MWAIT]                        = mwait_interception,
        [SVM_EXIT_XSETBV]                       = xsetbv_interception,
        [SVM_EXIT_NPF]                          = npf_interception,
-       [SVM_EXIT_RSM]                          = emulate_on_interception,
+       [SVM_EXIT_RSM]                          = rsm_interception,
        [SVM_EXIT_AVIC_INCOMPLETE_IPI]          = avic_incomplete_ipi_interception,
        [SVM_EXIT_AVIC_UNACCELERATED_ACCESS]    = avic_unaccelerated_access_interception,
 };
@@ -5355,7 +5402,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
         * being speculatively taken.
         */
        if (svm->spec_ctrl)
-               wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+               native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
 
        asm volatile (
                "push %%" _ASM_BP "; \n\t"
@@ -5464,11 +5511,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
         * If the L02 MSR bitmap does not intercept the MSR, then we need to
         * save it.
         */
-       if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
-               rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+       if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+               svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
        if (svm->spec_ctrl)
-               wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+               native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 
        /* Eliminate branch target predictions from guest mode */
        vmexit_fill_RSB();
@@ -6236,16 +6283,18 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
 
 static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
+       void __user *measure = (void __user *)(uintptr_t)argp->data;
        struct kvm_sev_info *sev = &kvm->arch.sev_info;
        struct sev_data_launch_measure *data;
        struct kvm_sev_launch_measure params;
+       void __user *p = NULL;
        void *blob = NULL;
        int ret;
 
        if (!sev_guest(kvm))
                return -ENOTTY;
 
-       if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+       if (copy_from_user(&params, measure, sizeof(params)))
                return -EFAULT;
 
        data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -6256,17 +6305,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
        if (!params.len)
                goto cmd;
 
-       if (params.uaddr) {
+       p = (void __user *)(uintptr_t)params.uaddr;
+       if (p) {
                if (params.len > SEV_FW_BLOB_MAX_SIZE) {
                        ret = -EINVAL;
                        goto e_free;
                }
 
-               if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) {
-                       ret = -EFAULT;
-                       goto e_free;
-               }
-
                ret = -ENOMEM;
                blob = kmalloc(params.len, GFP_KERNEL);
                if (!blob)
@@ -6290,13 +6335,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
                goto e_free_blob;
 
        if (blob) {
-               if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len))
+               if (copy_to_user(p, blob, params.len))
                        ret = -EFAULT;
        }
 
 done:
        params.len = data->len;
-       if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
+       if (copy_to_user(measure, &params, sizeof(params)))
                ret = -EFAULT;
 e_free_blob:
        kfree(blob);
@@ -6597,7 +6642,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
        struct page **pages;
        void *blob, *hdr;
        unsigned long n;
-       int ret;
+       int ret, offset;
 
        if (!sev_guest(kvm))
                return -ENOTTY;
@@ -6623,6 +6668,10 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
        if (!data)
                goto e_unpin_memory;
 
+       offset = params.guest_uaddr & (PAGE_SIZE - 1);
+       data->guest_address = __sme_page_pa(pages[0]) + offset;
+       data->guest_len = params.guest_len;
+
        blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
        if (IS_ERR(blob)) {
                ret = PTR_ERR(blob);
@@ -6637,8 +6686,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
                ret = PTR_ERR(hdr);
                goto e_free_blob;
        }
-       data->trans_address = __psp_pa(blob);
-       data->trans_len = params.trans_len;
+       data->hdr_address = __psp_pa(hdr);
+       data->hdr_len = params.hdr_len;
 
        data->handle = sev->handle;
        ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
@@ -6821,6 +6870,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
        .vcpu_unblocking = svm_vcpu_unblocking,
 
        .update_bp_intercept = update_bp_intercept,
+       .get_msr_feature = svm_get_msr_feature,
        .get_msr = svm_get_msr,
        .set_msr = svm_set_msr,
        .get_segment_base = svm_get_segment_base,
index 3dec126aa3022eb11f49d5de695d3658183a48ee..2d87603f91795b29c5ea0f84703d316a49c0573d 100644 (file)
@@ -51,6 +51,7 @@
 #include <asm/apic.h>
 #include <asm/irq_remapping.h>
 #include <asm/mmu_context.h>
+#include <asm/microcode.h>
 #include <asm/nospec-branch.h>
 
 #include "trace.h"
@@ -1044,6 +1045,13 @@ static inline bool is_machine_check(u32 intr_info)
                (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
 }
 
+/* Undocumented: icebp/int1 */
+static inline bool is_icebp(u32 intr_info)
+{
+       return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
+               == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK);
+}
+
 static inline bool cpu_has_vmx_msr_bitmap(void)
 {
        return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
@@ -3226,6 +3234,11 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
        return !(val & ~valid_bits);
 }
 
+static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
+{
+       return 1;
+}
+
 /*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
@@ -4485,7 +4498,8 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
                vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
                              SECONDARY_EXEC_DESC);
                hw_cr4 &= ~X86_CR4_UMIP;
-       } else
+       } else if (!is_guest_mode(vcpu) ||
+                  !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
                vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
                                SECONDARY_EXEC_DESC);
 
@@ -5765,6 +5779,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
        vmx->rmode.vm86_active = 0;
        vmx->spec_ctrl = 0;
 
+       vcpu->arch.microcode_version = 0x100000000ULL;
        vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
        kvm_set_cr8(vcpu, 0);
 
@@ -6171,7 +6186,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
                      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
                        vcpu->arch.dr6 &= ~15;
                        vcpu->arch.dr6 |= dr6 | DR6_RTM;
-                       if (!(dr6 & ~DR6_RESERVED)) /* icebp */
+                       if (is_icebp(intr_info))
                                skip_emulated_instruction(vcpu);
 
                        kvm_queue_exception(vcpu, DB_VECTOR);
@@ -9452,7 +9467,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
         * being speculatively taken.
         */
        if (vmx->spec_ctrl)
-               wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+               native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
 
        vmx->__launched = vmx->loaded_vmcs->launched;
        asm(
@@ -9587,11 +9602,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
         * If the L02 MSR bitmap does not intercept the MSR, then we need to
         * save it.
         */
-       if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
-               rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+       if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+               vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
        if (vmx->spec_ctrl)
-               wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+               native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 
        /* Eliminate branch target predictions from guest mode */
        vmexit_fill_RSB();
@@ -11199,7 +11214,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
        if (ret)
                return ret;
 
-       if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
+       /*
+        * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
+        * by event injection, halt vcpu.
+        */
+       if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
+           !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK))
                return kvm_vcpu_halt(vcpu);
 
        vmx->nested.nested_run_pending = 1;
@@ -12290,6 +12310,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
        .vcpu_put = vmx_vcpu_put,
 
        .update_bp_intercept = update_exception_bitmap,
+       .get_msr_feature = vmx_get_msr_feature,
        .get_msr = vmx_get_msr,
        .set_msr = vmx_set_msr,
        .get_segment_base = vmx_get_segment_base,
index c8a0b545ac20c71a464738a1dd0cd7e1c3df388e..18b5ca7a31974757f0dfdd43f5d7e0805f4f3aaa 100644 (file)
@@ -1049,6 +1049,45 @@ static u32 emulated_msrs[] = {
 
 static unsigned num_emulated_msrs;
 
+/*
+ * List of msr numbers which are used to expose MSR-based features that
+ * can be used by a hypervisor to validate requested CPU features.
+ *
+ * kvm_init_msr_list() compacts this array in place, keeping only the
+ * entries for which kvm_get_msr_feature() succeeds.
+ */
+static u32 msr_based_features[] = {
+       MSR_F10H_DECFG,
+       MSR_IA32_UCODE_REV,
+};
+
+/* Number of valid leading entries in msr_based_features[]; set by kvm_init_msr_list(). */
+static unsigned int num_msr_based_features;
+
+/*
+ * Fill msr->data with the value of the feature MSR selected by msr->index.
+ *
+ * MSR_IA32_UCODE_REV is read directly with rdmsrl(); every other index is
+ * delegated to the vendor kvm_x86_ops->get_msr_feature() hook.
+ *
+ * Returns 0 on success, 1 if the vendor hook returns non-zero.
+ */
+static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
+{
+       switch (msr->index) {
+       case MSR_IA32_UCODE_REV:
+               rdmsrl(msr->index, msr->data);
+               break;
+       default:
+               if (kvm_x86_ops->get_msr_feature(msr))
+                       return 1;
+       }
+       return 0;
+}
+
+/*
+ * msr_io() callback for the KVM_GET_MSRS device ioctl: look up the feature
+ * MSR @index and store its value in *@data.
+ *
+ * @vcpu is not used by this callback (the device ioctl passes NULL).
+ *
+ * Returns 0 on success.  On failure the non-zero result of
+ * kvm_get_msr_feature() is returned and *@data is left untouched.
+ */
+static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+{
+       struct kvm_msr_entry msr;
+       int r;
+
+       msr.index = index;
+       r = kvm_get_msr_feature(&msr);
+       if (r)
+               return r;
+
+       *data = msr.data;
+
+       return 0;
+}
+
 bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
        if (efer & efer_reserved_bits)
@@ -2222,7 +2261,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
        switch (msr) {
        case MSR_AMD64_NB_CFG:
-       case MSR_IA32_UCODE_REV:
        case MSR_IA32_UCODE_WRITE:
        case MSR_VM_HSAVE_PA:
        case MSR_AMD64_PATCH_LOADER:
@@ -2230,6 +2268,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_AMD64_DC_CFG:
                break;
 
+       case MSR_IA32_UCODE_REV:
+               if (msr_info->host_initiated)
+                       vcpu->arch.microcode_version = data;
+               break;
        case MSR_EFER:
                return set_efer(vcpu, data);
        case MSR_K7_HWCR:
@@ -2525,7 +2567,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                msr_info->data = 0;
                break;
        case MSR_IA32_UCODE_REV:
-               msr_info->data = 0x100000000ULL;
+               msr_info->data = vcpu->arch.microcode_version;
                break;
        case MSR_MTRRcap:
        case 0x200 ... 0x2ff:
@@ -2680,13 +2722,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
                    int (*do_msr)(struct kvm_vcpu *vcpu,
                                  unsigned index, u64 *data))
 {
-       int i, idx;
+       int i;
 
-       idx = srcu_read_lock(&vcpu->kvm->srcu);
        for (i = 0; i < msrs->nmsrs; ++i)
                if (do_msr(vcpu, entries[i].index, &entries[i].data))
                        break;
-       srcu_read_unlock(&vcpu->kvm->srcu, idx);
 
        return i;
 }
@@ -2785,6 +2825,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_SET_BOOT_CPU_ID:
        case KVM_CAP_SPLIT_IRQCHIP:
        case KVM_CAP_IMMEDIATE_EXIT:
+       case KVM_CAP_GET_MSR_FEATURES:
                r = 1;
                break;
        case KVM_CAP_ADJUST_CLOCK:
@@ -2899,6 +2940,31 @@ long kvm_arch_dev_ioctl(struct file *filp,
                        goto out;
                r = 0;
                break;
+       case KVM_GET_MSR_FEATURE_INDEX_LIST: {
+               struct kvm_msr_list __user *user_msr_list = argp;
+               struct kvm_msr_list msr_list;
+               unsigned int n;
+
+               r = -EFAULT;
+               if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
+                       goto out;
+               n = msr_list.nmsrs;
+               msr_list.nmsrs = num_msr_based_features;
+               if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
+                       goto out;
+               r = -E2BIG;
+               if (n < msr_list.nmsrs)
+                       goto out;
+               r = -EFAULT;
+               if (copy_to_user(user_msr_list->indices, &msr_based_features,
+                                num_msr_based_features * sizeof(u32)))
+                       goto out;
+               r = 0;
+               break;
+       }
+       case KVM_GET_MSRS:
+               r = msr_io(NULL, argp, do_get_msr_feature, 1);
+               break;
        }
        default:
                r = -EINVAL;
@@ -3636,12 +3702,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = 0;
                break;
        }
-       case KVM_GET_MSRS:
+       case KVM_GET_MSRS: {
+               int idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = msr_io(vcpu, argp, do_get_msr, 1);
+               srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
-       case KVM_SET_MSRS:
+       }
+       case KVM_SET_MSRS: {
+               int idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = msr_io(vcpu, argp, do_set_msr, 0);
+               srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
+       }
        case KVM_TPR_ACCESS_REPORTING: {
                struct kvm_tpr_access_ctl tac;
 
@@ -4464,6 +4536,19 @@ static void kvm_init_msr_list(void)
                j++;
        }
        num_emulated_msrs = j;
+
+       for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
+               struct kvm_msr_entry msr;
+
+               msr.index = msr_based_features[i];
+               if (kvm_get_msr_feature(&msr))
+                       continue;
+
+               if (j < i)
+                       msr_based_features[j] = msr_based_features[i];
+               j++;
+       }
+       num_msr_based_features = j;
 }
 
 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@ -8017,6 +8102,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 {
+       kvm_lapic_reset(vcpu, init_event);
+
        vcpu->arch.hflags = 0;
 
        vcpu->arch.smi_pending = 0;
@@ -8460,10 +8547,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
                        return r;
        }
 
-       if (!size) {
-               r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
-               WARN_ON(r < 0);
-       }
+       if (!size)
+               vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
 
        return 0;
 }
index 91e9700cc6dcd2152f7488740ad3f9a6e1b766c4..25a972c61b0ae9816a817eb9681f4cd374e9e32a 100644 (file)
@@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
 lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 lib-$(CONFIG_RETPOLINE) += retpoline.o
-OBJECT_FILES_NON_STANDARD_retpoline.o :=y
 
 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
 
index 480edc3a5e03002dd6f0a0316477cbd7b0971cc8..c909961e678a594bd3812cb14936bdf035af2bb9 100644 (file)
@@ -7,7 +7,6 @@
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
 #include <asm/nospec-branch.h>
-#include <asm/bitsperlong.h>
 
 .macro THUNK reg
        .section .text.__x86.indirect_thunk
@@ -47,58 +46,3 @@ GENERATE_THUNK(r13)
 GENERATE_THUNK(r14)
 GENERATE_THUNK(r15)
 #endif
-
-/*
- * Fill the CPU return stack buffer.
- *
- * Each entry in the RSB, if used for a speculative 'ret', contains an
- * infinite 'pause; lfence; jmp' loop to capture speculative execution.
- *
- * This is required in various cases for retpoline and IBRS-based
- * mitigations for the Spectre variant 2 vulnerability. Sometimes to
- * eliminate potentially bogus entries from the RSB, and sometimes
- * purely to ensure that it doesn't get empty, which on some CPUs would
- * allow predictions from other (unwanted!) sources to be used.
- *
- * Google experimented with loop-unrolling and this turned out to be
- * the optimal version - two calls, each with their own speculation
- * trap should their return address end up getting used, in a loop.
- */
-.macro STUFF_RSB nr:req sp:req
-       mov     $(\nr / 2), %_ASM_BX
-       .align 16
-771:
-       call    772f
-773:                                           /* speculation trap */
-       pause
-       lfence
-       jmp     773b
-       .align 16
-772:
-       call    774f
-775:                                           /* speculation trap */
-       pause
-       lfence
-       jmp     775b
-       .align 16
-774:
-       dec     %_ASM_BX
-       jnz     771b
-       add     $((BITS_PER_LONG/8) * \nr), \sp
-.endm
-
-#define RSB_FILL_LOOPS         16      /* To avoid underflow */
-
-ENTRY(__fill_rsb)
-       STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
-       ret
-END(__fill_rsb)
-EXPORT_SYMBOL_GPL(__fill_rsb)
-
-#define RSB_CLEAR_LOOPS                32      /* To forcibly overwrite all entries */
-
-ENTRY(__clear_rsb)
-       STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
-       ret
-END(__clear_rsb)
-EXPORT_SYMBOL_GPL(__clear_rsb)
index b9283cc276220db667ab091a3358eb5741813f7f..476d810639a87a5ba5a24528b4de5dd34b6e59f6 100644 (file)
@@ -163,4 +163,10 @@ void __init setup_cpu_entry_areas(void)
 
        for_each_possible_cpu(cpu)
                setup_cpu_entry_area(cpu);
+
+       /*
+        * This is the last essential update to swapper_pg_dir which needs
+        * to be synchronized to initial_page_table on 32bit.
+        */
+       sync_initial_page_table();
 }
index 800de815519cd1061c090e70fab09bd62bbd8ddd..25a30b5d6582f2cd7937ed1beb51de989db24280 100644 (file)
@@ -330,7 +330,7 @@ static noinline int vmalloc_fault(unsigned long address)
        if (!pmd_k)
                return -1;
 
-       if (pmd_huge(*pmd_k))
+       if (pmd_large(*pmd_k))
                return 0;
 
        pte_k = pte_offset_kernel(pmd_k, address);
@@ -475,7 +475,7 @@ static noinline int vmalloc_fault(unsigned long address)
        if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref))
                BUG();
 
-       if (pud_huge(*pud))
+       if (pud_large(*pud))
                return 0;
 
        pmd = pmd_offset(pud, address);
@@ -486,7 +486,7 @@ static noinline int vmalloc_fault(unsigned long address)
        if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref))
                BUG();
 
-       if (pmd_huge(*pmd))
+       if (pmd_large(*pmd))
                return 0;
 
        pte_ref = pte_offset_kernel(pmd_ref, address);
@@ -1248,10 +1248,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
        tsk = current;
        mm = tsk->mm;
 
-       /*
-        * Detect and handle instructions that would cause a page fault for
-        * both a tracked kernel page and a userspace page.
-        */
        prefetchw(&mm->mmap_sem);
 
        if (unlikely(kmmio_fault(regs, address)))
index 79cb066f40c0d4a4607a7b7aa4e5524a203b5e10..396e1f0151ac1973de4339a8946653f95aecb66d 100644 (file)
@@ -453,6 +453,21 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base)
 }
 #endif /* CONFIG_HIGHMEM */
 
+/*
+ * Bring initial_page_table in sync with swapper_pg_dir (32-bit only,
+ * called from setup_cpu_entry_areas()).
+ *
+ * NOTE(review): clone_pgd_range() is presumably (dst, src, count), which
+ * matches the caller's comment about syncing to initial_page_table --
+ * confirm against its definition.
+ */
+void __init sync_initial_page_table(void)
+{
+       clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
+                       swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
+                       KERNEL_PGD_PTRS);
+
+       /*
+        * Sync back the low identity map too.  It is used for example
+        * in the 32-bit EFI stub.  The same kernel entries are cloned to
+        * index 0, limited to min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)
+        * entries.
+        */
+       clone_pgd_range(initial_page_table,
+                       swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
+                       min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
+}
+
 void __init native_pagetable_init(void)
 {
        unsigned long pfn, va;
index 8b72923f1d35c07c5ded42ae36873790da02d247..af11a2890235584a5f07cfe7f83a00ea71fc47f9 100644 (file)
@@ -800,17 +800,11 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
 
 #define PAGE_INUSE 0xFD
 
-static void __meminit free_pagetable(struct page *page, int order,
-               struct vmem_altmap *altmap)
+static void __meminit free_pagetable(struct page *page, int order)
 {
        unsigned long magic;
        unsigned int nr_pages = 1 << order;
 
-       if (altmap) {
-               vmem_altmap_free(altmap, nr_pages);
-               return;
-       }
-
        /* bootmem page has reserved flag */
        if (PageReserved(page)) {
                __ClearPageReserved(page);
@@ -826,8 +820,16 @@ static void __meminit free_pagetable(struct page *page, int order,
                free_pages((unsigned long)page_address(page), order);
 }
 
-static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd,
+/*
+ * Release the PMD_SIZE area behind a huge mapping.
+ *
+ * With an @altmap the PMD_SIZE / PAGE_SIZE pages are handed back through
+ * vmem_altmap_free(); otherwise @page is released with free_pagetable()
+ * at order get_order(PMD_SIZE).
+ */
+static void __meminit free_hugepage_table(struct page *page,
                 struct vmem_altmap *altmap)
+{
+       if (altmap)
+               vmem_altmap_free(altmap, PMD_SIZE / PAGE_SIZE);
+       else
+               free_pagetable(page, get_order(PMD_SIZE));
+}
+
+static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
 {
        pte_t *pte;
        int i;
@@ -839,14 +841,13 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd,
        }
 
        /* free a pte talbe */
-       free_pagetable(pmd_page(*pmd), 0, altmap);
+       free_pagetable(pmd_page(*pmd), 0);
        spin_lock(&init_mm.page_table_lock);
        pmd_clear(pmd);
        spin_unlock(&init_mm.page_table_lock);
 }
 
-static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud,
-               struct vmem_altmap *altmap)
+static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
 {
        pmd_t *pmd;
        int i;
@@ -858,14 +859,13 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud,
        }
 
        /* free a pmd talbe */
-       free_pagetable(pud_page(*pud), 0, altmap);
+       free_pagetable(pud_page(*pud), 0);
        spin_lock(&init_mm.page_table_lock);
        pud_clear(pud);
        spin_unlock(&init_mm.page_table_lock);
 }
 
-static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,
-               struct vmem_altmap *altmap)
+static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
 {
        pud_t *pud;
        int i;
@@ -877,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,
        }
 
        /* free a pud talbe */
-       free_pagetable(p4d_page(*p4d), 0, altmap);
+       free_pagetable(p4d_page(*p4d), 0);
        spin_lock(&init_mm.page_table_lock);
        p4d_clear(p4d);
        spin_unlock(&init_mm.page_table_lock);
@@ -885,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,
 
 static void __meminit
 remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
-                struct vmem_altmap *altmap, bool direct)
+                bool direct)
 {
        unsigned long next, pages = 0;
        pte_t *pte;
@@ -916,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
                         * freed when offlining, or simplely not in use.
                         */
                        if (!direct)
-                               free_pagetable(pte_page(*pte), 0, altmap);
+                               free_pagetable(pte_page(*pte), 0);
 
                        spin_lock(&init_mm.page_table_lock);
                        pte_clear(&init_mm, addr, pte);
@@ -939,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
 
                        page_addr = page_address(pte_page(*pte));
                        if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
-                               free_pagetable(pte_page(*pte), 0, altmap);
+                               free_pagetable(pte_page(*pte), 0);
 
                                spin_lock(&init_mm.page_table_lock);
                                pte_clear(&init_mm, addr, pte);
@@ -974,9 +974,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
                        if (IS_ALIGNED(addr, PMD_SIZE) &&
                            IS_ALIGNED(next, PMD_SIZE)) {
                                if (!direct)
-                                       free_pagetable(pmd_page(*pmd),
-                                                      get_order(PMD_SIZE),
-                                                      altmap);
+                                       free_hugepage_table(pmd_page(*pmd),
+                                                           altmap);
 
                                spin_lock(&init_mm.page_table_lock);
                                pmd_clear(pmd);
@@ -989,9 +988,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
                                page_addr = page_address(pmd_page(*pmd));
                                if (!memchr_inv(page_addr, PAGE_INUSE,
                                                PMD_SIZE)) {
-                                       free_pagetable(pmd_page(*pmd),
-                                                      get_order(PMD_SIZE),
-                                                      altmap);
+                                       free_hugepage_table(pmd_page(*pmd),
+                                                           altmap);
 
                                        spin_lock(&init_mm.page_table_lock);
                                        pmd_clear(pmd);
@@ -1003,8 +1001,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
                }
 
                pte_base = (pte_t *)pmd_page_vaddr(*pmd);
-               remove_pte_table(pte_base, addr, next, altmap, direct);
-               free_pte_table(pte_base, pmd, altmap);
+               remove_pte_table(pte_base, addr, next, direct);
+               free_pte_table(pte_base, pmd);
        }
 
        /* Call free_pmd_table() in remove_pud_table(). */
@@ -1033,8 +1031,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
                            IS_ALIGNED(next, PUD_SIZE)) {
                                if (!direct)
                                        free_pagetable(pud_page(*pud),
-                                                      get_order(PUD_SIZE),
-                                                      altmap);
+                                                      get_order(PUD_SIZE));
 
                                spin_lock(&init_mm.page_table_lock);
                                pud_clear(pud);
@@ -1048,8 +1045,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
                                if (!memchr_inv(page_addr, PAGE_INUSE,
                                                PUD_SIZE)) {
                                        free_pagetable(pud_page(*pud),
-                                                      get_order(PUD_SIZE),
-                                                      altmap);
+                                                      get_order(PUD_SIZE));
 
                                        spin_lock(&init_mm.page_table_lock);
                                        pud_clear(pud);
@@ -1062,7 +1058,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
 
                pmd_base = pmd_offset(pud, 0);
                remove_pmd_table(pmd_base, addr, next, direct, altmap);
-               free_pmd_table(pmd_base, pud, altmap);
+               free_pmd_table(pmd_base, pud);
        }
 
        if (direct)
@@ -1094,7 +1090,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
                 * to adapt for boot-time switching between 4 and 5 level page tables.
                 */
                if (CONFIG_PGTABLE_LEVELS == 5)
-                       free_pud_table(pud_base, p4d, altmap);
+                       free_pud_table(pud_base, p4d);
        }
 
        if (direct)
index 01f682cf77a8b36bff9a2b45a8f8beb734b4b6c4..40a6085063d6fe87958f260bf9cbc1be3b647424 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/page.h>
 #include <asm/processor-flags.h>
 #include <asm/msr-index.h>
+#include <asm/nospec-branch.h>
 
        .text
        .code64
@@ -59,6 +60,7 @@ ENTRY(sme_encrypt_execute)
        movq    %rax, %r8               /* Workarea encryption routine */
        addq    $PAGE_SIZE, %r8         /* Workarea intermediate copy buffer */
 
+       ANNOTATE_RETPOLINE_SAFE
        call    *%rax                   /* Call the encryption routine */
 
        pop     %r12
index 004abf9ebf1222c169448090f7f1c570635bce41..34cda7e0551b4a8809bb4a1a9fedef22ce4a4e28 100644 (file)
@@ -702,4 +702,52 @@ int pmd_clear_huge(pmd_t *pmd)
 
        return 0;
 }
+
+/**
+ * pud_free_pmd_page - Clear pud entry and free pmd page.
+ * @pud: Pointer to a PUD.
+ *
+ * Every pmd entry of the pmd page is first released with
+ * pmd_free_pte_page(); only then is the pud entry cleared and the pmd
+ * page freed.  An empty (pud_none) entry is treated as success.  If a pmd
+ * entry cannot be released, 0 is returned with the pud entry still set;
+ * pte pages released before that point stay freed.
+ *
+ * Context: The pud range has been unmapped and TLB purged.
+ * Return: 1 if clearing the entry succeeded. 0 otherwise.
+ */
+int pud_free_pmd_page(pud_t *pud)
+{
+       pmd_t *pmd;
+       int i;
+
+       if (pud_none(*pud))
+               return 1;
+
+       pmd = (pmd_t *)pud_page_vaddr(*pud);
+
+       for (i = 0; i < PTRS_PER_PMD; i++)
+               if (!pmd_free_pte_page(&pmd[i]))
+                       return 0;
+
+       pud_clear(pud);
+       free_page((unsigned long)pmd);
+
+       return 1;
+}
+
+/**
+ * pmd_free_pte_page - Clear pmd entry and free pte page.
+ * @pmd: Pointer to a PMD.
+ *
+ * An empty (pmd_none) entry is treated as success.  The pte page address
+ * is read from the entry before the entry is cleared.  This
+ * implementation has no failure path and always returns 1.
+ *
+ * Context: The pmd range has been unmapped and TLB purged.
+ * Return: 1 if clearing the entry succeeded. 0 otherwise.
+ */
+int pmd_free_pte_page(pmd_t *pmd)
+{
+       pte_t *pte;
+
+       if (pmd_none(*pmd))
+               return 1;
+
+       pte = (pte_t *)pmd_page_vaddr(*pmd);
+       pmd_clear(pmd);
+       free_page((unsigned long)pte);
+
+       return 1;
+}
 #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
index ce38f165489b5a13d92091c8671879f30ce44e20..631507f0c1980cc03367c161b7164a16c40de496 100644 (file)
@@ -332,7 +332,7 @@ static void __init pti_clone_user_shared(void)
 }
 
 /*
- * Clone the ESPFIX P4D into the user space visinble page table
+ * Clone the ESPFIX P4D into the user space visible page table
  */
 static void __init pti_setup_espfix64(void)
 {
index 45e4eb5bcbb2ab4894b12d6a948b19eb25250af4..b725154182cc331e2fdd51c5118a9623f4b4d6d2 100644 (file)
 #include <linux/netdevice.h>
 #include <linux/filter.h>
 #include <linux/if_vlan.h>
-#include <asm/cacheflush.h>
+#include <linux/bpf.h>
+
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
-#include <linux/bpf.h>
 
 /*
  * assembly code in arch/x86/net/bpf_jit.S
@@ -61,7 +61,12 @@ static bool is_imm8(int value)
 
 static bool is_simm32(s64 value)
 {
-       return value == (s64) (s32) value;
+       return value == (s64)(s32)value;
+}
+
+/* True if @value survives truncation to u32, i.e. its upper 32 bits are zero. */
+static bool is_uimm32(u64 value)
+{
+       return value == (u64)(u32)value;
 }
 
 /* mov dst, src */
@@ -98,16 +103,6 @@ static int bpf_size_to_x86_bytes(int bpf_size)
 #define X86_JLE 0x7E
 #define X86_JG  0x7F
 
-static void bpf_flush_icache(void *start, void *end)
-{
-       mm_segment_t old_fs = get_fs();
-
-       set_fs(KERNEL_DS);
-       smp_wmb();
-       flush_icache_range((unsigned long)start, (unsigned long)end);
-       set_fs(old_fs);
-}
-
 #define CHOOSE_LOAD_FUNC(K, func) \
        ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
 
@@ -212,7 +207,7 @@ struct jit_context {
 /* emit x64 prologue code for BPF program and check it's size.
  * bpf_tail_call helper will skip it while jumping into another program
  */
-static void emit_prologue(u8 **pprog, u32 stack_depth)
+static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
 {
        u8 *prog = *pprog;
        int cnt = 0;
@@ -247,18 +242,21 @@ static void emit_prologue(u8 **pprog, u32 stack_depth)
        /* mov qword ptr [rbp+24],r15 */
        EMIT4(0x4C, 0x89, 0x7D, 24);
 
-       /* Clear the tail call counter (tail_call_cnt): for eBPF tail calls
-        * we need to reset the counter to 0. It's done in two instructions,
-        * resetting rax register to 0 (xor on eax gets 0 extended), and
-        * moving it to the counter location.
-        */
+       if (!ebpf_from_cbpf) {
+               /* Clear the tail call counter (tail_call_cnt): for eBPF tail
+                * calls we need to reset the counter to 0. It's done in two
+                * instructions, resetting rax register to 0, and moving it
+                * to the counter location.
+                */
 
-       /* xor eax, eax */
-       EMIT2(0x31, 0xc0);
-       /* mov qword ptr [rbp+32], rax */
-       EMIT4(0x48, 0x89, 0x45, 32);
+               /* xor eax, eax */
+               EMIT2(0x31, 0xc0);
+               /* mov qword ptr [rbp+32], rax */
+               EMIT4(0x48, 0x89, 0x45, 32);
+
+               BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
+       }
 
-       BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
        *pprog = prog;
 }
 
@@ -356,6 +354,86 @@ static void emit_load_skb_data_hlen(u8 **pprog)
        *pprog = prog;
 }
 
+/*
+ * Emit code that loads the 32-bit immediate @imm32 into @dst_reg and store
+ * the updated output pointer back into *@pprog.
+ *
+ * Three encodings are chosen from, in this order:
+ *  - @sign_propagate set and imm32 negative as s32: sign-extending 64-bit mov
+ *  - imm32 == 0: xor of the register with itself
+ *  - otherwise: plain 32-bit mov, which zero-extends
+ *
+ * do_jit() passes @sign_propagate = true for BPF_ALU64 and false for BPF_ALU.
+ */
+static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
+                          u32 dst_reg, const u32 imm32)
+{
+       u8 *prog = *pprog;
+       u8 b1, b2, b3;
+       int cnt = 0;
+
+       /* optimization: if imm32 is positive, use 'mov %eax, imm32'
+        * (which zero-extends imm32) to save 2 bytes.
+        */
+       if (sign_propagate && (s32)imm32 < 0) {
+               /* 'mov %rax, imm32' sign extends imm32 */
+               b1 = add_1mod(0x48, dst_reg);
+               b2 = 0xC7;
+               b3 = 0xC0;
+               EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
+               goto done;
+       }
+
+       /* optimization: if imm32 is zero, use 'xor %eax, %eax'
+        * to save 3 bytes.
+        */
+       if (imm32 == 0) {
+               if (is_ereg(dst_reg))
+                       EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+               b2 = 0x31; /* xor */
+               b3 = 0xC0;
+               EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
+               goto done;
+       }
+
+       /* mov %eax, imm32 */
+       if (is_ereg(dst_reg))
+               EMIT1(add_1mod(0x40, dst_reg));
+       EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+done:
+       *pprog = prog;
+}
+
+/*
+ * Emit code that loads the 64-bit immediate (@imm32_hi:@imm32_lo) into
+ * @dst_reg and store the updated output pointer back into *@pprog.
+ *
+ * When the combined value fits in 32 unsigned bits (is_uimm32) the shorter
+ * emit_mov_imm32() path is used with sign propagation disabled; otherwise
+ * a movabsq is emitted with the low dword followed by the high dword.
+ */
+static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
+                          const u32 imm32_hi, const u32 imm32_lo)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+
+       if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
+               /* For emitting plain u32, where sign bit must not be
+                * propagated LLVM tends to load imm64 over mov32
+                * directly, so save couple of bytes by just doing
+                * 'mov %eax, imm32' instead.
+                */
+               emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
+       } else {
+               /* movabsq %rax, imm64 */
+               EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
+               EMIT(imm32_lo, 4);
+               EMIT(imm32_hi, 4);
+       }
+
+       *pprog = prog;
+}
+
+/*
+ * Emit a register-to-register move from @src_reg to @dst_reg and store the
+ * updated output pointer back into *@pprog.
+ *
+ * @is64 selects the 64-bit form (EMIT_mov); otherwise a 32-bit mov (opcode
+ * 0x89) is emitted, preceded by a prefix byte built from 0x40 by add_2mod()
+ * only when is_ereg() is true for either register.
+ */
+static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
+{
+       u8 *prog = *pprog;
+       int cnt = 0;
+
+       if (is64) {
+               /* mov dst, src */
+               EMIT_mov(dst_reg, src_reg);
+       } else {
+               /* mov32 dst, src */
+               if (is_ereg(dst_reg) || is_ereg(src_reg))
+                       EMIT1(add_2mod(0x40, dst_reg, src_reg));
+               EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+       }
+
+       *pprog = prog;
+}
+
 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                  int oldproglen, struct jit_context *ctx)
 {
@@ -369,7 +447,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
        int proglen = 0;
        u8 *prog = temp;
 
-       emit_prologue(&prog, bpf_prog->aux->stack_depth);
+       emit_prologue(&prog, bpf_prog->aux->stack_depth,
+                     bpf_prog_was_classic(bpf_prog));
 
        if (seen_ld_abs)
                emit_load_skb_data_hlen(&prog);
@@ -378,7 +457,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                const s32 imm32 = insn->imm;
                u32 dst_reg = insn->dst_reg;
                u32 src_reg = insn->src_reg;
-               u8 b1 = 0, b2 = 0, b3 = 0;
+               u8 b2 = 0, b3 = 0;
                s64 jmp_offset;
                u8 jmp_cond;
                bool reload_skb_data;
@@ -414,16 +493,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
                        break;
 
-                       /* mov dst, src */
                case BPF_ALU64 | BPF_MOV | BPF_X:
-                       EMIT_mov(dst_reg, src_reg);
-                       break;
-
-                       /* mov32 dst, src */
                case BPF_ALU | BPF_MOV | BPF_X:
-                       if (is_ereg(dst_reg) || is_ereg(src_reg))
-                               EMIT1(add_2mod(0x40, dst_reg, src_reg));
-                       EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+                       emit_mov_reg(&prog,
+                                    BPF_CLASS(insn->code) == BPF_ALU64,
+                                    dst_reg, src_reg);
                        break;
 
                        /* neg dst */
@@ -486,58 +560,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        break;
 
                case BPF_ALU64 | BPF_MOV | BPF_K:
-                       /* optimization: if imm32 is positive,
-                        * use 'mov eax, imm32' (which zero-extends imm32)
-                        * to save 2 bytes
-                        */
-                       if (imm32 < 0) {
-                               /* 'mov rax, imm32' sign extends imm32 */
-                               b1 = add_1mod(0x48, dst_reg);
-                               b2 = 0xC7;
-                               b3 = 0xC0;
-                               EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
-                               break;
-                       }
-
                case BPF_ALU | BPF_MOV | BPF_K:
-                       /* optimization: if imm32 is zero, use 'xor <dst>,<dst>'
-                        * to save 3 bytes.
-                        */
-                       if (imm32 == 0) {
-                               if (is_ereg(dst_reg))
-                                       EMIT1(add_2mod(0x40, dst_reg, dst_reg));
-                               b2 = 0x31; /* xor */
-                               b3 = 0xC0;
-                               EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
-                               break;
-                       }
-
-                       /* mov %eax, imm32 */
-                       if (is_ereg(dst_reg))
-                               EMIT1(add_1mod(0x40, dst_reg));
-                       EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+                       emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64,
+                                      dst_reg, imm32);
                        break;
 
                case BPF_LD | BPF_IMM | BPF_DW:
-                       /* optimization: if imm64 is zero, use 'xor <dst>,<dst>'
-                        * to save 7 bytes.
-                        */
-                       if (insn[0].imm == 0 && insn[1].imm == 0) {
-                               b1 = add_2mod(0x48, dst_reg, dst_reg);
-                               b2 = 0x31; /* xor */
-                               b3 = 0xC0;
-                               EMIT3(b1, b2, add_2reg(b3, dst_reg, dst_reg));
-
-                               insn++;
-                               i++;
-                               break;
-                       }
-
-                       /* movabsq %rax, imm64 */
-                       EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
-                       EMIT(insn[0].imm, 4);
-                       EMIT(insn[1].imm, 4);
-
+                       emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
                        insn++;
                        i++;
                        break;
@@ -594,36 +623,38 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                case BPF_ALU | BPF_MUL | BPF_X:
                case BPF_ALU64 | BPF_MUL | BPF_K:
                case BPF_ALU64 | BPF_MUL | BPF_X:
-                       EMIT1(0x50); /* push rax */
-                       EMIT1(0x52); /* push rdx */
+               {
+                       bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+
+                       if (dst_reg != BPF_REG_0)
+                               EMIT1(0x50); /* push rax */
+                       if (dst_reg != BPF_REG_3)
+                               EMIT1(0x52); /* push rdx */
 
                        /* mov r11, dst_reg */
                        EMIT_mov(AUX_REG, dst_reg);
 
                        if (BPF_SRC(insn->code) == BPF_X)
-                               /* mov rax, src_reg */
-                               EMIT_mov(BPF_REG_0, src_reg);
+                               emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
                        else
-                               /* mov rax, imm32 */
-                               EMIT3_off32(0x48, 0xC7, 0xC0, imm32);
+                               emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);
 
-                       if (BPF_CLASS(insn->code) == BPF_ALU64)
+                       if (is64)
                                EMIT1(add_1mod(0x48, AUX_REG));
                        else if (is_ereg(AUX_REG))
                                EMIT1(add_1mod(0x40, AUX_REG));
                        /* mul(q) r11 */
                        EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
 
-                       /* mov r11, rax */
-                       EMIT_mov(AUX_REG, BPF_REG_0);
-
-                       EMIT1(0x5A); /* pop rdx */
-                       EMIT1(0x58); /* pop rax */
-
-                       /* mov dst_reg, r11 */
-                       EMIT_mov(dst_reg, AUX_REG);
+                       if (dst_reg != BPF_REG_3)
+                               EMIT1(0x5A); /* pop rdx */
+                       if (dst_reg != BPF_REG_0) {
+                               /* mov dst_reg, rax */
+                               EMIT_mov(dst_reg, BPF_REG_0);
+                               EMIT1(0x58); /* pop rax */
+                       }
                        break;
-
+               }
                        /* shifts */
                case BPF_ALU | BPF_LSH | BPF_K:
                case BPF_ALU | BPF_RSH | BPF_K:
@@ -641,7 +672,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        case BPF_RSH: b3 = 0xE8; break;
                        case BPF_ARSH: b3 = 0xF8; break;
                        }
-                       EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
+
+                       if (imm32 == 1)
+                               EMIT2(0xD1, add_1reg(b3, dst_reg));
+                       else
+                               EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
                        break;
 
                case BPF_ALU | BPF_LSH | BPF_X:
@@ -1188,7 +1223,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
         * may converge on the last pass. In such case do one more
         * pass to emit the final image
         */
-       for (pass = 0; pass < 10 || image; pass++) {
+       for (pass = 0; pass < 20 || image; pass++) {
                proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
                if (proglen <= 0) {
                        image = NULL;
@@ -1215,13 +1250,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                        }
                }
                oldproglen = proglen;
+               cond_resched();
        }
 
        if (bpf_jit_enable > 1)
                bpf_jit_dump(prog->len, proglen, pass + 1, image);
 
        if (image) {
-               bpf_flush_icache(header, image + proglen);
                if (!prog->is_func || extra_pass) {
                        bpf_jit_binary_lock_ro(header);
                } else {
index 174c59774cc935430583160c6c158d5de363c7ef..a7a7677265b6f73779d486da8bb5ae0b83e48062 100644 (file)
@@ -460,7 +460,7 @@ static int nmi_setup(void)
                goto fail;
 
        for_each_possible_cpu(cpu) {
-               if (!cpu)
+               if (!IS_ENABLED(CONFIG_SMP) || !cpu)
                        continue;
 
                memcpy(per_cpu(cpu_msrs, cpu).counters,
index 2c67bae6bb53e033b823e0146f3f1a0b2901eba0..fb1df9488e98b843f89155b3afbfe5958128a306 100644 (file)
@@ -79,7 +79,7 @@ static void intel_mid_power_off(void)
 
 static void intel_mid_reboot(void)
 {
-       intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
+       intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);
 }
 
 static unsigned long __init intel_mid_calibrate_tsc(void)
index de53bd15df5a865589e7eda181a1771fbe7297e4..24bb7598774e6aa94203e601f41329d31308692e 100644 (file)
@@ -102,7 +102,7 @@ ENTRY(startup_32)
         * don't we'll eventually crash trying to execute encrypted
         * instructions.
         */
-       bt      $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
+       btl     $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
        jnc     .Ldone
        movl    $MSR_K8_SYSCFG, %ecx
        rdmsr
index c047f42552e1a61ed0a5787d904681974cc05af1..3c2c2530737efc717c9945ee1e2990e5b5bfd4c5 100644 (file)
@@ -1376,8 +1376,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 
        if (!xen_initial_domain()) {
                add_preferred_console("xenboot", 0, NULL);
-               add_preferred_console("tty", 0, NULL);
-               add_preferred_console("hvc", 0, NULL);
                if (pci_xen)
                        x86_init.pci.arch_init = pci_xen_init;
        } else {
@@ -1410,6 +1408,10 @@ asmlinkage __visible void __init xen_start_kernel(void)
 
                xen_boot_params_init_edd();
        }
+
+       add_preferred_console("tty", 0, NULL);
+       add_preferred_console("hvc", 0, NULL);
+
 #ifdef CONFIG_PCI
        /* PCI BIOS service won't work from a PV guest. */
        pci_probe &= ~PCI_PROBE_BIOS;
index d9f96cc5d74367932020edfe71424d7940fa77a3..1d83152c761bcb1f74929006fbe52d450d68b81c 100644 (file)
@@ -1,12 +1,15 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/types.h>
 #include <linux/tick.h>
+#include <linux/percpu-defs.h>
 
 #include <xen/xen.h>
 #include <xen/interface/xen.h>
 #include <xen/grant_table.h>
 #include <xen/events.h>
 
+#include <asm/cpufeatures.h>
+#include <asm/msr-index.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/page.h>
 #include <asm/fixmap.h>
@@ -15,6 +18,8 @@
 #include "mmu.h"
 #include "pmu.h"
 
+static DEFINE_PER_CPU(u64, spec_ctrl);
+
 void xen_arch_pre_suspend(void)
 {
        xen_save_time_memory_area();
@@ -35,6 +40,9 @@ void xen_arch_post_suspend(int cancelled)
 
 static void xen_vcpu_notify_restore(void *data)
 {
+       if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL))
+               wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl));
+
        /* Boot processor notified via generic timekeeping_resume() */
        if (smp_processor_id() == 0)
                return;
@@ -44,7 +52,15 @@ static void xen_vcpu_notify_restore(void *data)
 
 static void xen_vcpu_notify_suspend(void *data)
 {
+       u64 tmp;
+
        tick_suspend_local();
+
+       if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
+               rdmsrl(MSR_IA32_SPEC_CTRL, tmp);
+               this_cpu_write(spec_ctrl, tmp);
+               wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+       }
 }
 
 void xen_arch_resume(void)
index 623720a111432f68e8f8c080e4085d5cab151565..732631ce250fcab756d7e4d8d39ee054cf7a4de9 100644 (file)
@@ -16,6 +16,7 @@
  */
 
 #include <linux/dma-contiguous.h>
+#include <linux/dma-direct.h>
 #include <linux/gfp.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
@@ -123,7 +124,7 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
                              unsigned long attrs)
 {
        unsigned long ret;
-       unsigned long uncached = 0;
+       unsigned long uncached;
        unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
        struct page *page = NULL;
 
@@ -144,15 +145,27 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
        if (!page)
                return NULL;
 
-       ret = (unsigned long)page_address(page);
+       *handle = phys_to_dma(dev, page_to_phys(page));
 
-       /* We currently don't support coherent memory outside KSEG */
+#ifdef CONFIG_MMU
+       if (PageHighMem(page)) {
+               void *p;
 
+               p = dma_common_contiguous_remap(page, size, VM_MAP,
+                                               pgprot_noncached(PAGE_KERNEL),
+                                               __builtin_return_address(0));
+               if (!p) {
+                       if (!dma_release_from_contiguous(dev, page, count))
+                               __free_pages(page, get_order(size));
+               }
+               return p;
+       }
+#endif
+       ret = (unsigned long)page_address(page);
        BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR ||
               ret > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1);
 
        uncached = ret + XCHAL_KSEG_BYPASS_VADDR - XCHAL_KSEG_CACHED_VADDR;
-       *handle = virt_to_bus((void *)ret);
        __invalidate_dcache_range(ret, size);
 
        return (void *)uncached;
@@ -161,13 +174,20 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
 static void xtensa_dma_free(struct device *dev, size_t size, void *vaddr,
                            dma_addr_t dma_handle, unsigned long attrs)
 {
-       unsigned long addr = (unsigned long)vaddr +
-               XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
-       struct page *page = virt_to_page(addr);
        unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
-       BUG_ON(addr < XCHAL_KSEG_CACHED_VADDR ||
-              addr > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1);
+       unsigned long addr = (unsigned long)vaddr;
+       struct page *page;
+
+       if (addr >= XCHAL_KSEG_BYPASS_VADDR &&
+           addr - XCHAL_KSEG_BYPASS_VADDR < XCHAL_KSEG_SIZE) {
+               addr += XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
+               page = virt_to_page(addr);
+       } else {
+#ifdef CONFIG_MMU
+               dma_common_free_remap(vaddr, size, VM_MAP);
+#endif
+               page = pfn_to_page(PHYS_PFN(dma_to_phys(dev, dma_handle)));
+       }
 
        if (!dma_release_from_contiguous(dev, page, count))
                __free_pages(page, get_order(size));
index d776ec0d7b22d375c4df5121a3a18f95babca749..34aead7dcb4878bf18f48cc2087fd93b5bee95ca 100644 (file)
@@ -79,19 +79,75 @@ void __init zones_init(void)
        free_area_init_node(0, zones_size, ARCH_PFN_OFFSET, NULL);
 }
 
+#ifdef CONFIG_HIGHMEM
+static void __init free_area_high(unsigned long pfn, unsigned long end)
+{
+       for (; pfn < end; pfn++)
+               free_highmem_page(pfn_to_page(pfn));
+}
+
+static void __init free_highpages(void)
+{
+       unsigned long max_low = max_low_pfn;
+       struct memblock_region *mem, *res;
+
+       reset_all_zones_managed_pages();
+       /* set highmem page free */
+       for_each_memblock(memory, mem) {
+               unsigned long start = memblock_region_memory_base_pfn(mem);
+               unsigned long end = memblock_region_memory_end_pfn(mem);
+
+               /* Ignore complete lowmem entries */
+               if (end <= max_low)
+                       continue;
+
+               if (memblock_is_nomap(mem))
+                       continue;
+
+               /* Truncate partial highmem entries */
+               if (start < max_low)
+                       start = max_low;
+
+               /* Find and exclude any reserved regions */
+               for_each_memblock(reserved, res) {
+                       unsigned long res_start, res_end;
+
+                       res_start = memblock_region_reserved_base_pfn(res);
+                       res_end = memblock_region_reserved_end_pfn(res);
+
+                       if (res_end < start)
+                               continue;
+                       if (res_start < start)
+                               res_start = start;
+                       if (res_start > end)
+                               res_start = end;
+                       if (res_end > end)
+                               res_end = end;
+                       if (res_start != start)
+                               free_area_high(start, res_start);
+                       start = res_end;
+                       if (start == end)
+                               break;
+               }
+
+               /* And now free anything which remains */
+               if (start < end)
+                       free_area_high(start, end);
+       }
+}
+#else
+static void __init free_highpages(void)
+{
+}
+#endif
+
 /*
  * Initialize memory pages.
  */
 
 void __init mem_init(void)
 {
-#ifdef CONFIG_HIGHMEM
-       unsigned long tmp;
-
-       reset_all_zones_managed_pages();
-       for (tmp = max_low_pfn; tmp < max_pfn; tmp++)
-               free_highmem_page(pfn_to_page(tmp));
-#endif
+       free_highpages();
 
        max_mapnr = max_pfn - ARCH_PFN_OFFSET;
        high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT);
index 4117524ca45bc76cceb163c42f752ffd8d7e1659..c2033a232a443a3b54def7e9d88b646a7ac950d5 100644 (file)
@@ -812,7 +812,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
        struct gendisk *disk;
        struct request_queue *q;
        struct blkcg_gq *blkg;
-       struct module *owner;
        unsigned int major, minor;
        int key_len, part, ret;
        char *body;
@@ -904,9 +903,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
        spin_unlock_irq(q->queue_lock);
        rcu_read_unlock();
 fail:
-       owner = disk->fops->owner;
-       put_disk(disk);
-       module_put(owner);
+       put_disk_and_module(disk);
        /*
         * If queue was bypassing, we should retry.  Do so after a
         * short msleep().  It isn't strictly necessary but queue
@@ -931,13 +928,9 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx)
        __releases(ctx->disk->queue->queue_lock) __releases(rcu)
 {
-       struct module *owner;
-
        spin_unlock_irq(ctx->disk->queue->queue_lock);
        rcu_read_unlock();
-       owner = ctx->disk->fops->owner;
-       put_disk(ctx->disk);
-       module_put(owner);
+       put_disk_and_module(ctx->disk);
 }
 EXPORT_SYMBOL_GPL(blkg_conf_finish);
 
index 2d1a7bbe063437bfacfca43ad479c305fccf56c7..6d82c4f7fadd9466f84a2405352f8296597eaa09 100644 (file)
@@ -2434,7 +2434,7 @@ blk_qc_t submit_bio(struct bio *bio)
                unsigned int count;
 
                if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
-                       count = queue_logical_block_size(bio->bi_disk->queue);
+                       count = queue_logical_block_size(bio->bi_disk->queue) >> 9;
                else
                        count = bio_sectors(bio);
 
index 357492712b0ea85362d2bbd6b2aa1687459cba66..16e83e6df404a24fd1a59baeb77b9c7b7cc9890c 100644 (file)
@@ -712,7 +712,6 @@ static void __blk_mq_requeue_request(struct request *rq)
 
        trace_block_rq_requeue(q, rq);
        wbt_requeue(q->rq_wb, &rq->issue_stat);
-       blk_mq_sched_requeue_request(rq);
 
        if (blk_mq_rq_state(rq) != MQ_RQ_IDLE) {
                blk_mq_rq_update_state(rq, MQ_RQ_IDLE);
@@ -725,6 +724,9 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
 {
        __blk_mq_requeue_request(rq);
 
+       /* this request will be re-inserted to io scheduler queue */
+       blk_mq_sched_requeue_request(rq);
+
        BUG_ON(blk_queued_rq(rq));
        blk_mq_add_to_requeue_list(rq, true, kick_requeue_list);
 }
index 88a53c188cb7338e74a9a55a44d8d0e5a2a758da..9656f9e9f99e20af13f1e175aba15f9447d9ef87 100644 (file)
@@ -547,7 +547,7 @@ static int exact_lock(dev_t devt, void *data)
 {
        struct gendisk *p = data;
 
-       if (!get_disk(p))
+       if (!get_disk_and_module(p))
                return -1;
        return 0;
 }
@@ -717,6 +717,11 @@ void del_gendisk(struct gendisk *disk)
        blk_integrity_del(disk);
        disk_del_events(disk);
 
+       /*
+        * Block lookups of the disk until all bdevs are unhashed and the
+        * disk is marked as dead (GENHD_FL_UP cleared).
+        */
+       down_write(&disk->lookup_sem);
        /* invalidate stuff */
        disk_part_iter_init(&piter, disk,
                             DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
@@ -731,6 +736,7 @@ void del_gendisk(struct gendisk *disk)
        bdev_unhash_inode(disk_devt(disk));
        set_capacity(disk, 0);
        disk->flags &= ~GENHD_FL_UP;
+       up_write(&disk->lookup_sem);
 
        if (!(disk->flags & GENHD_FL_HIDDEN))
                sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
@@ -809,16 +815,28 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
 
                spin_lock_bh(&ext_devt_lock);
                part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
-               if (part && get_disk(part_to_disk(part))) {
+               if (part && get_disk_and_module(part_to_disk(part))) {
                        *partno = part->partno;
                        disk = part_to_disk(part);
                }
                spin_unlock_bh(&ext_devt_lock);
        }
 
-       if (disk && unlikely(disk->flags & GENHD_FL_HIDDEN)) {
-               put_disk(disk);
+       if (!disk)
+               return NULL;
+
+       /*
+        * Synchronize with del_gendisk() to not return disk that is being
+        * destroyed.
+        */
+       down_read(&disk->lookup_sem);
+       if (unlikely((disk->flags & GENHD_FL_HIDDEN) ||
+                    !(disk->flags & GENHD_FL_UP))) {
+               up_read(&disk->lookup_sem);
+               put_disk_and_module(disk);
                disk = NULL;
+       } else {
+               up_read(&disk->lookup_sem);
        }
        return disk;
 }
@@ -1418,6 +1436,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
                        kfree(disk);
                        return NULL;
                }
+               init_rwsem(&disk->lookup_sem);
                disk->node_id = node_id;
                if (disk_expand_part_tbl(disk, 0)) {
                        free_part_stats(&disk->part0);
@@ -1453,7 +1472,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
 }
 EXPORT_SYMBOL(__alloc_disk_node);
 
-struct kobject *get_disk(struct gendisk *disk)
+struct kobject *get_disk_and_module(struct gendisk *disk)
 {
        struct module *owner;
        struct kobject *kobj;
@@ -1471,17 +1490,30 @@ struct kobject *get_disk(struct gendisk *disk)
        return kobj;
 
 }
-
-EXPORT_SYMBOL(get_disk);
+EXPORT_SYMBOL(get_disk_and_module);
 
 void put_disk(struct gendisk *disk)
 {
        if (disk)
                kobject_put(&disk_to_dev(disk)->kobj);
 }
-
 EXPORT_SYMBOL(put_disk);
 
+/*
+ * This is a counterpart of get_disk_and_module() and thus also of
+ * get_gendisk().
+ */
+void put_disk_and_module(struct gendisk *disk)
+{
+       if (disk) {
+               struct module *owner = disk->fops->owner;
+
+               put_disk(disk);
+               module_put(owner);
+       }
+}
+EXPORT_SYMBOL(put_disk_and_module);
+
 static void set_disk_ro_uevent(struct gendisk *gd, int ro)
 {
        char event[] = "DISK_RO=1";
index 1668506d8ed80607b6e3c279fa22195fb73571de..3884d810efd27fc73bb07659b91296ea46265252 100644 (file)
@@ -225,7 +225,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
 
        if (start + len > i_size_read(bdev->bd_inode))
                return -EINVAL;
-       truncate_inode_pages_range(mapping, start, start + len);
+       truncate_inode_pages_range(mapping, start, start + len - 1);
        return blkdev_issue_discard(bdev, start >> 9, len >> 9,
                                    GFP_KERNEL, flags);
 }
index f95c60774ce8ca613417d3ccf54bee52010752ee..0d6d25e32e1f44fda0049bb3e213b85a3debfa1f 100644 (file)
@@ -833,6 +833,7 @@ static struct elevator_type kyber_sched = {
                .limit_depth = kyber_limit_depth,
                .prepare_request = kyber_prepare_request,
                .finish_request = kyber_finish_request,
+               .requeue_request = kyber_finish_request,
                .completed_request = kyber_completed_request,
                .dispatch_request = kyber_dispatch_request,
                .has_work = kyber_has_work,
index c56f211c84400662f3e18c137b51c3f5406e20fd..8ec0ba9f538619f177eef3416711689323161a65 100644 (file)
@@ -535,13 +535,22 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
        spin_unlock(&dd->lock);
 }
 
+/*
+ * Nothing to do here. This is defined only to ensure that .finish_request
+ * method is called upon request completion.
+ */
+static void dd_prepare_request(struct request *rq, struct bio *bio)
+{
+}
+
 /*
  * For zoned block devices, write unlock the target zone of
  * completed write requests. Do this while holding the zone lock
  * spinlock so that the zone is never unlocked while deadline_fifo_request()
- * while deadline_next_request() are executing.
+ * or deadline_next_request() are executing. This function is called for
+ * all requests, whether or not these requests complete successfully.
  */
-static void dd_completed_request(struct request *rq)
+static void dd_finish_request(struct request *rq)
 {
        struct request_queue *q = rq->q;
 
@@ -756,7 +765,8 @@ static struct elevator_type mq_deadline = {
        .ops.mq = {
                .insert_requests        = dd_insert_requests,
                .dispatch_request       = dd_dispatch_request,
-               .completed_request      = dd_completed_request,
+               .prepare_request        = dd_prepare_request,
+               .finish_request         = dd_finish_request,
                .next_request           = elv_rb_latter_request,
                .former_request         = elv_rb_former_request,
                .bio_merge              = dd_bio_merge,
index 91622db9aedffd997947642a5872802312346869..08dabcd8b6aefc6844bbb9d9e9c001e6ff71fb33 100644 (file)
@@ -51,6 +51,12 @@ const char *bdevname(struct block_device *bdev, char *buf)
 
 EXPORT_SYMBOL(bdevname);
 
+const char *bio_devname(struct bio *bio, char *buf)
+{
+       return disk_name(bio->bi_disk, bio->bi_partno, buf);
+}
+EXPORT_SYMBOL(bio_devname);
+
 /*
  * There's very little reason to use this, you should really
  * have a struct block_device just about everywhere and use
index 9ed51d0c6b1d171fc2eab785ef854b64f93721fe..e4929eec547fc4141a6b078e79d5fe2688a20198 100644 (file)
@@ -490,7 +490,7 @@ static int opal_discovery0_end(struct opal_dev *dev)
 
        if (!found_com_id) {
                pr_debug("Could not find OPAL comid for device. Returning early\n");
-               return -EOPNOTSUPP;;
+               return -EOPNOTSUPP;
        }
 
        dev->comid = comid;
index 11b113f8e36741aeb00e921ee64a5ae871f8d55f..ebb626ffb5fa2d38c853ddcbbe7227aa09c7f7dd 100644 (file)
@@ -74,10 +74,10 @@ void __init acpi_watchdog_init(void)
                res.start = gas->address;
                if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
                        res.flags = IORESOURCE_MEM;
-                       res.end = res.start + ALIGN(gas->access_width, 4);
+                       res.end = res.start + ALIGN(gas->access_width, 4) - 1;
                } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
                        res.flags = IORESOURCE_IO;
-                       res.end = res.start + gas->access_width;
+                       res.end = res.start + gas->access_width - 1;
                } else {
                        pr_warn("Unsupported address space: %u\n",
                                gas->space_id);
index 7128488a3a728ff54f00fc1085c45afb5152f089..f2eb6c37ea0aa9aed03f562883634add68078f55 100644 (file)
@@ -70,7 +70,6 @@ static async_cookie_t async_cookie;
 static bool battery_driver_registered;
 static int battery_bix_broken_package;
 static int battery_notification_delay_ms;
-static int battery_full_discharging;
 static unsigned int cache_time = 1000;
 module_param(cache_time, uint, 0644);
 MODULE_PARM_DESC(cache_time, "cache time in milliseconds");
@@ -215,12 +214,9 @@ static int acpi_battery_get_property(struct power_supply *psy,
                return -ENODEV;
        switch (psp) {
        case POWER_SUPPLY_PROP_STATUS:
-               if (battery->state & ACPI_BATTERY_STATE_DISCHARGING) {
-                       if (battery_full_discharging && battery->rate_now == 0)
-                               val->intval = POWER_SUPPLY_STATUS_FULL;
-                       else
-                               val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
-               } else if (battery->state & ACPI_BATTERY_STATE_CHARGING)
+               if (battery->state & ACPI_BATTERY_STATE_DISCHARGING)
+                       val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+               else if (battery->state & ACPI_BATTERY_STATE_CHARGING)
                        val->intval = POWER_SUPPLY_STATUS_CHARGING;
                else if (acpi_battery_is_charged(battery))
                        val->intval = POWER_SUPPLY_STATUS_FULL;
@@ -1170,12 +1166,6 @@ battery_notification_delay_quirk(const struct dmi_system_id *d)
        return 0;
 }
 
-static int __init battery_full_discharging_quirk(const struct dmi_system_id *d)
-{
-       battery_full_discharging = 1;
-       return 0;
-}
-
 static const struct dmi_system_id bat_dmi_table[] __initconst = {
        {
                .callback = battery_bix_broken_package_quirk,
@@ -1193,38 +1183,6 @@ static const struct dmi_system_id bat_dmi_table[] __initconst = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "Aspire V5-573G"),
                },
        },
-       {
-               .callback = battery_full_discharging_quirk,
-               .ident = "ASUS GL502VSK",
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "GL502VSK"),
-               },
-       },
-       {
-               .callback = battery_full_discharging_quirk,
-               .ident = "ASUS UX305LA",
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "UX305LA"),
-               },
-       },
-       {
-               .callback = battery_full_discharging_quirk,
-               .ident = "ASUS UX360UA",
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "UX360UA"),
-               },
-       },
-       {
-               .callback = battery_full_discharging_quirk,
-               .ident = "ASUS UX410UAK",
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "UX410UAK"),
-               },
-       },
        {},
 };
 
index bbe48ad20886c8530fe525ffe9f35725d1df1ddc..eb09ef55c38a2779c046241c337ea7be3cf75b79 100644 (file)
@@ -2675,10 +2675,14 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
        else
                ndr_desc->numa_node = NUMA_NO_NODE;
 
-       if(acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_CACHE_FLUSH)
+       /*
+        * Persistence domain bits are hierarchical, if
+        * ACPI_NFIT_CAPABILITY_CACHE_FLUSH is set then
+        * ACPI_NFIT_CAPABILITY_MEM_FLUSH is implied.
+        */
+       if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_CACHE_FLUSH)
                set_bit(ND_REGION_PERSIST_CACHE, &ndr_desc->flags);
-
-       if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_MEM_FLUSH)
+       else if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_MEM_FLUSH)
                set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc->flags);
 
        list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
index 8ccaae3550d284be2070f7e06dd6794c5bc8c5b3..85167603b9c94318bcef7c260de689c13e4e4545 100644 (file)
@@ -103,25 +103,27 @@ int acpi_map_pxm_to_node(int pxm)
  */
 int acpi_map_pxm_to_online_node(int pxm)
 {
-       int node, n, dist, min_dist;
+       int node, min_node;
 
        node = acpi_map_pxm_to_node(pxm);
 
        if (node == NUMA_NO_NODE)
                node = 0;
 
+       min_node = node;
        if (!node_online(node)) {
-               min_dist = INT_MAX;
+               int min_dist = INT_MAX, dist, n;
+
                for_each_online_node(n) {
                        dist = node_distance(node, n);
                        if (dist < min_dist) {
                                min_dist = dist;
-                               node = n;
+                               min_node = n;
                        }
                }
        }
 
-       return node;
+       return min_node;
 }
 EXPORT_SYMBOL(acpi_map_pxm_to_online_node);
 
index 355a95a83a3405abd73a4b5d4ded9df48e8f0e38..1ff17799769d0b2372d2b9d385af57dab15acf10 100644 (file)
@@ -550,7 +550,9 @@ static const struct pci_device_id ahci_pci_tbl[] = {
          .driver_data = board_ahci_yes_fbs },
        { PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x9230),
          .driver_data = board_ahci_yes_fbs },
-       { PCI_DEVICE(PCI_VENDOR_ID_TTI, 0x0642),
+       { PCI_DEVICE(PCI_VENDOR_ID_TTI, 0x0642), /* highpoint rocketraid 642L */
+         .driver_data = board_ahci_yes_fbs },
+       { PCI_DEVICE(PCI_VENDOR_ID_TTI, 0x0645), /* highpoint rocketraid 644L */
          .driver_data = board_ahci_yes_fbs },
 
        /* Promise */
index a0de7a38430c954b31c7b4e6e01a790ee98e85c2..7adcf3caabd00abbb08ef76c0e4e604f0ba8c97c 100644 (file)
@@ -665,6 +665,16 @@ int ahci_stop_engine(struct ata_port *ap)
        if ((tmp & (PORT_CMD_START | PORT_CMD_LIST_ON)) == 0)
                return 0;
 
+       /*
+        * Don't try to issue commands but return with ENODEV if the
+        * AHCI controller is not available anymore (e.g. due to PCIe hot
+        * unplugging). Otherwise a 500ms delay for each port is added.
+        */
+       if (tmp == 0xffffffff) {
+               dev_err(ap->host->dev, "AHCI controller unavailable!\n");
+               return -ENODEV;
+       }
+
        /* setting HBA to idle */
        tmp &= ~PORT_CMD_START;
        writel(tmp, port_mmio + PORT_CMD);
index 341d0ef82cbddbf3c67d2b210ada2b45eebb8958..30cc8f1a31e1299f3cc68659a5be959492249382 100644 (file)
@@ -340,7 +340,7 @@ static int ahci_platform_get_regulator(struct ahci_host_priv *hpriv, u32 port,
  * 2) regulator for controlling the targets power (optional)
  * 3) 0 - AHCI_MAX_CLKS clocks, as specified in the devs devicetree node,
  *    or for non devicetree enabled platforms a single clock
- *     4) phys (optional)
+ * 4) phys (optional)
  *
  * RETURNS:
  * The allocated ahci_host_priv on success, otherwise an ERR_PTR value
index 3c09122bf03825b6489a16b8f116bd05485aa1b4..7431ccd0331648d4ce64930901a440eb8bae2d70 100644 (file)
@@ -4530,6 +4530,25 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
        { "PIONEER DVD-RW  DVR-212D",   NULL,   ATA_HORKAGE_NOSETXFER },
        { "PIONEER DVD-RW  DVR-216D",   NULL,   ATA_HORKAGE_NOSETXFER },
 
+       /* Crucial BX100 SSD 500GB has broken LPM support */
+       { "CT500BX100SSD1",             NULL,   ATA_HORKAGE_NOLPM },
+
+       /* 512GB MX100 with MU01 firmware has both queued TRIM and LPM issues */
+       { "Crucial_CT512MX100*",        "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM |
+                                               ATA_HORKAGE_NOLPM, },
+       /* 512GB MX100 with newer firmware has only LPM issues */
+       { "Crucial_CT512MX100*",        NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM |
+                                               ATA_HORKAGE_NOLPM, },
+
+       /* 480GB+ M500 SSDs have both queued TRIM and LPM issues */
+       { "Crucial_CT480M500*",         NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM |
+                                               ATA_HORKAGE_NOLPM, },
+       { "Crucial_CT960M500*",         NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM |
+                                               ATA_HORKAGE_NOLPM, },
+
        /* devices that don't properly handle queued TRIM commands */
        { "Micron_M500_*",              NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM, },
@@ -4541,7 +4560,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
                                                ATA_HORKAGE_ZERO_AFTER_TRIM, },
        { "Crucial_CT*MX100*",          "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM, },
-       { "Samsung SSD 8*",             NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
+       { "Samsung SSD 840*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+       { "Samsung SSD 850*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM, },
        { "FCCT*M500*",                 NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM, },
@@ -5401,8 +5422,7 @@ void ata_qc_issue(struct ata_queued_cmd *qc)
         * We guarantee to LLDs that they will have at least one
         * non-zero sg if the command is a data command.
         */
-       if (WARN_ON_ONCE(ata_is_data(prot) &&
-                        (!qc->sg || !qc->n_elem || !qc->nbytes)))
+       if (ata_is_data(prot) && (!qc->sg || !qc->n_elem || !qc->nbytes))
                goto sys_err;
 
        if (ata_is_dma(prot) || (ata_is_pio(prot) &&
index 11c3137d7b0af5dda9cc3c45364f7677d7fa892d..c016829a38fd21798e263cdc678517b3dbf728f7 100644 (file)
@@ -815,7 +815,8 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
 
        if (ap->pflags & ATA_PFLAG_LOADING)
                ap->pflags &= ~ATA_PFLAG_LOADING;
-       else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
+       else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) &&
+               !(ap->flags & ATA_FLAG_SAS_HOST))
                schedule_delayed_work(&ap->hotplug_task, 0);
 
        if (ap->pflags & ATA_PFLAG_RECOVERED)
index 66be961c93a4e3311a7d477ec666e3f17150397d..89a9d4a2efc8a56a76407611a30d4079363fdfe9 100644 (file)
@@ -3316,6 +3316,12 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc)
                goto invalid_fld;
        }
 
+       /* We may not issue NCQ commands to devices not supporting NCQ */
+       if (ata_is_ncq(tf->protocol) && !ata_ncq_enabled(dev)) {
+               fp = 1;
+               goto invalid_fld;
+       }
+
        /* sanity check for pio multi commands */
        if ((cdb[1] & 0xe0) && !is_multi_taskfile(tf)) {
                fp = 1;
@@ -4282,7 +4288,7 @@ static inline void ata_scsi_dump_cdb(struct ata_port *ap,
 #ifdef ATA_DEBUG
        struct scsi_device *scsidev = cmd->device;
 
-       DPRINTK("CDB (%u:%d,%d,%d) %9ph\n",
+       DPRINTK("CDB (%u:%d,%d,%lld) %9ph\n",
                ap->print_id,
                scsidev->channel, scsidev->id, scsidev->lun,
                cmd->cmnd);
@@ -4309,7 +4315,9 @@ static inline int __ata_scsi_queuecmd(struct scsi_cmnd *scmd,
                if (likely((scsi_op != ATA_16) || !atapi_passthru16)) {
                        /* relay SCSI command to ATAPI device */
                        int len = COMMAND_SIZE(scsi_op);
-                       if (unlikely(len > scmd->cmd_len || len > dev->cdb_len))
+                       if (unlikely(len > scmd->cmd_len ||
+                                    len > dev->cdb_len ||
+                                    scmd->cmd_len > ATAPI_CDB_LEN))
                                goto bad_cdb_len;
 
                        xlat_func = atapi_xlat;
index 80ee2f2a50d02872a9b657e99d320ac7dfe2cc5a..6456e07db72a7ea4e5cf2bcb1110dec9db42e946 100644 (file)
 enum sata_rcar_type {
        RCAR_GEN1_SATA,
        RCAR_GEN2_SATA,
+       RCAR_GEN3_SATA,
        RCAR_R8A7790_ES1_SATA,
 };
 
@@ -784,26 +785,11 @@ static void sata_rcar_setup_port(struct ata_host *host)
        ioaddr->command_addr    = ioaddr->cmd_addr + (ATA_REG_CMD << 2);
 }
 
-static void sata_rcar_init_controller(struct ata_host *host)
+static void sata_rcar_init_module(struct sata_rcar_priv *priv)
 {
-       struct sata_rcar_priv *priv = host->private_data;
        void __iomem *base = priv->base;
        u32 val;
 
-       /* reset and setup phy */
-       switch (priv->type) {
-       case RCAR_GEN1_SATA:
-               sata_rcar_gen1_phy_init(priv);
-               break;
-       case RCAR_GEN2_SATA:
-       case RCAR_R8A7790_ES1_SATA:
-               sata_rcar_gen2_phy_init(priv);
-               break;
-       default:
-               dev_warn(host->dev, "SATA phy is not initialized\n");
-               break;
-       }
-
        /* SATA-IP reset state */
        val = ioread32(base + ATAPI_CONTROL1_REG);
        val |= ATAPI_CONTROL1_RESET;
@@ -824,10 +810,33 @@ static void sata_rcar_init_controller(struct ata_host *host)
        /* ack and mask */
        iowrite32(0, base + SATAINTSTAT_REG);
        iowrite32(0x7ff, base + SATAINTMASK_REG);
+
        /* enable interrupts */
        iowrite32(ATAPI_INT_ENABLE_SATAINT, base + ATAPI_INT_ENABLE_REG);
 }
 
+/*
+ * Bring the controller up: initialise the PHY according to priv->type, then
+ * run the common SATA module initialisation via sata_rcar_init_module().
+ */
+static void sata_rcar_init_controller(struct ata_host *host)
+{
+       struct sata_rcar_priv *priv = host->private_data;
+
+       /* reset and setup phy */
+       switch (priv->type) {
+       case RCAR_GEN1_SATA:
+               sata_rcar_gen1_phy_init(priv);
+               break;
+       case RCAR_GEN2_SATA:
+       case RCAR_GEN3_SATA:
+       case RCAR_R8A7790_ES1_SATA:
+               /* Gen3 and R8A7790 ES1 share the Gen2 PHY init routine */
+               sata_rcar_gen2_phy_init(priv);
+               break;
+       default:
+               /* unknown type: warn, but still initialise the module below */
+               dev_warn(host->dev, "SATA phy is not initialized\n");
+               break;
+       }
+
+       sata_rcar_init_module(priv);
+}
+
 static const struct of_device_id sata_rcar_match[] = {
        {
                /* Deprecated by "renesas,sata-r8a7779" */
@@ -856,7 +865,7 @@ static const struct of_device_id sata_rcar_match[] = {
        },
        {
                .compatible = "renesas,sata-r8a7795",
-               .data = (void *)RCAR_GEN2_SATA
+               .data = (void *)RCAR_GEN3_SATA
        },
        {
                .compatible = "renesas,rcar-gen2-sata",
@@ -864,7 +873,7 @@ static const struct of_device_id sata_rcar_match[] = {
        },
        {
                .compatible = "renesas,rcar-gen3-sata",
-               .data = (void *)RCAR_GEN2_SATA
+               .data = (void *)RCAR_GEN3_SATA
        },
        { },
 };
@@ -982,11 +991,18 @@ static int sata_rcar_resume(struct device *dev)
        if (ret)
                return ret;
 
-       /* ack and mask */
-       iowrite32(0, base + SATAINTSTAT_REG);
-       iowrite32(0x7ff, base + SATAINTMASK_REG);
-       /* enable interrupts */
-       iowrite32(ATAPI_INT_ENABLE_SATAINT, base + ATAPI_INT_ENABLE_REG);
+       if (priv->type == RCAR_GEN3_SATA) {
+               sata_rcar_gen2_phy_init(priv);
+               sata_rcar_init_module(priv);
+       } else {
+               /* ack and mask */
+               iowrite32(0, base + SATAINTSTAT_REG);
+               iowrite32(0x7ff, base + SATAINTMASK_REG);
+
+               /* enable interrupts */
+               iowrite32(ATAPI_INT_ENABLE_SATAINT,
+                         base + ATAPI_INT_ENABLE_REG);
+       }
 
        ata_host_resume(host);
 
index 9180b9bd58216f780ab608cf75d868cb60e484b1..834509506ef643399a18ec3cb841d8827c6c70c5 100644 (file)
@@ -97,7 +97,7 @@ static struct img_ascii_lcd_config boston_config = {
 static void malta_update(struct img_ascii_lcd_ctx *ctx)
 {
        unsigned int i;
-       int err;
+       int err = 0;
 
        for (i = 0; i < ctx->cfg->num_chars; i++) {
                err = regmap_write(ctx->regmap,
@@ -180,7 +180,7 @@ static int sead3_wait_lcd_idle(struct img_ascii_lcd_ctx *ctx)
 static void sead3_update(struct img_ascii_lcd_ctx *ctx)
 {
        unsigned int i;
-       int err;
+       int err = 0;
 
        for (i = 0; i < ctx->cfg->num_chars; i++) {
                err = sead3_wait_lcd_idle(ctx);
@@ -224,7 +224,7 @@ MODULE_DEVICE_TABLE(of, img_ascii_lcd_matches);
 
 /**
  * img_ascii_lcd_scroll() - scroll the display by a character
- * @arg: really a pointer to the private data structure
+ * @t: really a pointer to the private data structure
  *
  * Scroll the current message along the LCD by one character, rearming the
  * timer if required.
index ea7869c0d7f9f638ffb33f61d7b2b7437cd3bf41..ec5e8800f8adf18ad0f645747d991abb518a2659 100644 (file)
@@ -1372,7 +1372,7 @@ static void panel_process_inputs(void)
                                break;
                        input->rise_timer = 0;
                        input->state = INPUT_ST_RISING;
-                       /* no break here, fall through */
+                       /* fall through */
                case INPUT_ST_RISING:
                        if ((phys_curr & input->mask) != input->value) {
                                input->state = INPUT_ST_LOW;
@@ -1385,11 +1385,11 @@ static void panel_process_inputs(void)
                        }
                        input->high_timer = 0;
                        input->state = INPUT_ST_HIGH;
-                       /* no break here, fall through */
+                       /* fall through */
                case INPUT_ST_HIGH:
                        if (input_state_high(input))
                                break;
-                       /* no break here, fall through */
+                       /* fall through */
                case INPUT_ST_FALLING:
                        input_state_falling(input);
                }
index ba8acca036df27479a4d535a81328cd7223fe60f..cb0f1aad20b7dd1932e80f5c3b5153fc75eb2b4c 100644 (file)
@@ -55,7 +55,7 @@ config BCMA_DRIVER_PCI
 
 config BCMA_DRIVER_PCI_HOSTMODE
        bool "Driver for PCI core working in hostmode"
-       depends on MIPS && BCMA_DRIVER_PCI && PCI_DRIVERS_LEGACY
+       depends on MIPS && BCMA_DRIVER_PCI && PCI_DRIVERS_LEGACY && BCMA = y
        help
          PCI core hostmode operation (external PCI bus).
 
index f1eb4d3e1d575b2806c3d43155efb85f7640228d..f4161064365c9763bfe6e7741ac64538624cecd8 100644 (file)
@@ -203,7 +203,7 @@ static void bcma_pmu_resources_init(struct bcma_drv_cc *cc)
         * Add some delay; allow resources to come up and settle.
         * Delay is required for SoC (early init).
         */
-       mdelay(2);
+       usleep_range(2000, 2500);
 }
 
 /* Disable to allow reading SPROM. Don't know the adventages of enabling it. */
index 925842996986e9c68a68683387aa796ba46d69ef..63410ecfe640eff8d7e937c031169c2734551c10 100644 (file)
@@ -297,6 +297,7 @@ static const struct pci_device_id bcma_pci_bridge_tbl[] = {
        { PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, 0x4365, PCI_VENDOR_ID_DELL, 0x0016) },
        { PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, 0x4365, PCI_VENDOR_ID_DELL, 0x0018) },
        { PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, 0x4365, PCI_VENDOR_ID_FOXCONN, 0xe092) },
+       { PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, 0x4365, PCI_VENDOR_ID_HP, 0x804a) },
        { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43a0) },
        { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43a9) },
        { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43aa) },
index e5aa62fcf5a8380d02866b7d9c31aa0a72f6573e..3aaf6af3ec23d7d54d5f45deb97ede64bec7d3db 100644 (file)
@@ -1758,7 +1758,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
        if (unit[drive].type->code == FD_NODRIVE)
                return NULL;
        *part = 0;
-       return get_disk(unit[drive].gendisk);
+       return get_disk_and_module(unit[drive].gendisk);
 }
 
 static int __init amiga_floppy_probe(struct platform_device *pdev)
index 8bc3b9fd8dd2be0df64d166dbc4c2b65eef03305..dfb2c2622e5a64d77e85ca9d14059c25f1840878 100644 (file)
@@ -1917,7 +1917,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
        if (drive >= FD_MAX_UNITS || type > NUM_DISK_MINORS)
                return NULL;
        *part = 0;
-       return get_disk(unit[drive].disk);
+       return get_disk_and_module(unit[drive].disk);
 }
 
 static int __init atari_floppy_init (void)
index 8028a3a7e7fd63cabb8ce47c021e90e26d09a7ba..deea78e485da05e90436d78c334da2b40e9c6bce 100644 (file)
@@ -456,7 +456,7 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
 
        mutex_lock(&brd_devices_mutex);
        brd = brd_init_one(MINOR(dev) / max_part, &new);
-       kobj = brd ? get_disk(brd->brd_disk) : NULL;
+       kobj = brd ? get_disk_and_module(brd->brd_disk) : NULL;
        mutex_unlock(&brd_devices_mutex);
 
        if (new)
index eae484acfbbc1d4c8228b5524b4d30070793620a..8ec7235fc93be49c48291725aec29f2c36818ade 100644 (file)
@@ -4505,7 +4505,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
        if (((*part >> 2) & 0x1f) >= ARRAY_SIZE(floppy_type))
                return NULL;
        *part = 0;
-       return get_disk(disks[drive]);
+       return get_disk_and_module(disks[drive]);
 }
 
 static int __init do_floppy_init(void)
index d5fe720cf14940b668f8764de2bad6cf95549528..ee62d2d517bf4537c60cdd83c70363382403fc57 100644 (file)
@@ -266,7 +266,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
        struct iov_iter i;
        ssize_t bw;
 
-       iov_iter_bvec(&i, ITER_BVEC, bvec, 1, bvec->bv_len);
+       iov_iter_bvec(&i, ITER_BVEC | WRITE, bvec, 1, bvec->bv_len);
 
        file_start_write(file);
        bw = vfs_iter_write(file, &i, ppos, 0);
@@ -1922,7 +1922,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data)
        if (err < 0)
                kobj = NULL;
        else
-               kobj = get_disk(lo->lo_disk);
+               kobj = get_disk_and_module(lo->lo_disk);
        mutex_unlock(&loop_index_mutex);
 
        *part = 0;
index 5f2a4240a204d54fc6fe87e569dc6165d5190530..86258b00a1d4d1960a67fb021babc668884a72a7 100644 (file)
@@ -1591,7 +1591,7 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
                        if (new_index < 0) {
                                mutex_unlock(&nbd_index_mutex);
                                printk(KERN_ERR "nbd: failed to add new device\n");
-                               return ret;
+                               return new_index;
                        }
                        nbd = idr_find(&nbd_index_idr, new_index);
                }
index 531a0915066b313462d6208359ecea4102397215..c61d20c9f3f8092f3aed69d859f137e7d67aef33 100644 (file)
@@ -1122,7 +1122,7 @@ static int pkt_start_recovery(struct packet_data *pkt)
        pkt->sector = new_sector;
 
        bio_reset(pkt->bio);
-       bio_set_set(pkt->bio, pd->bdev);
+       bio_set_dev(pkt->bio, pd->bdev);
        bio_set_op_attrs(pkt->bio, REQ_OP_WRITE, 0);
        pkt->bio->bi_iter.bi_sector = new_sector;
        pkt->bio->bi_iter.bi_size = pkt->frames * CD_FRAMESIZE;
index 84434d3ea19b8f3a7500972219b952a6b84072a2..64e066eba72e03abec38d49036d0f3677aafb2a3 100644 (file)
@@ -799,7 +799,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
                return NULL;
 
        *part = 0;
-       return get_disk(swd->unit[drive].disk);
+       return get_disk_and_module(swd->unit[drive].disk);
 }
 
 static int swim_add_floppy(struct swim_priv *swd, enum drive_location location)
index e126e4cac2ca499566da91a6e3da01d0b1e4381e..92ec1bbece51d31c44f88eb6a2037333dd7a9f40 100644 (file)
@@ -262,6 +262,7 @@ static DEFINE_SPINLOCK(minor_lock);
 
 static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo);
 static void blkfront_gather_backend_features(struct blkfront_info *info);
+static int negotiate_mq(struct blkfront_info *info);
 
 static int get_id_from_freelist(struct blkfront_ring_info *rinfo)
 {
@@ -1774,11 +1775,18 @@ static int talk_to_blkback(struct xenbus_device *dev,
        unsigned int i, max_page_order;
        unsigned int ring_page_order;
 
+       if (!info)
+               return -ENODEV;
+
        max_page_order = xenbus_read_unsigned(info->xbdev->otherend,
                                              "max-ring-page-order", 0);
        ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
        info->nr_ring_pages = 1 << ring_page_order;
 
+       err = negotiate_mq(info);
+       if (err)
+               goto destroy_blkring;
+
        for (i = 0; i < info->nr_rings; i++) {
                struct blkfront_ring_info *rinfo = &info->rinfo[i];
 
@@ -1978,11 +1986,6 @@ static int blkfront_probe(struct xenbus_device *dev,
        }
 
        info->xbdev = dev;
-       err = negotiate_mq(info);
-       if (err) {
-               kfree(info);
-               return err;
-       }
 
        mutex_init(&info->mutex);
        info->vdevice = vdevice;
@@ -2099,10 +2102,6 @@ static int blkfront_resume(struct xenbus_device *dev)
 
        blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
 
-       err = negotiate_mq(info);
-       if (err)
-               return err;
-
        err = talk_to_blkback(dev, info);
        if (!err)
                blk_mq_update_nr_hw_queues(&info->tag_set, info->nr_rings);
index 41c95c9b2ab436e5917eb6f83f055b91ee521044..8f9130ab5887273d8feba518fabb76524fa7b579 100644 (file)
@@ -332,7 +332,7 @@ static const struct block_device_operations z2_fops =
 static struct kobject *z2_find(dev_t dev, int *part, void *data)
 {
        *part = 0;
-       return get_disk(z2ram_gendisk);
+       return get_disk_and_module(z2ram_gendisk);
 }
 
 static struct request_queue *z2_queue;
index 07e55cd8f8c87a4338b50faa5a3c57cfb7bd03bb..d8bbd661dbdb74a09edecc895fe2e8c7915f029f 100644 (file)
@@ -392,4 +392,16 @@ config BT_QCOMSMD
          Say Y here to compile support for HCI over Qualcomm SMD into the
          kernel or say M to compile as a module.
 
+config BT_HCIRSI
+       tristate "Redpine HCI support"
+       default n
+       select RSI_COEX
+       help
+         Redpine BT driver.
+         This driver handles BT traffic from upper layers and passes it
+         to the RSI_91x coex module for further scheduling to the device.
+
+         Say Y here to compile support for HCI over Redpine into the
+         kernel or say M to compile as a module.
+
 endmenu
index 4e4e44d0979689db444a05c2d8e742d388965219..03cfc1b20c4adab688b672d388cfd183d3855fb9 100644 (file)
@@ -28,6 +28,8 @@ obj-$(CONFIG_BT_QCA)          += btqca.o
 
 obj-$(CONFIG_BT_HCIUART_NOKIA) += hci_nokia.o
 
+obj-$(CONFIG_BT_HCIRSI)                += btrsi.o
+
 btmrvl-y                       := btmrvl_main.o
 btmrvl-$(CONFIG_DEBUG_FS)      += btmrvl_debugfs.o
 
diff --git a/drivers/bluetooth/btrsi.c b/drivers/bluetooth/btrsi.c
new file mode 100644 (file)
index 0000000..5034325
--- /dev/null
@@ -0,0 +1,188 @@
+/**
+ * Copyright (c) 2017 Redpine Signals Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+#include <asm/unaligned.h>
+#include <net/rsi_91x.h>
+#include <net/genetlink.h>
+
+#define RSI_HEADROOM_FOR_BT_HAL        16
+#define RSI_FRAME_DESC_SIZE    16
+
+/**
+ * struct rsi_hci_adapter - per-device state tying an HCI device to its owner
+ * @priv: opaque handle received in rsi_hci_attach(); passed back as the first
+ *        argument of the proto_ops callbacks
+ * @proto_ops: callback table received in rsi_hci_attach(); used by the
+ *             transmit path to call ->coex_send_pkt()
+ * @hdev: HCI device allocated and registered by rsi_hci_attach(), released
+ *        by rsi_hci_detach()
+ */
+struct rsi_hci_adapter {
+       void *priv;
+       struct rsi_proto_ops *proto_ops;
+       struct hci_dev *hdev;
+};
+
+/* HCI ->open callback: no work is done here; always reports success. */
+static int rsi_hci_open(struct hci_dev *hdev)
+{
+       return 0;
+}
+
+/* HCI ->close callback: no work is done here; always reports success. */
+static int rsi_hci_close(struct hci_dev *hdev)
+{
+       return 0;
+}
+
+/* HCI ->flush callback: nothing is queued by this driver; always returns 0. */
+static int rsi_hci_flush(struct hci_dev *hdev)
+{
+       return 0;
+}
+
+/*
+ * HCI ->send callback (installed by rsi_hci_attach()).
+ *
+ * Bumps the TX counter matching the packet type, makes sure the skb has at
+ * least RSI_HEADROOM_FOR_BT_HAL bytes of headroom (reallocating it when it
+ * does not) and hands it to ->coex_send_pkt() on the RSI_BT_Q queue.
+ *
+ * Returns -ENOMEM if the headroom reallocation fails, otherwise the value
+ * returned by ->coex_send_pkt().
+ */
+static int rsi_hci_send_pkt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+       struct rsi_hci_adapter *h_adapter = hci_get_drvdata(hdev);
+       struct sk_buff *new_skb = NULL;
+
+       /* other packet types are forwarded without touching the counters */
+       switch (hci_skb_pkt_type(skb)) {
+       case HCI_COMMAND_PKT:
+               hdev->stat.cmd_tx++;
+               break;
+       case HCI_ACLDATA_PKT:
+               hdev->stat.acl_tx++;
+               break;
+       case HCI_SCODATA_PKT:
+               hdev->stat.sco_tx++;
+               break;
+       }
+
+       if (skb_headroom(skb) < RSI_HEADROOM_FOR_BT_HAL) {
+               /* Insufficient skb headroom - allocate a new skb */
+               new_skb = skb_realloc_headroom(skb, RSI_HEADROOM_FOR_BT_HAL);
+               if (unlikely(!new_skb))
+                       return -ENOMEM;
+               /* carry the packet type over before the original is dropped */
+               bt_cb(new_skb)->pkt_type = hci_skb_pkt_type(skb);
+               /*
+                * NOTE(review): the caller's skb is freed here. If
+                * ->coex_send_pkt() then fails and the HCI core frees the skb
+                * it passed in on error, that would be a double free --
+                * confirm the ownership rules of both sides.
+                */
+               kfree_skb(skb);
+               skb = new_skb;
+       }
+
+       return h_adapter->proto_ops->coex_send_pkt(h_adapter->priv, skb,
+                                                  RSI_BT_Q);
+}
+
+/*
+ * Receive hook published as rsi_bt_ops.recv_pkt.
+ *
+ * @priv is the struct rsi_hci_adapter for the device and @pkt starts with an
+ * RSI_FRAME_DESC_SIZE-byte descriptor followed by the payload.  The payload
+ * length is the low 12 bits of the little-endian 16-bit word at the start of
+ * the descriptor; the HCI packet type is descriptor byte 14.
+ *
+ * Copies the payload into a fresh skb and passes it to hci_recv_frame().
+ * Returns -ENOMEM if the skb cannot be allocated, otherwise the result of
+ * hci_recv_frame().
+ */
+static int rsi_hci_recv_pkt(void *priv, const u8 *pkt)
+{
+       struct rsi_hci_adapter *h_adapter = priv;
+       struct hci_dev *hdev = h_adapter->hdev;
+       const u8 *payload = pkt + RSI_FRAME_DESC_SIZE;
+       int len = get_unaligned_le16(pkt) & 0x0fff;
+       struct sk_buff *skb;
+
+       skb = dev_alloc_skb(len);
+       if (!skb)
+               return -ENOMEM;
+
+       /* reserve the payload area and fill it in one step */
+       memcpy(skb_put(skb, len), payload, len);
+       hdev->stat.byte_rx += skb->len;
+
+       hci_skb_pkt_type(skb) = pkt[14];
+
+       return hci_recv_frame(hdev, skb);
+}
+
+/*
+ * Attach hook published as rsi_bt_ops.attach.
+ *
+ * Allocates the adapter state, publishes it through ops->set_bt_context(),
+ * then allocates, configures and registers the HCI device.
+ *
+ * @priv: opaque handle of the caller, stored and passed back to @ops callbacks
+ * @ops:  callback table of the caller
+ *
+ * Returns 0 on success or a negative errno.  On failure everything allocated
+ * here is released and the published context is reset to NULL, so the caller
+ * is never left holding a pointer to the freed adapter.
+ */
+static int rsi_hci_attach(void *priv, struct rsi_proto_ops *ops)
+{
+       struct rsi_hci_adapter *h_adapter;
+       struct hci_dev *hdev;
+       int err;
+
+       h_adapter = kzalloc(sizeof(*h_adapter), GFP_KERNEL);
+       if (!h_adapter)
+               return -ENOMEM;
+
+       h_adapter->priv = priv;
+       ops->set_bt_context(priv, h_adapter);
+       h_adapter->proto_ops = ops;
+
+       hdev = hci_alloc_dev();
+       if (!hdev) {
+               BT_ERR("Failed to alloc HCI device");
+               err = -ENOMEM;
+               goto err_free_adapter;
+       }
+
+       h_adapter->hdev = hdev;
+
+       if (ops->get_host_intf(priv) == RSI_HOST_INTF_SDIO)
+               hdev->bus = HCI_SDIO;
+       else
+               hdev->bus = HCI_USB;
+
+       hci_set_drvdata(hdev, h_adapter);
+       hdev->dev_type = HCI_PRIMARY;
+       hdev->open = rsi_hci_open;
+       hdev->close = rsi_hci_close;
+       hdev->flush = rsi_hci_flush;
+       hdev->send = rsi_hci_send_pkt;
+
+       err = hci_register_dev(hdev);
+       if (err < 0) {
+               BT_ERR("HCI registration failed with errcode %d", err);
+               goto err_free_hdev;
+       }
+
+       return 0;
+
+err_free_hdev:
+       hci_free_dev(hdev);
+err_free_adapter:
+       /* withdraw the context published above before freeing what it points to */
+       ops->set_bt_context(priv, NULL);
+       kfree(h_adapter);
+       return err;
+}
+
+/*
+ * Detach hook published as rsi_bt_ops.detach.
+ *
+ * @priv is the struct rsi_hci_adapter created by rsi_hci_attach(); NULL is
+ * accepted and ignored.  Unregisters and frees the HCI device, if one is
+ * attached, then frees the adapter itself.
+ */
+static void rsi_hci_detach(void *priv)
+{
+       struct rsi_hci_adapter *adapter = priv;
+
+       if (!adapter)
+               return;
+
+       if (adapter->hdev) {
+               struct hci_dev *hdev = adapter->hdev;
+
+               hci_unregister_dev(hdev);
+               hci_free_dev(hdev);
+               adapter->hdev = NULL;
+       }
+
+       kfree(adapter);
+}
+
+/*
+ * Operations table exported to other modules: attach/detach manage the
+ * lifetime of the HCI device, recv_pkt feeds received frames into it.
+ */
+const struct rsi_mod_ops rsi_bt_ops = {
+       .attach = rsi_hci_attach,
+       .detach = rsi_hci_detach,
+       .recv_pkt = rsi_hci_recv_pkt,
+};
+EXPORT_SYMBOL(rsi_bt_ops);
+
+/* Module load hook: nothing to initialise here; always returns 0. */
+static int rsi_91x_bt_module_init(void)
+{
+       return 0;
+}
+
+/* Module unload hook: there is no module-level state to tear down. */
+static void rsi_91x_bt_module_exit(void)
+{
+}
+
+module_init(rsi_91x_bt_module_init);
+module_exit(rsi_91x_bt_module_exit);
+MODULE_AUTHOR("Redpine Signals Inc");
+MODULE_DESCRIPTION("RSI BT driver");
+MODULE_SUPPORTED_DEVICE("RSI-BT");
+MODULE_LICENSE("Dual BSD/GPL");
index c8e9ae6b99e1753b95f436e89b67e962091f9c81..5cd868ea28ed9f4e0ded8d73571e9baf84ff66d0 100644 (file)
@@ -21,6 +21,7 @@
  *
  */
 
+#include <linux/dmi.h>
 #include <linux/module.h>
 #include <linux/usb.h>
 #include <linux/usb/quirks.h>
@@ -230,7 +231,6 @@ static const struct usb_device_id blacklist_table[] = {
        { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0cf3, 0x0036), .driver_info = BTUSB_ATH3012 },
-       { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0cf3, 0x311e), .driver_info = BTUSB_ATH3012 },
@@ -263,6 +263,7 @@ static const struct usb_device_id blacklist_table[] = {
        { USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 },
 
        /* QCA ROME chipset */
+       { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_QCA_ROME },
        { USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME },
        { USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME },
        { USB_DEVICE(0x0cf3, 0xe010), .driver_info = BTUSB_QCA_ROME },
@@ -383,6 +384,21 @@ static const struct usb_device_id blacklist_table[] = {
        { }     /* Terminating entry */
 };
 
+/* The Bluetooth USB module built into some devices needs to be reset on resume,
+ * this is a problem with the platform (likely shutting off all power) not with
+ * the module itself. So we use a DMI list to match known broken platforms.
+ */
+static const struct dmi_system_id btusb_needs_reset_resume_table[] = {
+       {
+               /* Dell OptiPlex 3060 (QCA ROME device 0cf3:e007) */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 3060"),
+               },
+       },
+       {}
+};
+
 #define BTUSB_MAX_ISOC_FRAMES  10
 
 #define BTUSB_INTR_RUNNING     0
@@ -2955,6 +2971,9 @@ static int btusb_probe(struct usb_interface *intf,
        hdev->send   = btusb_send_frame;
        hdev->notify = btusb_notify;
 
+       if (dmi_check_system(btusb_needs_reset_resume_table))
+               interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME;
+
 #ifdef CONFIG_PM
        err = btusb_config_oob_wake(hdev);
        if (err)
@@ -3041,12 +3060,6 @@ static int btusb_probe(struct usb_interface *intf,
        if (id->driver_info & BTUSB_QCA_ROME) {
                data->setup_on_usb = btusb_setup_qca;
                hdev->set_bdaddr = btusb_set_bdaddr_ath3012;
-
-               /* QCA Rome devices lose their updated firmware over suspend,
-                * but the USB hub doesn't notice any status change.
-                * explicitly request a device reset on resume.
-                */
-               interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME;
        }
 
 #ifdef CONFIG_BT_HCIBTUSB_RTL
index 0438a64b8185ed85b60c28f5086141c7e7a7b769..40b9fb247010169d047f27b1532ef20e69ecab68 100644 (file)
@@ -244,7 +244,9 @@ static irqreturn_t bcm_host_wake(int irq, void *data)
 
        bt_dev_dbg(bdev, "Host wake IRQ");
 
-       pm_request_resume(bdev->dev);
+       pm_runtime_get(bdev->dev);
+       pm_runtime_mark_last_busy(bdev->dev);
+       pm_runtime_put_autosuspend(bdev->dev);
 
        return IRQ_HANDLED;
 }
@@ -301,7 +303,7 @@ static const struct bcm_set_sleep_mode default_sleep_params = {
        .usb_auto_sleep = 0,
        .usb_resume_timeout = 0,
        .break_to_host = 0,
-       .pulsed_host_wake = 0,
+       .pulsed_host_wake = 1,
 };
 
 static int bcm_setup_sleep(struct hci_uart *hu)
@@ -586,8 +588,11 @@ static int bcm_recv(struct hci_uart *hu, const void *data, int count)
        } else if (!bcm->rx_skb) {
                /* Delay auto-suspend when receiving completed packet */
                mutex_lock(&bcm_device_lock);
-               if (bcm->dev && bcm_device_exists(bcm->dev))
-                       pm_request_resume(bcm->dev->dev);
+               if (bcm->dev && bcm_device_exists(bcm->dev)) {
+                       pm_runtime_get(bcm->dev->dev);
+                       pm_runtime_mark_last_busy(bcm->dev->dev);
+                       pm_runtime_put_autosuspend(bcm->dev->dev);
+               }
                mutex_unlock(&bcm_device_lock);
        }
 
@@ -922,12 +927,13 @@ static int bcm_get_resources(struct bcm_device *dev)
 
        dev->clk = devm_clk_get(dev->dev, NULL);
 
-       dev->device_wakeup = devm_gpiod_get(dev->dev, "device-wakeup",
-                                           GPIOD_OUT_LOW);
+       dev->device_wakeup = devm_gpiod_get_optional(dev->dev, "device-wakeup",
+                                                    GPIOD_OUT_LOW);
        if (IS_ERR(dev->device_wakeup))
                return PTR_ERR(dev->device_wakeup);
 
-       dev->shutdown = devm_gpiod_get(dev->dev, "shutdown", GPIOD_OUT_LOW);
+       dev->shutdown = devm_gpiod_get_optional(dev->dev, "shutdown",
+                                               GPIOD_OUT_LOW);
        if (IS_ERR(dev->shutdown))
                return PTR_ERR(dev->shutdown);
 
index 4d46003c46cfe5f4599a17cf726cb3d86fd8399c..cdaeeea7999cd20759dca0d5cd7f88a3ff431440 100644 (file)
@@ -630,7 +630,7 @@ static int sysc_init_dts_quirks(struct sysc *ddata)
        for (i = 0; i < ARRAY_SIZE(sysc_dts_quirks); i++) {
                prop = of_get_property(np, sysc_dts_quirks[i].name, &len);
                if (!prop)
-                       break;
+                       continue;
 
                ddata->cfg.quirks |= sysc_dts_quirks[i].mask;
        }
index 4d1dc8b46877ca497a198a3d988d060b9235100a..f95b9c75175bcf27825b16891ca405886e51c379 100644 (file)
@@ -457,7 +457,7 @@ static int st33zp24_recv(struct tpm_chip *chip, unsigned char *buf,
                            size_t count)
 {
        int size = 0;
-       int expected;
+       u32 expected;
 
        if (!chip)
                return -EBUSY;
@@ -474,7 +474,7 @@ static int st33zp24_recv(struct tpm_chip *chip, unsigned char *buf,
        }
 
        expected = be32_to_cpu(*(__be32 *)(buf + 2));
-       if (expected > count) {
+       if (expected > count || expected < TPM_HEADER_SIZE) {
                size = -EIO;
                goto out;
        }
index 76df4fbcf089c2371b2ad8b08f31bf446b5c488b..9e80a953d6933ea05668636b6abcd444eff72b0a 100644 (file)
@@ -1190,6 +1190,10 @@ int tpm_get_random(struct tpm_chip *chip, u8 *out, size_t max)
                        break;
 
                recd = be32_to_cpu(tpm_cmd.params.getrandom_out.rng_data_len);
+               if (recd > num_bytes) {
+                       total = -EFAULT;
+                       break;
+               }
 
                rlength = be32_to_cpu(tpm_cmd.header.out.length);
                if (rlength < offsetof(struct tpm_getrandom_out, rng_data) +
index c17e75348a991e355236f2040f931e940541049f..a700f8f9ead797df39de4b5ef946fb5c2c65c95b 100644 (file)
@@ -683,6 +683,10 @@ static int tpm2_unseal_cmd(struct tpm_chip *chip,
        if (!rc) {
                data_len = be16_to_cpup(
                        (__be16 *) &buf.data[TPM_HEADER_SIZE + 4]);
+               if (data_len < MIN_KEY_SIZE ||  data_len > MAX_KEY_SIZE + 1) {
+                       rc = -EFAULT;
+                       goto out;
+               }
 
                rlength = be32_to_cpu(((struct tpm2_cmd *)&buf)
                                        ->header.out.length);
index c1dd39eaaeebb1f474d2d9f8b0d1e98e3559f27f..6116cd05e2287999c69ea4c5432098f159a44379 100644 (file)
@@ -473,7 +473,8 @@ static int recv_data(struct tpm_chip *chip, u8 *buf, size_t count)
 static int tpm_tis_i2c_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 {
        int size = 0;
-       int expected, status;
+       int status;
+       u32 expected;
 
        if (count < TPM_HEADER_SIZE) {
                size = -EIO;
@@ -488,7 +489,7 @@ static int tpm_tis_i2c_recv(struct tpm_chip *chip, u8 *buf, size_t count)
        }
 
        expected = be32_to_cpu(*(__be32 *)(buf + 2));
-       if ((size_t) expected > count) {
+       if (((size_t) expected > count) || (expected < TPM_HEADER_SIZE)) {
                size = -EIO;
                goto out;
        }
index c6428771841f814a891719fab16e92d7e0723fc7..caa86b19c76dd7007a3975756dec8fe069a31db3 100644 (file)
@@ -281,7 +281,11 @@ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count)
        struct device *dev = chip->dev.parent;
        struct i2c_client *client = to_i2c_client(dev);
        s32 rc;
-       int expected, status, burst_count, retries, size = 0;
+       int status;
+       int burst_count;
+       int retries;
+       int size = 0;
+       u32 expected;
 
        if (count < TPM_HEADER_SIZE) {
                i2c_nuvoton_ready(chip);    /* return to idle */
@@ -323,7 +327,7 @@ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count)
                 * to machine native
                 */
                expected = be32_to_cpu(*(__be32 *) (buf + 2));
-               if (expected > count) {
+               if (expected > count || expected < size) {
                        dev_err(dev, "%s() expected > count\n", __func__);
                        size = -EIO;
                        continue;
index 183a5f54d875d072b98e7aead6563f3dd3d95a62..da074e3db19be5c895f80a403aabb992e9a14d29 100644 (file)
@@ -270,7 +270,8 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 {
        struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
        int size = 0;
-       int expected, status;
+       int status;
+       u32 expected;
 
        if (count < TPM_HEADER_SIZE) {
                size = -EIO;
@@ -285,7 +286,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
        }
 
        expected = be32_to_cpu(*(__be32 *) (buf + 2));
-       if (expected > count) {
+       if (expected > count || expected < TPM_HEADER_SIZE) {
                size = -EIO;
                goto out;
        }
index 44301a3d996333f37f4e6e1b9696f84f99563e71..a07f6451694ae3ff25b65fa02ddd5f78e1caec05 100644 (file)
@@ -449,17 +449,17 @@ struct bcm2835_pll_ana_bits {
 static const struct bcm2835_pll_ana_bits bcm2835_ana_default = {
        .mask0 = 0,
        .set0 = 0,
-       .mask1 = (u32)~(A2W_PLL_KI_MASK | A2W_PLL_KP_MASK),
+       .mask1 = A2W_PLL_KI_MASK | A2W_PLL_KP_MASK,
        .set1 = (2 << A2W_PLL_KI_SHIFT) | (8 << A2W_PLL_KP_SHIFT),
-       .mask3 = (u32)~A2W_PLL_KA_MASK,
+       .mask3 = A2W_PLL_KA_MASK,
        .set3 = (2 << A2W_PLL_KA_SHIFT),
        .fb_prediv_mask = BIT(14),
 };
 
 static const struct bcm2835_pll_ana_bits bcm2835_ana_pllh = {
-       .mask0 = (u32)~(A2W_PLLH_KA_MASK | A2W_PLLH_KI_LOW_MASK),
+       .mask0 = A2W_PLLH_KA_MASK | A2W_PLLH_KI_LOW_MASK,
        .set0 = (2 << A2W_PLLH_KA_SHIFT) | (2 << A2W_PLLH_KI_LOW_SHIFT),
-       .mask1 = (u32)~(A2W_PLLH_KI_HIGH_MASK | A2W_PLLH_KP_MASK),
+       .mask1 = A2W_PLLH_KI_HIGH_MASK | A2W_PLLH_KP_MASK,
        .set1 = (6 << A2W_PLLH_KP_SHIFT),
        .mask3 = 0,
        .set3 = 0,
@@ -623,8 +623,10 @@ static int bcm2835_pll_on(struct clk_hw *hw)
                     ~A2W_PLL_CTRL_PWRDN);
 
        /* Take the PLL out of reset. */
+       spin_lock(&cprman->regs_lock);
        cprman_write(cprman, data->cm_ctrl_reg,
                     cprman_read(cprman, data->cm_ctrl_reg) & ~CM_PLL_ANARST);
+       spin_unlock(&cprman->regs_lock);
 
        /* Wait for the PLL to lock. */
        timeout = ktime_add_ns(ktime_get(), LOCK_TIMEOUT_NS);
@@ -701,9 +703,11 @@ static int bcm2835_pll_set_rate(struct clk_hw *hw,
        }
 
        /* Unmask the reference clock from the oscillator. */
+       spin_lock(&cprman->regs_lock);
        cprman_write(cprman, A2W_XOSC_CTRL,
                     cprman_read(cprman, A2W_XOSC_CTRL) |
                     data->reference_enable_mask);
+       spin_unlock(&cprman->regs_lock);
 
        if (do_ana_setup_first)
                bcm2835_pll_write_ana(cprman, data->ana_reg_base, ana);
index 9f7f931d6b2f717f786255bfcf74b6a74e212cde..5eb50c31e4553c114a6ce62d38a4afb0b158f9b6 100644 (file)
@@ -205,6 +205,18 @@ static const struct aspeed_clk_soc_data ast2400_data = {
        .calc_pll = aspeed_ast2400_calc_pll,
 };
 
+static int aspeed_clk_is_enabled(struct clk_hw *hw)
+{
+       struct aspeed_clk_gate *gate = to_aspeed_clk_gate(hw);
+       u32 clk = BIT(gate->clock_idx);
+       u32 enval = (gate->flags & CLK_GATE_SET_TO_DISABLE) ? 0 : clk;
+       u32 reg;
+
+       regmap_read(gate->map, ASPEED_CLK_STOP_CTRL, &reg);
+
+       return ((reg & clk) == enval) ? 1 : 0;
+}
+
 static int aspeed_clk_enable(struct clk_hw *hw)
 {
        struct aspeed_clk_gate *gate = to_aspeed_clk_gate(hw);
@@ -215,6 +227,11 @@ static int aspeed_clk_enable(struct clk_hw *hw)
 
        spin_lock_irqsave(gate->lock, flags);
 
+       if (aspeed_clk_is_enabled(hw)) {
+               spin_unlock_irqrestore(gate->lock, flags);
+               return 0;
+       }
+
        if (gate->reset_idx >= 0) {
                /* Put IP in reset */
                regmap_update_bits(gate->map, ASPEED_RESET_CTRL, rst, rst);
@@ -255,17 +272,6 @@ static void aspeed_clk_disable(struct clk_hw *hw)
        spin_unlock_irqrestore(gate->lock, flags);
 }
 
-static int aspeed_clk_is_enabled(struct clk_hw *hw)
-{
-       struct aspeed_clk_gate *gate = to_aspeed_clk_gate(hw);
-       u32 clk = BIT(gate->clock_idx);
-       u32 reg;
-
-       regmap_read(gate->map, ASPEED_CLK_STOP_CTRL, &reg);
-
-       return (reg & clk) ? 0 : 1;
-}
-
 static const struct clk_ops aspeed_clk_gate_ops = {
        .enable = aspeed_clk_enable,
        .disable = aspeed_clk_disable,
index 0f686a9dac3e78212b390ab985e1c571557c65f0..076d4244d6725228a12ed0c0daa5ee1c3b7e3c43 100644 (file)
@@ -1125,8 +1125,10 @@ static int clk_core_round_rate_nolock(struct clk_core *core,
 {
        lockdep_assert_held(&prepare_lock);
 
-       if (!core)
+       if (!core) {
+               req->rate = 0;
                return 0;
+       }
 
        clk_core_init_rate_req(core, req);
 
@@ -2309,8 +2311,11 @@ static int clk_core_set_phase_nolock(struct clk_core *core, int degrees)
 
        trace_clk_set_phase(core, degrees);
 
-       if (core->ops->set_phase)
+       if (core->ops->set_phase) {
                ret = core->ops->set_phase(core->hw, degrees);
+               if (!ret)
+                       core->phase = degrees;
+       }
 
        trace_clk_set_phase_complete(core, degrees);
 
@@ -2967,23 +2972,38 @@ static int __clk_core_init(struct clk_core *core)
                rate = 0;
        core->rate = core->req_rate = rate;
 
+       /*
+        * Enable CLK_IS_CRITICAL clocks so newly added critical clocks
+        * don't get accidentally disabled when walking the orphan tree and
+        * reparenting clocks
+        */
+       if (core->flags & CLK_IS_CRITICAL) {
+               unsigned long flags;
+
+               clk_core_prepare(core);
+
+               flags = clk_enable_lock();
+               clk_core_enable(core);
+               clk_enable_unlock(flags);
+       }
+
        /*
         * walk the list of orphan clocks and reparent any that newly finds a
         * parent.
         */
        hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) {
                struct clk_core *parent = __clk_init_parent(orphan);
-               unsigned long flags;
 
                /*
-                * we could call __clk_set_parent, but that would result in a
-                * redundant call to the .set_rate op, if it exists
+                * We need to use __clk_set_parent_before() and _after() to
+                * properly migrate any prepare/enable count of the orphan
+                * clock. This is important for CLK_IS_CRITICAL clocks, which
+                * are enabled during init but might not have a parent yet.
                 */
                if (parent) {
                        /* update the clk tree topology */
-                       flags = clk_enable_lock();
-                       clk_reparent(orphan, parent);
-                       clk_enable_unlock(flags);
+                       __clk_set_parent_before(orphan, parent);
+                       __clk_set_parent_after(orphan, parent, NULL);
                        __clk_recalc_accuracies(orphan);
                        __clk_recalc_rates(orphan, 0);
                }
@@ -3000,16 +3020,6 @@ static int __clk_core_init(struct clk_core *core)
        if (core->ops->init)
                core->ops->init(core->hw);
 
-       if (core->flags & CLK_IS_CRITICAL) {
-               unsigned long flags;
-
-               clk_core_prepare(core);
-
-               flags = clk_enable_lock();
-               clk_core_enable(core);
-               clk_enable_unlock(flags);
-       }
-
        kref_init(&core->ref);
 out:
        clk_pm_runtime_put(core);
index 9b6c72bbddf96712a004aab1ec37b2ebe2d423a6..e8b2c43b1bb86f17612b63534172a7332f44f6eb 100644 (file)
@@ -149,6 +149,8 @@ static int hi3660_stub_clk_probe(struct platform_device *pdev)
                return PTR_ERR(stub_clk_chan.mbox);
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -EINVAL;
        freq_reg = devm_ioremap(dev, res->start, resource_size(res));
        if (!freq_reg)
                return -ENOMEM;
index c864992e6983f800abc030e233320af5406ce973..caa8bd40692c61be4feba9a37cc5f33a8046bfac 100644 (file)
@@ -131,7 +131,17 @@ static const char *ieee1588_sels[] = { "pll3_sw", "pll4_sw", "dummy" /* usbphy2_
 static struct clk *clk[IMX5_CLK_END];
 static struct clk_onecell_data clk_data;
 
-static struct clk ** const uart_clks[] __initconst = {
+static struct clk ** const uart_clks_mx51[] __initconst = {
+       &clk[IMX5_CLK_UART1_IPG_GATE],
+       &clk[IMX5_CLK_UART1_PER_GATE],
+       &clk[IMX5_CLK_UART2_IPG_GATE],
+       &clk[IMX5_CLK_UART2_PER_GATE],
+       &clk[IMX5_CLK_UART3_IPG_GATE],
+       &clk[IMX5_CLK_UART3_PER_GATE],
+       NULL
+};
+
+static struct clk ** const uart_clks_mx50_mx53[] __initconst = {
        &clk[IMX5_CLK_UART1_IPG_GATE],
        &clk[IMX5_CLK_UART1_PER_GATE],
        &clk[IMX5_CLK_UART2_IPG_GATE],
@@ -321,8 +331,6 @@ static void __init mx5_clocks_common_init(void __iomem *ccm_base)
        clk_prepare_enable(clk[IMX5_CLK_TMAX1]);
        clk_prepare_enable(clk[IMX5_CLK_TMAX2]); /* esdhc2, fec */
        clk_prepare_enable(clk[IMX5_CLK_TMAX3]); /* esdhc1, esdhc4 */
-
-       imx_register_uart_clocks(uart_clks);
 }
 
 static void __init mx50_clocks_init(struct device_node *np)
@@ -388,6 +396,8 @@ static void __init mx50_clocks_init(struct device_node *np)
 
        r = clk_round_rate(clk[IMX5_CLK_USBOH3_PER_GATE], 54000000);
        clk_set_rate(clk[IMX5_CLK_USBOH3_PER_GATE], r);
+
+       imx_register_uart_clocks(uart_clks_mx50_mx53);
 }
 CLK_OF_DECLARE(imx50_ccm, "fsl,imx50-ccm", mx50_clocks_init);
 
@@ -477,6 +487,8 @@ static void __init mx51_clocks_init(struct device_node *np)
        val = readl(MXC_CCM_CLPCR);
        val |= 1 << 23;
        writel(val, MXC_CCM_CLPCR);
+
+       imx_register_uart_clocks(uart_clks_mx51);
 }
 CLK_OF_DECLARE(imx51_ccm, "fsl,imx51-ccm", mx51_clocks_init);
 
@@ -606,5 +618,7 @@ static void __init mx53_clocks_init(struct device_node *np)
 
        r = clk_round_rate(clk[IMX5_CLK_USBOH3_PER_GATE], 54000000);
        clk_set_rate(clk[IMX5_CLK_USBOH3_PER_GATE], r);
+
+       imx_register_uart_clocks(uart_clks_mx50_mx53);
 }
 CLK_OF_DECLARE(imx53_ccm, "fsl,imx53-ccm", mx53_clocks_init);
index 246957f1a413513543db0e4c79573f0aba10b6b0..b1cc8dbcd3274a429f1e80884ce8c09f974ebc09 100644 (file)
@@ -49,11 +49,10 @@ static int qcom_apcs_msm8916_clk_probe(struct platform_device *pdev)
        struct clk_regmap_mux_div *a53cc;
        struct regmap *regmap;
        struct clk_init_data init = { };
-       int ret;
+       int ret = -ENODEV;
 
        regmap = dev_get_regmap(parent, NULL);
-       if (IS_ERR(regmap)) {
-               ret = PTR_ERR(regmap);
+       if (!regmap) {
                dev_err(dev, "failed to get regmap: %d\n", ret);
                return ret;
        }
index 72b16ed1012b1e78e272b0ca06fee32a8a49aed0..3b97f60540ad8cd29aeb06a69620dcf940a15213 100644 (file)
@@ -762,7 +762,7 @@ static struct ccu_mp out_a_clk = {
                .features       = CCU_FEATURE_FIXED_PREDIV,
                .hw.init        = CLK_HW_INIT_PARENTS("out-a",
                                                      clk_out_parents,
-                                                     &ccu_div_ops,
+                                                     &ccu_mp_ops,
                                                      0),
        },
 };
@@ -783,7 +783,7 @@ static struct ccu_mp out_b_clk = {
                .features       = CCU_FEATURE_FIXED_PREDIV,
                .hw.init        = CLK_HW_INIT_PARENTS("out-b",
                                                      clk_out_parents,
-                                                     &ccu_div_ops,
+                                                     &ccu_mp_ops,
                                                      0),
        },
 };
@@ -804,7 +804,7 @@ static struct ccu_mp out_c_clk = {
                .features       = CCU_FEATURE_FIXED_PREDIV,
                .hw.init        = CLK_HW_INIT_PARENTS("out-c",
                                                      clk_out_parents,
-                                                     &ccu_div_ops,
+                                                     &ccu_mp_ops,
                                                      0),
        },
 };
index 612491a260708ad19534ee17c94c63ee77e48b8a..12e0a2d1991124504c3ac9a0c5245b991b9921f4 100644 (file)
@@ -45,7 +45,7 @@ static const struct omap_clkctrl_bit_data am3_gpio4_bit_data[] __initconst = {
 
 static const struct omap_clkctrl_reg_data am3_l4_per_clkctrl_regs[] __initconst = {
        { AM3_CPGMAC0_CLKCTRL, NULL, CLKF_SW_SUP, "cpsw_125mhz_gclk", "cpsw_125mhz_clkdm" },
-       { AM3_LCDC_CLKCTRL, NULL, CLKF_SW_SUP, "lcd_gclk", "lcdc_clkdm" },
+       { AM3_LCDC_CLKCTRL, NULL, CLKF_SW_SUP | CLKF_SET_RATE_PARENT, "lcd_gclk", "lcdc_clkdm" },
        { AM3_USB_OTG_HS_CLKCTRL, NULL, CLKF_SW_SUP, "usbotg_fck", "l3s_clkdm" },
        { AM3_TPTC0_CLKCTRL, NULL, CLKF_SW_SUP, "l3_gclk", "l3_clkdm" },
        { AM3_EMIF_CLKCTRL, NULL, CLKF_SW_SUP, "dpll_ddr_m2_div2_ck", "l3_clkdm" },
index 2b7c2e017665e20ded9d8a46b96f55e2ddb52c49..63c5ddb501876993f0584364f44ed56a28bc175d 100644 (file)
@@ -187,7 +187,7 @@ static const struct omap_clkctrl_reg_data am4_l4_per_clkctrl_regs[] __initconst
        { AM4_OCP2SCP0_CLKCTRL, NULL, CLKF_SW_SUP, "l4ls_gclk" },
        { AM4_OCP2SCP1_CLKCTRL, NULL, CLKF_SW_SUP, "l4ls_gclk" },
        { AM4_EMIF_CLKCTRL, NULL, CLKF_SW_SUP, "dpll_ddr_m2_ck", "emif_clkdm" },
-       { AM4_DSS_CORE_CLKCTRL, NULL, CLKF_SW_SUP, "disp_clk", "dss_clkdm" },
+       { AM4_DSS_CORE_CLKCTRL, NULL, CLKF_SW_SUP | CLKF_SET_RATE_PARENT, "disp_clk", "dss_clkdm" },
        { AM4_CPGMAC0_CLKCTRL, NULL, CLKF_SW_SUP, "cpsw_125mhz_gclk", "cpsw_125mhz_clkdm" },
        { 0 },
 };
index afa0d6bfc5c15643a3dd2e765aa1b29048744d90..421b0539222058354d94ae6c29347d03b02ddd93 100644 (file)
@@ -537,6 +537,8 @@ static void __init _ti_omap4_clkctrl_setup(struct device_node *node)
                init.parent_names = &reg_data->parent;
                init.num_parents = 1;
                init.flags = 0;
+               if (reg_data->flags & CLKF_SET_RATE_PARENT)
+                       init.flags |= CLK_SET_RATE_PARENT;
                init.name = kasprintf(GFP_KERNEL, "%s:%s:%04x:%d",
                                      node->parent->name, node->name,
                                      reg_data->offset, 0);
index b3b4ed9b68742bfbb8219af9c24c9d542d6f02ec..d2e5382821a43621310994ea1611e2ad27e798a8 100644 (file)
@@ -386,6 +386,7 @@ config ATMEL_PIT
 
 config ATMEL_ST
        bool "Atmel ST timer support" if COMPILE_TEST
+       depends on HAS_IOMEM
        select TIMER_OF
        select MFD_SYSCON
        help
index 4927355f9cbe51e086db1a144190c0bda1944733..471b428d8034ce5af3e2fc8993fca63df27b68f9 100644 (file)
@@ -251,9 +251,14 @@ static irqreturn_t timer_irq_handler(int irq, void *dev_id)
        int irq_reenable = clockevent_state_periodic(evt);
 
        /*
-        * Any write to CTRL reg ACks the interrupt, we rewrite the
-        * Count when [N]ot [H]alted bit.
-        * And re-arm it if perioid by [I]nterrupt [E]nable bit
+        * 1. ACK the interrupt
+        *    - For ARC700, any write to CTRL reg ACKs it, so just rewrite
+        *      the Count when [N]ot [H]alted bit.
+        *    - For HS3x, it is a bit subtle. When a count-down interrupt is
+        *      taken, IP bit [3] is set and must be cleared to ACK it.
+        *      The write below can only update the other two bits, hence
+        *      it explicitly clears the IP bit.
+        * 2. Re-arm interrupt if periodic by writing to IE bit [0]
         */
        write_aux_reg(ARC_REG_TIMER0_CTRL, irq_reenable | TIMER_CTRL_NH);
 
index 3ee7e6fea6212668d8a9a4d4c1e5c52aedab58fb..846d18daf893b03d926276ba454a5babca7cc75d 100644 (file)
@@ -281,7 +281,7 @@ static int __init __ftm_clk_init(struct device_node *np, char *cnt_name,
 
 static unsigned long __init ftm_clk_init(struct device_node *np)
 {
-       unsigned long freq;
+       long freq;
 
        freq = __ftm_clk_init(np, "ftm-evt-counter-en", "ftm-evt");
        if (freq <= 0)
index a04808a21d4ec9eef5d3c6d5e7ed140945d1001b..986b6796b631f77daa26e3b300d901c2ae00da22 100644 (file)
@@ -166,7 +166,7 @@ static int __init __gic_clocksource_init(void)
 
        /* Set clocksource mask. */
        count_width = read_gic_config() & GIC_CONFIG_COUNTBITS;
-       count_width >>= __fls(GIC_CONFIG_COUNTBITS);
+       count_width >>= __ffs(GIC_CONFIG_COUNTBITS);
        count_width *= 4;
        count_width += 32;
        gic_clocksource.mask = CLOCKSOURCE_MASK(count_width);
@@ -205,12 +205,12 @@ static int __init gic_clocksource_of_init(struct device_node *node)
        } else if (of_property_read_u32(node, "clock-frequency",
                                        &gic_frequency)) {
                pr_err("GIC frequency not specified.\n");
-               return -EINVAL;;
+               return -EINVAL;
        }
        gic_timer_irq = irq_of_parse_and_map(node, 0);
        if (!gic_timer_irq) {
                pr_err("GIC timer IRQ not specified.\n");
-               return -EINVAL;;
+               return -EINVAL;
        }
 
        ret = __gic_clocksource_init();
index 2a3fe83ec3377cc160570fc9ed6e0e5375f57f7a..3b56ea3f52afc8ee47ca21c38cd0c6aa7ec80021 100644 (file)
@@ -334,7 +334,7 @@ static int __init sun5i_timer_init(struct device_node *node)
        timer_base = of_io_request_and_map(node, 0, of_node_full_name(node));
        if (IS_ERR(timer_base)) {
                pr_err("Can't map registers\n");
-               return PTR_ERR(timer_base);;
+               return PTR_ERR(timer_base);
        }
 
        irq = irq_of_parse_and_map(node, 0);
index 3a88e33b0cfed86e487888bc203d9823fcdeb9c0..fb586e09682d84704a28748673cdc4c571c28105 100644 (file)
@@ -44,10 +44,10 @@ config ARM_DT_BL_CPUFREQ
 
 config ARM_SCPI_CPUFREQ
        tristate "SCPI based CPUfreq driver"
-       depends on ARM_BIG_LITTLE_CPUFREQ && ARM_SCPI_PROTOCOL && COMMON_CLK_SCPI
+       depends on ARM_SCPI_PROTOCOL && COMMON_CLK_SCPI
        help
-         This adds the CPUfreq driver support for ARM big.LITTLE platforms
-         using SCPI protocol for CPU power management.
+         This adds the CPUfreq driver support for ARM platforms using SCPI
+         protocol for CPU power management.
 
          This driver uses SCPI Message Protocol driver to interact with the
          firmware providing the CPU DVFS functionality.
index 7b596fa38ad2de4b7d9cf075ce69a876b4f2e35f..6bebc1f9f55aa65dc1718e47ddea4c0439b42cb0 100644 (file)
@@ -351,7 +351,13 @@ struct clk *s3c_cpufreq_clk_get(struct device *dev, const char *name)
 static int s3c_cpufreq_init(struct cpufreq_policy *policy)
 {
        policy->clk = clk_arm;
-       return cpufreq_generic_init(policy, ftab, cpu_cur.info->latency);
+
+       policy->cpuinfo.transition_latency = cpu_cur.info->latency;
+
+       if (ftab)
+               return cpufreq_table_validate_and_show(policy, ftab);
+
+       return 0;
 }
 
 static int __init s3c_cpufreq_initclks(void)
index c32a833e1b00542fd3f49bf758e96f57ee6fa478..d300a163945f53476b4a751f3c208d423717d55f 100644 (file)
@@ -51,15 +51,23 @@ static unsigned int scpi_cpufreq_get_rate(unsigned int cpu)
 static int
 scpi_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index)
 {
+       unsigned long freq = policy->freq_table[index].frequency;
        struct scpi_data *priv = policy->driver_data;
-       u64 rate = policy->freq_table[index].frequency * 1000;
+       u64 rate = freq * 1000;
        int ret;
 
        ret = clk_set_rate(priv->clk, rate);
-       if (!ret && (clk_get_rate(priv->clk) != rate))
-               ret = -EIO;
 
-       return ret;
+       if (ret)
+               return ret;
+
+       if (clk_get_rate(priv->clk) != rate)
+               return -EIO;
+
+       arch_set_freq_scale(policy->related_cpus, freq,
+                           policy->cpuinfo.max_freq);
+
+       return 0;
 }
 
 static int
index fcfa5b1eae6169b44398b20442d95532d924821c..b3afb6cc9d72278b35eadb239e280b5aa311c3f9 100644 (file)
@@ -211,7 +211,7 @@ static int __sev_platform_shutdown_locked(int *error)
 {
        int ret;
 
-       ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, 0, error);
+       ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
        if (ret)
                return ret;
 
@@ -271,7 +271,7 @@ static int sev_ioctl_do_reset(struct sev_issue_cmd *argp)
                        return rc;
        }
 
-       return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, 0, &argp->error);
+       return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, NULL, &argp->error);
 }
 
 static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
@@ -299,7 +299,7 @@ static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp)
                        return rc;
        }
 
-       return __sev_do_cmd_locked(cmd, 0, &argp->error);
+       return __sev_do_cmd_locked(cmd, NULL, &argp->error);
 }
 
 static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp)
@@ -624,7 +624,7 @@ EXPORT_SYMBOL_GPL(sev_guest_decommission);
 
 int sev_guest_df_flush(int *error)
 {
-       return sev_do_cmd(SEV_CMD_DF_FLUSH, 0, error);
+       return sev_do_cmd(SEV_CMD_DF_FLUSH, NULL, error);
 }
 EXPORT_SYMBOL_GPL(sev_guest_df_flush);
 
index 473af694ad1cbee97db06646a715776a5d9e7889..ecdc292aa4e4d861552e4646cdf7bf6bd5c24ea6 100644 (file)
@@ -246,12 +246,6 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
 {
        long avail;
 
-       /*
-        * The device driver is allowed to sleep, in order to make the
-        * memory directly accessible.
-        */
-       might_sleep();
-
        if (!dax_dev)
                return -EOPNOTSUPP;
 
index f652a0e0f5a2a46d78bece1d41cd0895dfc5d593..3548caa9e9339f17208a62066ad055c842491e3b 100644 (file)
@@ -163,6 +163,7 @@ struct mv_xor_v2_device {
        void __iomem *dma_base;
        void __iomem *glob_base;
        struct clk *clk;
+       struct clk *reg_clk;
        struct tasklet_struct irq_tasklet;
        struct list_head free_sw_desc;
        struct dma_device dmadev;
@@ -749,13 +750,26 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
+       xor_dev->reg_clk = devm_clk_get(&pdev->dev, "reg");
+       if (PTR_ERR(xor_dev->reg_clk) != -ENOENT) {
+               if (!IS_ERR(xor_dev->reg_clk)) {
+                       ret = clk_prepare_enable(xor_dev->reg_clk);
+                       if (ret)
+                               return ret;
+               } else {
+                       return PTR_ERR(xor_dev->reg_clk);
+               }
+       }
+
        xor_dev->clk = devm_clk_get(&pdev->dev, NULL);
-       if (IS_ERR(xor_dev->clk) && PTR_ERR(xor_dev->clk) == -EPROBE_DEFER)
-               return -EPROBE_DEFER;
+       if (IS_ERR(xor_dev->clk) && PTR_ERR(xor_dev->clk) == -EPROBE_DEFER) {
+               ret = EPROBE_DEFER;
+               goto disable_reg_clk;
+       }
        if (!IS_ERR(xor_dev->clk)) {
                ret = clk_prepare_enable(xor_dev->clk);
                if (ret)
-                       return ret;
+                       goto disable_reg_clk;
        }
 
        ret = platform_msi_domain_alloc_irqs(&pdev->dev, 1,
@@ -866,8 +880,9 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
 free_msi_irqs:
        platform_msi_domain_free_irqs(&pdev->dev);
 disable_clk:
-       if (!IS_ERR(xor_dev->clk))
-               clk_disable_unprepare(xor_dev->clk);
+       clk_disable_unprepare(xor_dev->clk);
+disable_reg_clk:
+       clk_disable_unprepare(xor_dev->reg_clk);
        return ret;
 }
 
index e3ff162c03fc6a011cb0139a4ffbda2d2600fd32..d0cacdb0713eca47360e4f5ceedc8dc6428145bb 100644 (file)
@@ -917,7 +917,7 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
 
        rcar_dmac_chan_configure_desc(chan, desc);
 
-       max_chunk_size = (RCAR_DMATCR_MASK + 1) << desc->xfer_shift;
+       max_chunk_size = RCAR_DMATCR_MASK << desc->xfer_shift;
 
        /*
         * Allocate and fill the transfer chunk descriptors. We own the only
index f34430f99fd805414085fea26540f3c152dd6b0c..872100215ca00f0f4703c025b8e52e637c85c710 100644 (file)
@@ -279,7 +279,7 @@ static const u32 correrrthrsld[] = {
  * sbridge structs
  */
 
-#define NUM_CHANNELS           4       /* Max channels per MC */
+#define NUM_CHANNELS           6       /* Max channels per MC */
 #define MAX_DIMMS              3       /* Max DIMMS per channel */
 #define KNL_MAX_CHAS           38      /* KNL max num. of Cache Home Agents */
 #define KNL_MAX_CHANNELS       6       /* KNL max num. of PCI channels */
index c16600f30611849562386e25375947785f580463..0bdea60c65ddbff81742b5d1a5d22c25521cfa8f 100644 (file)
@@ -639,7 +639,7 @@ static void __exit dcdbas_exit(void)
        platform_driver_unregister(&dcdbas_driver);
 }
 
-module_init(dcdbas_init);
+subsys_initcall_sync(dcdbas_init);
 module_exit(dcdbas_exit);
 
 MODULE_DESCRIPTION(DRIVER_DESCRIPTION " (version " DRIVER_VERSION ")");
index da661bf8cb96e3b683b04574667ec9e7a350376b..13c1edd37e9692155c1e127644715c433267e93b 100644 (file)
@@ -68,11 +68,11 @@ void efi_retrieve_tpm2_eventlog_1_2(efi_system_table_t *sys_table_arg)
        efi_guid_t linux_eventlog_guid = LINUX_EFI_TPM_EVENT_LOG_GUID;
        efi_status_t status;
        efi_physical_addr_t log_location, log_last_entry;
-       struct linux_efi_tpm_eventlog *log_tbl;
+       struct linux_efi_tpm_eventlog *log_tbl = NULL;
        unsigned long first_entry_addr, last_entry_addr;
        size_t log_size, last_entry_size;
        efi_bool_t truncated;
-       void *tcg2_protocol;
+       void *tcg2_protocol = NULL;
 
        status = efi_call_early(locate_protocol, &tcg2_guid, NULL,
                                &tcg2_protocol);
index e76de57dd617d7e2c918c057dcc0ced6636be7b2..ebaea8b1594b7fc4919a71fdac7ed9ef8418df7b 100644 (file)
@@ -14,7 +14,6 @@
  * GNU General Public License for more details.
  */
 
-#include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/gpio.h>
 #include <linux/init.h>
@@ -37,10 +36,9 @@ struct gpio_rcar_priv {
        struct platform_device *pdev;
        struct gpio_chip gpio_chip;
        struct irq_chip irq_chip;
-       struct clk *clk;
        unsigned int irq_parent;
+       atomic_t wakeup_path;
        bool has_both_edge_trigger;
-       bool needs_clk;
 };
 
 #define IOINTSEL 0x00  /* General IO/Interrupt Switching Register */
@@ -186,13 +184,10 @@ static int gpio_rcar_irq_set_wake(struct irq_data *d, unsigned int on)
                }
        }
 
-       if (!p->clk)
-               return 0;
-
        if (on)
-               clk_enable(p->clk);
+               atomic_inc(&p->wakeup_path);
        else
-               clk_disable(p->clk);
+               atomic_dec(&p->wakeup_path);
 
        return 0;
 }
@@ -330,17 +325,14 @@ static int gpio_rcar_direction_output(struct gpio_chip *chip, unsigned offset,
 
 struct gpio_rcar_info {
        bool has_both_edge_trigger;
-       bool needs_clk;
 };
 
 static const struct gpio_rcar_info gpio_rcar_info_gen1 = {
        .has_both_edge_trigger = false,
-       .needs_clk = false,
 };
 
 static const struct gpio_rcar_info gpio_rcar_info_gen2 = {
        .has_both_edge_trigger = true,
-       .needs_clk = true,
 };
 
 static const struct of_device_id gpio_rcar_of_table[] = {
@@ -403,7 +395,6 @@ static int gpio_rcar_parse_dt(struct gpio_rcar_priv *p, unsigned int *npins)
        ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &args);
        *npins = ret == 0 ? args.args[2] : RCAR_MAX_GPIO_PER_BANK;
        p->has_both_edge_trigger = info->has_both_edge_trigger;
-       p->needs_clk = info->needs_clk;
 
        if (*npins == 0 || *npins > RCAR_MAX_GPIO_PER_BANK) {
                dev_warn(&p->pdev->dev,
@@ -440,16 +431,6 @@ static int gpio_rcar_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, p);
 
-       p->clk = devm_clk_get(dev, NULL);
-       if (IS_ERR(p->clk)) {
-               if (p->needs_clk) {
-                       dev_err(dev, "unable to get clock\n");
-                       ret = PTR_ERR(p->clk);
-                       goto err0;
-               }
-               p->clk = NULL;
-       }
-
        pm_runtime_enable(dev);
 
        irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
@@ -531,11 +512,24 @@ static int gpio_rcar_remove(struct platform_device *pdev)
        return 0;
 }
 
+static int __maybe_unused gpio_rcar_suspend(struct device *dev)
+{
+       struct gpio_rcar_priv *p = dev_get_drvdata(dev);
+
+       if (atomic_read(&p->wakeup_path))
+               device_set_wakeup_path(dev);
+
+       return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(gpio_rcar_pm_ops, gpio_rcar_suspend, NULL);
+
 static struct platform_driver gpio_rcar_device_driver = {
        .probe          = gpio_rcar_probe,
        .remove         = gpio_rcar_remove,
        .driver         = {
                .name   = "gpio_rcar",
+               .pm     = &gpio_rcar_pm_ops,
                .of_match_table = of_match_ptr(gpio_rcar_of_table),
        }
 };
index 564bb7a31da43b4e924daf7501c36ec9a4eb8fc2..84e5a9df234433b430fc950a04d80dc9cd43bad8 100644 (file)
@@ -241,6 +241,19 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
 
                desc = of_get_named_gpiod_flags(dev->of_node, prop_name, idx,
                                                &of_flags);
+               /*
+                * -EPROBE_DEFER in our case means that we found a
+                * valid GPIO property, but no controller has been
+                * registered so far.
+                *
+                * This means we don't need to look any further for
+                * alternate name conventions, and we should really
+                * preserve the return code for our user to be able to
+                * retry probing later.
+                */
+               if (IS_ERR(desc) && PTR_ERR(desc) == -EPROBE_DEFER)
+                       return desc;
+
                if (!IS_ERR(desc) || (PTR_ERR(desc) != -ENOENT))
                        break;
        }
@@ -250,7 +263,7 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
                desc = of_find_spi_gpio(dev, con_id, &of_flags);
 
        /* Special handling for regulator GPIOs if used */
-       if (IS_ERR(desc))
+       if (IS_ERR(desc) && PTR_ERR(desc) != -EPROBE_DEFER)
                desc = of_find_regulator_gpio(dev, con_id, &of_flags);
 
        if (IS_ERR(desc))
index d5a2eefd6c3e9c634597dba673a6ee25a60c830d..74edba18b1596504ab76a45e71146dc7084e6c13 100644 (file)
@@ -1156,7 +1156,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
 /*
  * Writeback
  */
-#define AMDGPU_MAX_WB 512      /* Reserve at most 512 WB slots for amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 128      /* Reserve at most 128 WB slots for amdgpu-owned rings. */
 
 struct amdgpu_wb {
        struct amdgpu_bo        *wb_obj;
index 57afad79f55d086b673cd6c61c688d532620a033..8fa850a070e0fe8ea7a67823008ff7e543d8d1dc 100644 (file)
@@ -540,6 +540,9 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
        size_t size;
        u32 retry = 3;
 
+       if (amdgpu_acpi_pcie_notify_device_ready(adev))
+               return -EINVAL;
+
        /* Get the device handle */
        handle = ACPI_HANDLE(&adev->pdev->dev);
        if (!handle)
index 74d2efaec52f86a5dac357d2c52f775db67bdb83..7a073ac5f9c61c1653414eb6e60b95f263f505d4 100644 (file)
@@ -69,25 +69,18 @@ void amdgpu_connector_hotplug(struct drm_connector *connector)
                /* don't do anything if sink is not display port, i.e.,
                 * passive dp->(dvi|hdmi) adaptor
                 */
-               if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) {
-                       int saved_dpms = connector->dpms;
-                       /* Only turn off the display if it's physically disconnected */
-                       if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
-                               drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
-                       } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
-                               /* Don't try to start link training before we
-                                * have the dpcd */
-                               if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
-                                       return;
-
-                               /* set it to OFF so that drm_helper_connector_dpms()
-                                * won't return immediately since the current state
-                                * is ON at this point.
-                                */
-                               connector->dpms = DRM_MODE_DPMS_OFF;
-                               drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
-                       }
-                       connector->dpms = saved_dpms;
+               if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT &&
+                   amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd) &&
+                   amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
+                       /* Don't start link training before we have the DPCD */
+                       if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
+                               return;
+
+                       /* Turn the connector off and back on immediately, which
+                        * will trigger link training
+                        */
+                       drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+                       drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
                }
        }
 }
index 00a50cc5ec9a31a77b7476020e5f47089c83c5de..66cb10cdc7c3e4410eb2b130b8b4d02c1732cdd6 100644 (file)
@@ -492,7 +492,7 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev)
                memset(&adev->wb.used, 0, sizeof(adev->wb.used));
 
                /* clear wb memory */
-               memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t));
+               memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
        }
 
        return 0;
@@ -530,8 +530,9 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
  */
 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
 {
+       wb >>= 3;
        if (wb < adev->wb.num_wb)
-               __clear_bit(wb >> 3, adev->wb.used);
+               __clear_bit(wb, adev->wb.used);
 }
 
 /**
@@ -1455,11 +1456,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
                if (!adev->ip_blocks[i].status.hw)
                        continue;
-               if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
-                       amdgpu_free_static_csa(adev);
-                       amdgpu_device_wb_fini(adev);
-                       amdgpu_device_vram_scratch_fini(adev);
-               }
 
                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
                        adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
@@ -1486,6 +1482,13 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
                if (!adev->ip_blocks[i].status.sw)
                        continue;
+
+               if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+                       amdgpu_free_static_csa(adev);
+                       amdgpu_device_wb_fini(adev);
+                       amdgpu_device_vram_scratch_fini(adev);
+               }
+
                r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
                /* XXX handle errors */
                if (r) {
@@ -2060,9 +2063,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 
        DRM_INFO("amdgpu: finishing device.\n");
        adev->shutdown = true;
-       if (adev->mode_info.mode_config_initialized)
-               drm_crtc_force_disable_all(adev->ddev);
-
+       if (adev->mode_info.mode_config_initialized){
+               if (!amdgpu_device_has_dc_support(adev))
+                       drm_crtc_force_disable_all(adev->ddev);
+               else
+                       drm_atomic_helper_shutdown(adev->ddev);
+       }
        amdgpu_ib_pool_fini(adev);
        amdgpu_fence_driver_fini(adev);
        amdgpu_fbdev_fini(adev);
@@ -2284,14 +2290,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
                                drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
                        }
                        drm_modeset_unlock_all(dev);
-               } else {
-                       /*
-                        * There is no equivalent atomic helper to turn on
-                        * display, so we defined our own function for this,
-                        * once suspend resume is supported by the atomic
-                        * framework this will be reworked
-                        */
-                       amdgpu_dm_display_resume(adev);
                }
        }
 
@@ -2726,7 +2724,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
        if (amdgpu_device_has_dc_support(adev)) {
                if (drm_atomic_helper_resume(adev->ddev, state))
                        dev_info(adev->dev, "drm resume failed:%d\n", r);
-               amdgpu_dm_display_resume(adev);
        } else {
                drm_helper_resume_force_mode(adev->ddev);
        }
index e48b4ec88c8c72b84599d8f85b38836c92c26523..ca6c931dabfab9c3248dc5a0460b8bdd15f1011b 100644 (file)
@@ -36,8 +36,6 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
        struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj);
 
        if (robj) {
-               if (robj->gem_base.import_attach)
-                       drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg);
                amdgpu_mn_unregister(robj);
                amdgpu_bo_unref(&robj);
        }
index e14ab34d8262418084abdafe4015ad5ddee5c0a7..7c2be32c5aea6fd55a5747fd22d80e913d356c1c 100644 (file)
@@ -75,7 +75,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man,
 static int amdgpu_gtt_mgr_fini(struct ttm_mem_type_manager *man)
 {
        struct amdgpu_gtt_mgr *mgr = man->priv;
-
+       spin_lock(&mgr->lock);
        drm_mm_takedown(&mgr->mm);
        spin_unlock(&mgr->lock);
        kfree(mgr);
index 56bcd59c3399a0a912ea1fa8f869c105b8827327..36483e0d3c9729e555163e9f9a0b1cfe7dc319a8 100644 (file)
@@ -257,7 +257,8 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
        r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
        if (r) {
                adev->irq.installed = false;
-               flush_work(&adev->hotplug_work);
+               if (!amdgpu_device_has_dc_support(adev))
+                       flush_work(&adev->hotplug_work);
                cancel_work_sync(&adev->reset_work);
                return r;
        }
@@ -282,7 +283,8 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
                adev->irq.installed = false;
                if (adev->irq.msi_enabled)
                        pci_disable_msi(adev->pdev);
-               flush_work(&adev->hotplug_work);
+               if (!amdgpu_device_has_dc_support(adev))
+                       flush_work(&adev->hotplug_work);
                cancel_work_sync(&adev->reset_work);
        }
 
index 54f06c959340923c36ca469dd558df968e5688d0..2264c5c97009333aa74c6a64ffdfd6d2d69bd4d0 100644 (file)
@@ -352,6 +352,7 @@ struct amdgpu_mode_info {
        u16 firmware_flags;
        /* pointer to backlight encoder */
        struct amdgpu_encoder *bl_encoder;
+       u8 bl_level; /* saved backlight level */
        struct amdgpu_audio     audio; /* audio stuff */
        int                     num_crtc; /* number of crtcs */
        int                     num_hpd; /* number of hpd pins */
index 5c4c3e0d527be64386dcab0951727642f64d73db..1220322c168092951aa424e79c83976d3b17e1c4 100644 (file)
@@ -56,6 +56,8 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 
        amdgpu_bo_kunmap(bo);
 
+       if (bo->gem_base.import_attach)
+               drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg);
        drm_gem_object_release(&bo->gem_base);
        amdgpu_bo_unref(&bo->parent);
        if (!list_empty(&bo->shadow_list)) {
index 13044e66dcaf4e6ab0b79fab23aef8db987170db..561d3312af3280a56bccaee8c04dc7eab9017710 100644 (file)
@@ -481,7 +481,7 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
        result = 0;
 
        if (*pos < 12) {
-               early[0] = amdgpu_ring_get_rptr(ring);
+               early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
                early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
                early[2] = ring->wptr & ring->buf_mask;
                for (i = *pos / 4; i < 3 && size; i++) {
index b2eae86bf906abe6ed0bd7e89f67f2d9acbfa809..5c26a8e806b93dfe4a0aced5838f20b0fa234c80 100644 (file)
@@ -299,12 +299,15 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 
        cancel_delayed_work_sync(&adev->uvd.idle_work);
 
-       for (i = 0; i < adev->uvd.max_handles; ++i)
-               if (atomic_read(&adev->uvd.handles[i]))
-                       break;
+       /* only valid for physical mode */
+       if (adev->asic_type < CHIP_POLARIS10) {
+               for (i = 0; i < adev->uvd.max_handles; ++i)
+                       if (atomic_read(&adev->uvd.handles[i]))
+                               break;
 
-       if (i == AMDGPU_MAX_UVD_HANDLES)
-               return 0;
+               if (i == adev->uvd.max_handles)
+                       return 0;
+       }
 
        size = amdgpu_bo_size(adev->uvd.vcpu_bo);
        ptr = adev->uvd.cpu_addr;
index 2af26d2da12779f8bfb38460ddaf67f03ccf41e3..d702fb8e342753f19f308c398bd2b0567d252ba2 100644 (file)
@@ -34,7 +34,7 @@
 #include <linux/backlight.h>
 #include "bif/bif_4_1_d.h"
 
-static u8
+u8
 amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev)
 {
        u8 backlight_level;
@@ -48,7 +48,7 @@ amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev)
        return backlight_level;
 }
 
-static void
+void
 amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev,
                                            u8 backlight_level)
 {
index 2bdec40515ce51e846c15dd625d89de649a4db8f..f77cbdef679e989281027242ff11328105d08bb1 100644 (file)
 #ifndef __ATOMBIOS_ENCODER_H__
 #define __ATOMBIOS_ENCODER_H__
 
+u8
+amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev);
+void
+amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev,
+                                                  u8 backlight_level);
 u8
 amdgpu_atombios_encoder_get_backlight_level(struct amdgpu_encoder *amdgpu_encoder);
 void
index f34bc68aadfb119380e5f6ff1a279a996d4f453a..022f303463fc83920fca4962101a343c1b557238 100644 (file)
@@ -2921,6 +2921,11 @@ static int dce_v10_0_hw_fini(void *handle)
 
 static int dce_v10_0_suspend(void *handle)
 {
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+       adev->mode_info.bl_level =
+               amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
        return dce_v10_0_hw_fini(handle);
 }
 
@@ -2929,6 +2934,9 @@ static int dce_v10_0_resume(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int ret;
 
+       amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+                                                          adev->mode_info.bl_level);
+
        ret = dce_v10_0_hw_init(handle);
 
        /* turn on the BL */
index 26378bd6aba45a86e18e1d7ac122182a7730a811..800a9f36ab4faddbd5836de2c486e6550eff5a68 100644 (file)
@@ -3047,6 +3047,11 @@ static int dce_v11_0_hw_fini(void *handle)
 
 static int dce_v11_0_suspend(void *handle)
 {
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+       adev->mode_info.bl_level =
+               amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
        return dce_v11_0_hw_fini(handle);
 }
 
@@ -3055,6 +3060,9 @@ static int dce_v11_0_resume(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int ret;
 
+       amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+                                                          adev->mode_info.bl_level);
+
        ret = dce_v11_0_hw_init(handle);
 
        /* turn on the BL */
index bd2c4f727df661866733d3b21254769220f7101c..b8368f69ce1fbbe17230ef7e7cd5b7e0d35e9d85 100644 (file)
@@ -2787,6 +2787,11 @@ static int dce_v6_0_hw_fini(void *handle)
 
 static int dce_v6_0_suspend(void *handle)
 {
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+       adev->mode_info.bl_level =
+               amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
        return dce_v6_0_hw_fini(handle);
 }
 
@@ -2795,6 +2800,9 @@ static int dce_v6_0_resume(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int ret;
 
+       amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+                                                          adev->mode_info.bl_level);
+
        ret = dce_v6_0_hw_init(handle);
 
        /* turn on the BL */
@@ -3093,7 +3101,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
                tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
                WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
                schedule_work(&adev->hotplug_work);
-               DRM_INFO("IH: HPD%d\n", hpd + 1);
+               DRM_DEBUG("IH: HPD%d\n", hpd + 1);
        }
 
        return 0;
index c008dc03068707de0bd2620679f2f74357d474b1..012e0a9ae0ffcd5e7dda1ce1925ae93d4e4e4b6f 100644 (file)
@@ -2819,6 +2819,11 @@ static int dce_v8_0_hw_fini(void *handle)
 
 static int dce_v8_0_suspend(void *handle)
 {
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+       adev->mode_info.bl_level =
+               amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
        return dce_v8_0_hw_fini(handle);
 }
 
@@ -2827,6 +2832,9 @@ static int dce_v8_0_resume(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int ret;
 
+       amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+                                                          adev->mode_info.bl_level);
+
        ret = dce_v8_0_hw_init(handle);
 
        /* turn on the BL */
index a066c5eda135a782d8e01f730cfc10172210c7c9..a4309698e76c898fc038b362ed1d124a8e3d6806 100644 (file)
@@ -4384,34 +4384,8 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
        case CHIP_KAVERI:
                adev->gfx.config.max_shader_engines = 1;
                adev->gfx.config.max_tile_pipes = 4;
-               if ((adev->pdev->device == 0x1304) ||
-                   (adev->pdev->device == 0x1305) ||
-                   (adev->pdev->device == 0x130C) ||
-                   (adev->pdev->device == 0x130F) ||
-                   (adev->pdev->device == 0x1310) ||
-                   (adev->pdev->device == 0x1311) ||
-                   (adev->pdev->device == 0x131C)) {
-                       adev->gfx.config.max_cu_per_sh = 8;
-                       adev->gfx.config.max_backends_per_se = 2;
-               } else if ((adev->pdev->device == 0x1309) ||
-                          (adev->pdev->device == 0x130A) ||
-                          (adev->pdev->device == 0x130D) ||
-                          (adev->pdev->device == 0x1313) ||
-                          (adev->pdev->device == 0x131D)) {
-                       adev->gfx.config.max_cu_per_sh = 6;
-                       adev->gfx.config.max_backends_per_se = 2;
-               } else if ((adev->pdev->device == 0x1306) ||
-                          (adev->pdev->device == 0x1307) ||
-                          (adev->pdev->device == 0x130B) ||
-                          (adev->pdev->device == 0x130E) ||
-                          (adev->pdev->device == 0x1315) ||
-                          (adev->pdev->device == 0x131B)) {
-                       adev->gfx.config.max_cu_per_sh = 4;
-                       adev->gfx.config.max_backends_per_se = 1;
-               } else {
-                       adev->gfx.config.max_cu_per_sh = 3;
-                       adev->gfx.config.max_backends_per_se = 1;
-               }
+               adev->gfx.config.max_cu_per_sh = 8;
+               adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_texture_channel_caches = 4;
                adev->gfx.config.max_gprs = 256;
index 2719937e09d6bf00a4f78c47cd970ce5f5a51000..3b7e7af09ead1b8ea7ec8168b1252e71baaccb57 100644 (file)
@@ -634,7 +634,7 @@ static int gmc_v9_0_late_init(void *handle)
        for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
                BUG_ON(vm_inv_eng[i] > 16);
 
-       if (adev->asic_type == CHIP_VEGA10) {
+       if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) {
                r = gmc_v9_0_ecc_available(adev);
                if (r == 1) {
                        DRM_INFO("ECC is active.\n");
@@ -682,7 +682,10 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
        adev->mc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
        if (!adev->mc.vram_width) {
                /* hbm memory channel size */
-               chansize = 128;
+               if (adev->flags & AMD_IS_APU)
+                       chansize = 64;
+               else
+                       chansize = 128;
 
                tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
                tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
index e92fb372bc99738dad957334ba40d3a138c57636..91cf95a8c39c832d50b378bf7464a99f19c839c8 100644 (file)
@@ -238,31 +238,27 @@ static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
 static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
 {
        struct amdgpu_device *adev = ring->adev;
-       u64 *wptr = NULL;
-       uint64_t local_wptr = 0;
+       u64 wptr;
 
        if (ring->use_doorbell) {
                /* XXX check if swapping is necessary on BE */
-               wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]);
-               DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr);
-               *wptr = (*wptr) >> 2;
-               DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr);
+               wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+               DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
        } else {
                u32 lowbit, highbit;
                int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
 
-               wptr = &local_wptr;
                lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR)) >> 2;
                highbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
 
                DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
                                me, highbit, lowbit);
-               *wptr = highbit;
-               *wptr = (*wptr) << 32;
-               *wptr |= lowbit;
+               wptr = highbit;
+               wptr = wptr << 32;
+               wptr |= lowbit;
        }
 
-       return *wptr;
+       return wptr >> 2;
 }
 
 /**
index 543101d5a5edd053c83beea6de7db5adbf9844eb..2095173aaabf864345c0643170a8975ef08866b4 100644 (file)
@@ -31,6 +31,7 @@
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"
 #include "atom.h"
+#include "amd_pcie.h"
 #include "amdgpu_powerplay.h"
 #include "sid.h"
 #include "si_ih.h"
@@ -1461,8 +1462,8 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
 {
        struct pci_dev *root = adev->pdev->bus->self;
        int bridge_pos, gpu_pos;
-       u32 speed_cntl, mask, current_data_rate;
-       int ret, i;
+       u32 speed_cntl, current_data_rate;
+       int i;
        u16 tmp16;
 
        if (pci_is_root_bus(adev->pdev->bus))
@@ -1474,23 +1475,20 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
        if (adev->flags & AMD_IS_APU)
                return;
 
-       ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
-       if (ret != 0)
-               return;
-
-       if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
+       if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+                                       CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
                return;
 
        speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
        current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
                LC_CURRENT_DATA_RATE_SHIFT;
-       if (mask & DRM_PCIE_SPEED_80) {
+       if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
                if (current_data_rate == 2) {
                        DRM_INFO("PCIE gen 3 link speeds already enabled\n");
                        return;
                }
                DRM_INFO("enabling PCIE gen 3 link speeds, disable with amdgpu.pcie_gen2=0\n");
-       } else if (mask & DRM_PCIE_SPEED_50) {
+       } else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) {
                if (current_data_rate == 1) {
                        DRM_INFO("PCIE gen 2 link speeds already enabled\n");
                        return;
@@ -1506,7 +1504,7 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
        if (!gpu_pos)
                return;
 
-       if (mask & DRM_PCIE_SPEED_80) {
+       if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
                if (current_data_rate != 2) {
                        u16 bridge_cfg, gpu_cfg;
                        u16 bridge_cfg2, gpu_cfg2;
@@ -1589,9 +1587,9 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
 
        pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
        tmp16 &= ~0xf;
-       if (mask & DRM_PCIE_SPEED_80)
+       if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
                tmp16 |= 3;
-       else if (mask & DRM_PCIE_SPEED_50)
+       else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
                tmp16 |= 2;
        else
                tmp16 |= 1;
index ce675a7f179a6a0f3b9f7b254589694541aea3cd..22f0b7ff3ac9731d05643b9c6af9ce7ef2ee3208 100644 (file)
@@ -26,6 +26,7 @@
 #include "amdgpu_pm.h"
 #include "amdgpu_dpm.h"
 #include "amdgpu_atombios.h"
+#include "amd_pcie.h"
 #include "sid.h"
 #include "r600_dpm.h"
 #include "si_dpm.h"
@@ -3331,29 +3332,6 @@ static void btc_apply_voltage_delta_rules(struct amdgpu_device *adev,
        }
 }
 
-static enum amdgpu_pcie_gen r600_get_pcie_gen_support(struct amdgpu_device *adev,
-                                              u32 sys_mask,
-                                              enum amdgpu_pcie_gen asic_gen,
-                                              enum amdgpu_pcie_gen default_gen)
-{
-       switch (asic_gen) {
-       case AMDGPU_PCIE_GEN1:
-               return AMDGPU_PCIE_GEN1;
-       case AMDGPU_PCIE_GEN2:
-               return AMDGPU_PCIE_GEN2;
-       case AMDGPU_PCIE_GEN3:
-               return AMDGPU_PCIE_GEN3;
-       default:
-               if ((sys_mask & DRM_PCIE_SPEED_80) && (default_gen == AMDGPU_PCIE_GEN3))
-                       return AMDGPU_PCIE_GEN3;
-               else if ((sys_mask & DRM_PCIE_SPEED_50) && (default_gen == AMDGPU_PCIE_GEN2))
-                       return AMDGPU_PCIE_GEN2;
-               else
-                       return AMDGPU_PCIE_GEN1;
-       }
-       return AMDGPU_PCIE_GEN1;
-}
-
 static void r600_calculate_u_and_p(u32 i, u32 r_c, u32 p_b,
                            u32 *p, u32 *u)
 {
@@ -5028,10 +5006,11 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev,
                                                              table->ACPIState.levels[0].vddc.index,
                                                              &table->ACPIState.levels[0].std_vddc);
                }
-               table->ACPIState.levels[0].gen2PCIE = (u8)r600_get_pcie_gen_support(adev,
-                                                                                   si_pi->sys_pcie_mask,
-                                                                                   si_pi->boot_pcie_gen,
-                                                                                   AMDGPU_PCIE_GEN1);
+               table->ACPIState.levels[0].gen2PCIE =
+                       (u8)amdgpu_get_pcie_gen_support(adev,
+                                                       si_pi->sys_pcie_mask,
+                                                       si_pi->boot_pcie_gen,
+                                                       AMDGPU_PCIE_GEN1);
 
                if (si_pi->vddc_phase_shed_control)
                        si_populate_phase_shedding_value(adev,
@@ -7168,10 +7147,10 @@ static void si_parse_pplib_clock_info(struct amdgpu_device *adev,
        pl->vddc = le16_to_cpu(clock_info->si.usVDDC);
        pl->vddci = le16_to_cpu(clock_info->si.usVDDCI);
        pl->flags = le32_to_cpu(clock_info->si.ulFlags);
-       pl->pcie_gen = r600_get_pcie_gen_support(adev,
-                                                si_pi->sys_pcie_mask,
-                                                si_pi->boot_pcie_gen,
-                                                clock_info->si.ucPCIEGen);
+       pl->pcie_gen = amdgpu_get_pcie_gen_support(adev,
+                                                  si_pi->sys_pcie_mask,
+                                                  si_pi->boot_pcie_gen,
+                                                  clock_info->si.ucPCIEGen);
 
        /* patch up vddc if necessary */
        ret = si_get_leakage_voltage_from_leakage_index(adev, pl->vddc,
@@ -7326,7 +7305,6 @@ static int si_dpm_init(struct amdgpu_device *adev)
        struct si_power_info *si_pi;
        struct atom_clock_dividers dividers;
        int ret;
-       u32 mask;
 
        si_pi = kzalloc(sizeof(struct si_power_info), GFP_KERNEL);
        if (si_pi == NULL)
@@ -7336,11 +7314,9 @@ static int si_dpm_init(struct amdgpu_device *adev)
        eg_pi = &ni_pi->eg;
        pi = &eg_pi->rv7xx;
 
-       ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
-       if (ret)
-               si_pi->sys_pcie_mask = 0;
-       else
-               si_pi->sys_pcie_mask = mask;
+       si_pi->sys_pcie_mask =
+               (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) >>
+               CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT;
        si_pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID;
        si_pi->boot_pcie_gen = si_get_current_pcie_speed(adev);
 
index b2bfedaf57f197b38e98dce034d12c1d3359ca64..9bab4842cd4411f8e282c083f526937b2979f6a0 100644 (file)
@@ -1618,7 +1618,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
        .set_wptr = uvd_v6_0_enc_ring_set_wptr,
        .emit_frame_size =
                4 + /* uvd_v6_0_enc_ring_emit_pipeline_sync */
-               6 + /* uvd_v6_0_enc_ring_emit_vm_flush */
+               5 + /* uvd_v6_0_enc_ring_emit_vm_flush */
                5 + 5 + /* uvd_v6_0_enc_ring_emit_fence x2 vm fence */
                1, /* uvd_v6_0_enc_ring_insert_end */
        .emit_ib_size = 5, /* uvd_v6_0_enc_ring_emit_ib */
index 1ce4c98385e3a17385a00b076a699cd64462d4dd..63c67346d316ac26a53a6f52e34feb8857dc9550 100644 (file)
@@ -629,11 +629,13 @@ static int dm_resume(void *handle)
 {
        struct amdgpu_device *adev = handle;
        struct amdgpu_display_manager *dm = &adev->dm;
+       int ret = 0;
 
        /* power on hardware */
        dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
 
-       return 0;
+       ret = amdgpu_dm_display_resume(adev);
+       return ret;
 }
 
 int amdgpu_dm_display_resume(struct amdgpu_device *adev)
@@ -1035,6 +1037,10 @@ static void handle_hpd_rx_irq(void *param)
                        !is_mst_root_connector) {
                /* Downstream Port status changed. */
                if (dc_link_detect(dc_link, DETECT_REASON_HPDRX)) {
+
+                       if (aconnector->fake_enable)
+                               aconnector->fake_enable = false;
+
                        amdgpu_dm_update_connector_after_detect(aconnector);
 
 
@@ -2010,30 +2016,32 @@ static void update_stream_scaling_settings(const struct drm_display_mode *mode,
        dst.width = stream->timing.h_addressable;
        dst.height = stream->timing.v_addressable;
 
-       rmx_type = dm_state->scaling;
-       if (rmx_type == RMX_ASPECT || rmx_type == RMX_OFF) {
-               if (src.width * dst.height <
-                               src.height * dst.width) {
-                       /* height needs less upscaling/more downscaling */
-                       dst.width = src.width *
-                                       dst.height / src.height;
-               } else {
-                       /* width needs less upscaling/more downscaling */
-                       dst.height = src.height *
-                                       dst.width / src.width;
+       if (dm_state) {
+               rmx_type = dm_state->scaling;
+               if (rmx_type == RMX_ASPECT || rmx_type == RMX_OFF) {
+                       if (src.width * dst.height <
+                                       src.height * dst.width) {
+                               /* height needs less upscaling/more downscaling */
+                               dst.width = src.width *
+                                               dst.height / src.height;
+                       } else {
+                               /* width needs less upscaling/more downscaling */
+                               dst.height = src.height *
+                                               dst.width / src.width;
+                       }
+               } else if (rmx_type == RMX_CENTER) {
+                       dst = src;
                }
-       } else if (rmx_type == RMX_CENTER) {
-               dst = src;
-       }
 
-       dst.x = (stream->timing.h_addressable - dst.width) / 2;
-       dst.y = (stream->timing.v_addressable - dst.height) / 2;
+               dst.x = (stream->timing.h_addressable - dst.width) / 2;
+               dst.y = (stream->timing.v_addressable - dst.height) / 2;
 
-       if (dm_state->underscan_enable) {
-               dst.x += dm_state->underscan_hborder / 2;
-               dst.y += dm_state->underscan_vborder / 2;
-               dst.width -= dm_state->underscan_hborder;
-               dst.height -= dm_state->underscan_vborder;
+               if (dm_state->underscan_enable) {
+                       dst.x += dm_state->underscan_hborder / 2;
+                       dst.y += dm_state->underscan_vborder / 2;
+                       dst.width -= dm_state->underscan_hborder;
+                       dst.height -= dm_state->underscan_vborder;
+               }
        }
 
        stream->src = src;
@@ -2358,12 +2366,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 
        if (aconnector == NULL) {
                DRM_ERROR("aconnector is NULL!\n");
-               goto drm_connector_null;
-       }
-
-       if (dm_state == NULL) {
-               DRM_ERROR("dm_state is NULL!\n");
-               goto dm_state_null;
+               return stream;
        }
 
        drm_connector = &aconnector->base;
@@ -2375,18 +2378,18 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
                 */
                if (aconnector->mst_port) {
                        dm_dp_mst_dc_sink_create(drm_connector);
-                       goto mst_dc_sink_create_done;
+                       return stream;
                }
 
                if (create_fake_sink(aconnector))
-                       goto stream_create_fail;
+                       return stream;
        }
 
        stream = dc_create_stream_for_sink(aconnector->dc_sink);
 
        if (stream == NULL) {
                DRM_ERROR("Failed to create stream for sink!\n");
-               goto stream_create_fail;
+               return stream;
        }
 
        list_for_each_entry(preferred_mode, &aconnector->base.modes, head) {
@@ -2412,9 +2415,12 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
        } else {
                decide_crtc_timing_for_drm_display_mode(
                                &mode, preferred_mode,
-                               dm_state->scaling != RMX_OFF);
+                               dm_state ? (dm_state->scaling != RMX_OFF) : false);
        }
 
+       if (!dm_state)
+               drm_mode_set_crtcinfo(&mode, 0);
+
        fill_stream_properties_from_drm_display_mode(stream,
                        &mode, &aconnector->base);
        update_stream_scaling_settings(&mode, dm_state, stream);
@@ -2424,10 +2430,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
                drm_connector,
                aconnector->dc_sink);
 
-stream_create_fail:
-dm_state_null:
-drm_connector_null:
-mst_dc_sink_create_done:
+       update_stream_signal(stream);
+
        return stream;
 }
 
@@ -2495,6 +2499,27 @@ dm_crtc_duplicate_state(struct drm_crtc *crtc)
        return &state->base;
 }
 
+
+static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable)
+{
+       enum dc_irq_source irq_source;
+       struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
+       struct amdgpu_device *adev = crtc->dev->dev_private;
+
+       irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst;
+       return dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY;
+}
+
+static int dm_enable_vblank(struct drm_crtc *crtc)
+{
+       return dm_set_vblank(crtc, true);
+}
+
+static void dm_disable_vblank(struct drm_crtc *crtc)
+{
+       dm_set_vblank(crtc, false);
+}
+
 /* Implemented only the options currently availible for the driver */
 static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
        .reset = dm_crtc_reset_state,
@@ -2504,6 +2529,8 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
        .page_flip = drm_atomic_helper_page_flip,
        .atomic_duplicate_state = dm_crtc_duplicate_state,
        .atomic_destroy_state = dm_crtc_destroy_state,
+       .enable_vblank = dm_enable_vblank,
+       .disable_vblank = dm_disable_vblank,
 };
 
 static enum drm_connector_status
@@ -2798,7 +2825,7 @@ int amdgpu_dm_connector_mode_valid(struct drm_connector *connector,
                goto fail;
        }
 
-       stream = dc_create_stream_for_sink(dc_sink);
+       stream = create_stream_for_sink(aconnector, mode, NULL);
        if (stream == NULL) {
                DRM_ERROR("Failed to create stream for sink!\n");
                goto fail;
@@ -3058,6 +3085,9 @@ static int dm_plane_atomic_check(struct drm_plane *plane,
        if (!dm_plane_state->dc_state)
                return 0;
 
+       if (!fill_rects_from_plane_state(state, dm_plane_state->dc_state))
+               return -EINVAL;
+
        if (dc_validate_plane(dc, dm_plane_state->dc_state) == DC_OK)
                return 0;
 
@@ -3104,8 +3134,6 @@ static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
 
        switch (aplane->base.type) {
        case DRM_PLANE_TYPE_PRIMARY:
-               aplane->base.format_default = true;
-
                res = drm_universal_plane_init(
                                dm->adev->ddev,
                                &aplane->base,
@@ -4630,8 +4658,6 @@ static int dm_update_planes_state(struct dc *dc,
        bool pflip_needed  = !state->allow_modeset;
        int ret = 0;
 
-       if (pflip_needed)
-               return ret;
 
        /* Add new planes */
        for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) {
@@ -4646,6 +4672,8 @@ static int dm_update_planes_state(struct dc *dc,
 
                /* Remove any changed/removed planes */
                if (!enable) {
+                       if (pflip_needed)
+                               continue;
 
                        if (!old_plane_crtc)
                                continue;
@@ -4677,6 +4705,7 @@ static int dm_update_planes_state(struct dc *dc,
                        *lock_and_validation_needed = true;
 
                } else { /* Add new planes */
+                       struct dc_plane_state *dc_new_plane_state;
 
                        if (drm_atomic_plane_disabling(plane->state, new_plane_state))
                                continue;
@@ -4690,38 +4719,50 @@ static int dm_update_planes_state(struct dc *dc,
                        if (!dm_new_crtc_state->stream)
                                continue;
 
+                       if (pflip_needed)
+                               continue;
 
                        WARN_ON(dm_new_plane_state->dc_state);
 
-                       dm_new_plane_state->dc_state = dc_create_plane_state(dc);
-
-                       DRM_DEBUG_DRIVER("Enabling DRM plane: %d on DRM crtc %d\n",
-                                       plane->base.id, new_plane_crtc->base.id);
-
-                       if (!dm_new_plane_state->dc_state) {
+                       dc_new_plane_state = dc_create_plane_state(dc);
+                       if (!dc_new_plane_state) {
                                ret = -EINVAL;
                                return ret;
                        }
 
+                       DRM_DEBUG_DRIVER("Enabling DRM plane: %d on DRM crtc %d\n",
+                                       plane->base.id, new_plane_crtc->base.id);
+
                        ret = fill_plane_attributes(
                                new_plane_crtc->dev->dev_private,
-                               dm_new_plane_state->dc_state,
+                               dc_new_plane_state,
                                new_plane_state,
                                new_crtc_state);
-                       if (ret)
+                       if (ret) {
+                               dc_plane_state_release(dc_new_plane_state);
                                return ret;
+                       }
 
-
+                       /*
+                        * Any atomic check errors that occur after this will
+                        * not need a release. The plane state will be attached
+                        * to the stream, and therefore part of the atomic
+                        * state. It'll be released when the atomic state is
+                        * cleaned.
+                        */
                        if (!dc_add_plane_to_context(
                                        dc,
                                        dm_new_crtc_state->stream,
-                                       dm_new_plane_state->dc_state,
+                                       dc_new_plane_state,
                                        dm_state->context)) {
 
+                               dc_plane_state_release(dc_new_plane_state);
                                ret = -EINVAL;
                                return ret;
                        }
 
+                       dm_new_plane_state->dc_state = dc_new_plane_state;
+
                        /* Tell DC to do a full surface update every time there
                         * is a plane change. Inefficient, but works for now.
                         */
@@ -4735,6 +4776,33 @@ static int dm_update_planes_state(struct dc *dc,
        return ret;
 }
 
+static int dm_atomic_check_plane_state_fb(struct drm_atomic_state *state,
+                                         struct drm_crtc *crtc)
+{
+       struct drm_plane *plane;
+       struct drm_crtc_state *crtc_state;
+
+       WARN_ON(!drm_atomic_get_new_crtc_state(state, crtc));
+
+       drm_for_each_plane_mask(plane, state->dev, crtc->state->plane_mask) {
+               struct drm_plane_state *plane_state =
+                       drm_atomic_get_plane_state(state, plane);
+
+               if (IS_ERR(plane_state))
+                       return -EDEADLK;
+
+               crtc_state = drm_atomic_get_crtc_state(plane_state->state, crtc);
+               if (IS_ERR(crtc_state))
+                       return PTR_ERR(crtc_state);
+
+               if (crtc->primary == plane && crtc_state->active) {
+                       if (!plane_state->fb)
+                               return -EINVAL;
+               }
+       }
+       return 0;
+}
+
 static int amdgpu_dm_atomic_check(struct drm_device *dev,
                                  struct drm_atomic_state *state)
 {
@@ -4758,6 +4826,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
                goto fail;
 
        for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+               ret = dm_atomic_check_plane_state_fb(state, crtc);
+               if (ret)
+                       goto fail;
+
                if (!drm_atomic_crtc_needs_modeset(new_crtc_state) &&
                    !new_crtc_state->color_mgmt_changed)
                        continue;
index 9bd142f65f9baa9b9881dcde18c6ec9d8e527416..e1acc10e35a2fd6a4215c3d3179ac50b27bda6ce 100644 (file)
@@ -109,7 +109,7 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
                struct cea_sad *sad = &sads[i];
 
                edid_caps->audio_modes[i].format_code = sad->format;
-               edid_caps->audio_modes[i].channel_count = sad->channels;
+               edid_caps->audio_modes[i].channel_count = sad->channels + 1;
                edid_caps->audio_modes[i].sample_rate = sad->freq;
                edid_caps->audio_modes[i].sample_size = sad->byte2;
        }
index 1874b6cee6afa1dd81aeb0177c605c69c1b68e65..422055080df4a1b59b5fa4c66a12d6b5bd90f5ac 100644 (file)
@@ -683,10 +683,8 @@ static const struct amdgpu_irq_src_funcs dm_hpd_irq_funcs = {
 
 void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev)
 {
-       if (adev->mode_info.num_crtc > 0)
-               adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc;
-       else
-               adev->crtc_irq.num_types = 0;
+
+       adev->crtc_irq.num_types = adev->mode_info.num_crtc;
        adev->crtc_irq.funcs = &dm_crtc_irq_funcs;
 
        adev->pageflip_irq.num_types = adev->mode_info.num_crtc;
index f3d87f418d2efffd349358fd08b3b52e2a3cca5d..93421dad21bd3fdeba7f7d19946bf4c0005a4921 100644 (file)
@@ -189,6 +189,12 @@ void dm_dp_mst_dc_sink_create(struct drm_connector *connector)
                        .link = aconnector->dc_link,
                        .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST };
 
+       /*
+        * TODO: Need to further figure out why ddc.algo is NULL while MST port exists
+        */
+       if (!aconnector->port || !aconnector->port->aux.ddc.algo)
+               return;
+
        edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port);
 
        if (!edid) {
index 35e84ed031de08f5edfadbfb71e550291ae8ebef..12868c769606b8de546b193412d5c28052353dd5 100644 (file)
@@ -1358,13 +1358,13 @@ enum dc_irq_source dc_interrupt_to_irq_source(
        return dal_irq_service_to_irq_source(dc->res_pool->irqs, src_id, ext_id);
 }
 
-void dc_interrupt_set(struct dc *dc, enum dc_irq_source src, bool enable)
+bool dc_interrupt_set(struct dc *dc, enum dc_irq_source src, bool enable)
 {
 
        if (dc == NULL)
-               return;
+               return false;
 
-       dal_irq_service_set(dc->res_pool->irqs, src, enable);
+       return dal_irq_service_set(dc->res_pool->irqs, src, enable);
 }
 
 void dc_interrupt_ack(struct dc *dc, enum dc_irq_source src)
index a3742827157361ac0c3c57733eb845cd34ea4720..be5546181fa84d0dbbf91935878cae6e10129154 100644 (file)
@@ -1749,8 +1749,7 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx)
                        link->link_enc,
                        pipe_ctx->clock_source->id,
                        display_color_depth,
-                       pipe_ctx->stream->signal == SIGNAL_TYPE_HDMI_TYPE_A,
-                       pipe_ctx->stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK,
+                       pipe_ctx->stream->signal,
                        stream->phy_pix_clk);
 
        if (pipe_ctx->stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
index 61e8c3e02d1696bf08f53bb24cefdc074a74cf10..639421a00ab629eefce3b7333126632f4d2b0848 100644 (file)
@@ -718,7 +718,7 @@ static enum link_training_result perform_channel_equalization_sequence(
        uint32_t retries_ch_eq;
        enum dc_lane_count lane_count = lt_settings->link_settings.lane_count;
        union lane_align_status_updated dpcd_lane_status_updated = {{0}};
-       union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {{{0}}};;
+       union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {{{0}}};
 
        hw_tr_pattern = get_supported_tp(link);
 
@@ -1465,7 +1465,7 @@ void decide_link_settings(struct dc_stream_state *stream,
        /* MST doesn't perform link training for now
         * TODO: add MST specific link training routine
         */
-       if (is_mst_supported(link)) {
+       if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
                *link_setting = link->verified_link_cap;
                return;
        }
index 95b8dd0e53c6951a1e28e988fd4cb13dcaf04f0d..4d07ffebfd3112f937d0082b4482df4e4330451d 100644 (file)
@@ -1360,9 +1360,6 @@ bool dc_is_stream_scaling_unchanged(
        return true;
 }
 
-/* Maximum TMDS single link pixel clock 165MHz */
-#define TMDS_MAX_PIXEL_CLOCK_IN_KHZ 165000
-
 static void update_stream_engine_usage(
                struct resource_context *res_ctx,
                const struct resource_pool *pool,
index 261811e0c094a81a0a1c0c1c6420f83d4db4a3bc..cd5819789d76aa7b1106dc8991685a17b1e4c6af 100644 (file)
@@ -33,8 +33,7 @@
 /*******************************************************************************
  * Private functions
  ******************************************************************************/
-#define TMDS_MAX_PIXEL_CLOCK_IN_KHZ_UPMOST 297000
-static void update_stream_signal(struct dc_stream_state *stream)
+void update_stream_signal(struct dc_stream_state *stream)
 {
 
        struct dc_sink *dc_sink = stream->sink;
@@ -45,8 +44,9 @@ static void update_stream_signal(struct dc_stream_state *stream)
                stream->signal = dc_sink->sink_signal;
 
        if (dc_is_dvi_signal(stream->signal)) {
-               if (stream->timing.pix_clk_khz > TMDS_MAX_PIXEL_CLOCK_IN_KHZ_UPMOST &&
-                       stream->sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK)
+               if (stream->ctx->dc->caps.dual_link_dvi &&
+                   stream->timing.pix_clk_khz > TMDS_MAX_PIXEL_CLOCK &&
+                   stream->sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK)
                        stream->signal = SIGNAL_TYPE_DVI_DUAL_LINK;
                else
                        stream->signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
@@ -193,44 +193,20 @@ bool dc_stream_set_cursor_attributes(
 
        core_dc = stream->ctx->dc;
        res_ctx = &core_dc->current_state->res_ctx;
+       stream->cursor_attributes = *attributes;
 
        for (i = 0; i < MAX_PIPES; i++) {
                struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
 
-               if (pipe_ctx->stream != stream || (!pipe_ctx->plane_res.xfm && !pipe_ctx->plane_res.dpp))
+               if (pipe_ctx->stream != stream || (!pipe_ctx->plane_res.xfm &&
+                   !pipe_ctx->plane_res.dpp) || !pipe_ctx->plane_res.ipp)
                        continue;
                if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state)
                        continue;
 
 
-               if (pipe_ctx->plane_res.ipp->funcs->ipp_cursor_set_attributes != NULL)
-                       pipe_ctx->plane_res.ipp->funcs->ipp_cursor_set_attributes(
-                                               pipe_ctx->plane_res.ipp, attributes);
-
-               if (pipe_ctx->plane_res.hubp != NULL &&
-                               pipe_ctx->plane_res.hubp->funcs->set_cursor_attributes != NULL)
-                       pipe_ctx->plane_res.hubp->funcs->set_cursor_attributes(
-                                       pipe_ctx->plane_res.hubp, attributes);
-
-               if (pipe_ctx->plane_res.mi != NULL &&
-                               pipe_ctx->plane_res.mi->funcs->set_cursor_attributes != NULL)
-                       pipe_ctx->plane_res.mi->funcs->set_cursor_attributes(
-                                       pipe_ctx->plane_res.mi, attributes);
-
-
-               if (pipe_ctx->plane_res.xfm != NULL &&
-                               pipe_ctx->plane_res.xfm->funcs->set_cursor_attributes != NULL)
-                       pipe_ctx->plane_res.xfm->funcs->set_cursor_attributes(
-                               pipe_ctx->plane_res.xfm, attributes);
-
-               if (pipe_ctx->plane_res.dpp != NULL &&
-                               pipe_ctx->plane_res.dpp->funcs->set_cursor_attributes != NULL)
-                       pipe_ctx->plane_res.dpp->funcs->set_cursor_attributes(
-                               pipe_ctx->plane_res.dpp, attributes->color_format);
+               core_dc->hwss.set_cursor_attribute(pipe_ctx);
        }
-
-       stream->cursor_attributes = *attributes;
-
        return true;
 }
 
@@ -254,55 +230,21 @@ bool dc_stream_set_cursor_position(
 
        core_dc = stream->ctx->dc;
        res_ctx = &core_dc->current_state->res_ctx;
+       stream->cursor_position = *position;
 
        for (i = 0; i < MAX_PIPES; i++) {
                struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
-               struct input_pixel_processor *ipp = pipe_ctx->plane_res.ipp;
-               struct mem_input *mi = pipe_ctx->plane_res.mi;
-               struct hubp *hubp = pipe_ctx->plane_res.hubp;
-               struct dpp *dpp = pipe_ctx->plane_res.dpp;
-               struct dc_cursor_position pos_cpy = *position;
-               struct dc_cursor_mi_param param = {
-                       .pixel_clk_khz = stream->timing.pix_clk_khz,
-                       .ref_clk_khz = core_dc->res_pool->ref_clock_inKhz,
-                       .viewport_x_start = pipe_ctx->plane_res.scl_data.viewport.x,
-                       .viewport_width = pipe_ctx->plane_res.scl_data.viewport.width,
-                       .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz
-               };
 
                if (pipe_ctx->stream != stream ||
                                (!pipe_ctx->plane_res.mi  && !pipe_ctx->plane_res.hubp) ||
                                !pipe_ctx->plane_state ||
-                               (!pipe_ctx->plane_res.xfm && !pipe_ctx->plane_res.dpp))
-                       continue;
-
-               if (pipe_ctx->plane_state->address.type
-                               == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE)
-                       pos_cpy.enable = false;
-
-               if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state)
-                       pos_cpy.enable = false;
-
-
-               if (ipp != NULL && ipp->funcs->ipp_cursor_set_position != NULL)
-                       ipp->funcs->ipp_cursor_set_position(ipp, &pos_cpy, &param);
-
-               if (mi != NULL && mi->funcs->set_cursor_position != NULL)
-                       mi->funcs->set_cursor_position(mi, &pos_cpy, &param);
-
-               if (!hubp)
+                               (!pipe_ctx->plane_res.xfm && !pipe_ctx->plane_res.dpp) ||
+                               !pipe_ctx->plane_res.ipp)
                        continue;
 
-               if (hubp->funcs->set_cursor_position != NULL)
-                       hubp->funcs->set_cursor_position(hubp, &pos_cpy, &param);
-
-               if (dpp != NULL && dpp->funcs->set_cursor_position != NULL)
-                       dpp->funcs->set_cursor_position(dpp, &pos_cpy, &param, hubp->curs_attr.width);
-
+               core_dc->hwss.set_cursor_position(pipe_ctx);
        }
 
-       stream->cursor_position = *position;
-
        return true;
 }
 
index e2e3c9df79ea0f976dbbc6f6bf889169ccce0484..d6d56611604eb417e47d009e1f0119305cb2ab91 100644 (file)
@@ -62,6 +62,7 @@ struct dc_caps {
        bool dcc_const_color;
        bool dynamic_audio;
        bool is_apu;
+       bool dual_link_dvi;
 };
 
 struct dc_dcc_surface_param {
@@ -672,7 +673,7 @@ enum dc_irq_source dc_interrupt_to_irq_source(
                struct dc *dc,
                uint32_t src_id,
                uint32_t ext_id);
-void dc_interrupt_set(struct dc *dc, enum dc_irq_source src, bool enable);
+bool dc_interrupt_set(struct dc *dc, enum dc_irq_source src, bool enable);
 void dc_interrupt_ack(struct dc *dc, enum dc_irq_source src);
 enum dc_irq_source dc_get_hpd_irq_source_at_index(
                struct dc *dc, uint32_t link_index);
index 01c60f11b2bdeec208f5df9e5cfd2092a614eac5..456e4d29eaddcc4b19f0b28290efdeefd8a2f3e8 100644 (file)
@@ -237,6 +237,8 @@ enum surface_update_type dc_check_update_surfaces_for_stream(
  */
 struct dc_stream_state *dc_create_stream_for_sink(struct dc_sink *dc_sink);
 
+void update_stream_signal(struct dc_stream_state *stream);
+
 void dc_stream_retain(struct dc_stream_state *dc_stream);
 void dc_stream_release(struct dc_stream_state *dc_stream);
 
index b73db9e784375618f87422f143cf36d00e1ce552..f11f17fe08f98196fe6f105f5bb86860810b1d0b 100644 (file)
        SR(D2VGA_CONTROL), \
        SR(D3VGA_CONTROL), \
        SR(D4VGA_CONTROL), \
+       SR(VGA_TEST_CONTROL), \
        SR(DC_IP_REQUEST_CNTL), \
        BL_REG_LIST()
 
@@ -337,6 +338,7 @@ struct dce_hwseq_registers {
        uint32_t D2VGA_CONTROL;
        uint32_t D3VGA_CONTROL;
        uint32_t D4VGA_CONTROL;
+       uint32_t VGA_TEST_CONTROL;
        /* MMHUB registers. read only. temporary hack */
        uint32_t VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32;
        uint32_t VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
@@ -493,6 +495,12 @@ struct dce_hwseq_registers {
        HWS_SF(, DOMAIN6_PG_STATUS, DOMAIN6_PGFSM_PWR_STATUS, mask_sh), \
        HWS_SF(, DOMAIN7_PG_STATUS, DOMAIN7_PGFSM_PWR_STATUS, mask_sh), \
        HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \
+       HWS_SF(, D1VGA_CONTROL, D1VGA_MODE_ENABLE, mask_sh),\
+       HWS_SF(, D2VGA_CONTROL, D2VGA_MODE_ENABLE, mask_sh),\
+       HWS_SF(, D3VGA_CONTROL, D3VGA_MODE_ENABLE, mask_sh),\
+       HWS_SF(, D4VGA_CONTROL, D4VGA_MODE_ENABLE, mask_sh),\
+       HWS_SF(, VGA_TEST_CONTROL, VGA_TEST_ENABLE, mask_sh),\
+       HWS_SF(, VGA_TEST_CONTROL, VGA_TEST_RENDER_START, mask_sh),\
        HWS_SF(, LVTMA_PWRSEQ_CNTL, LVTMA_BLON, mask_sh), \
        HWS_SF(, LVTMA_PWRSEQ_STATE, LVTMA_PWRSEQ_TARGET_STATE_R, mask_sh)
 
@@ -583,7 +591,13 @@ struct dce_hwseq_registers {
        type DCFCLK_GATE_DIS; \
        type DCHUBBUB_GLOBAL_TIMER_REFDIV; \
        type DENTIST_DPPCLK_WDIVIDER; \
-       type DENTIST_DISPCLK_WDIVIDER;
+       type DENTIST_DISPCLK_WDIVIDER; \
+       type VGA_TEST_ENABLE; \
+       type VGA_TEST_RENDER_START; \
+       type D1VGA_MODE_ENABLE; \
+       type D2VGA_MODE_ENABLE; \
+       type D3VGA_MODE_ENABLE; \
+       type D4VGA_MODE_ENABLE;
 
 struct dce_hwseq_shift {
        HWSEQ_REG_FIELD_LIST(uint8_t)
index a266e3f5e75fd7ae82a46f7d3e09f900660c72cc..e4741f1a2b01b2f4d31ede97dcc8be7ad71e2b64 100644 (file)
 #define DCE110_DIG_FE_SOURCE_SELECT_DIGF 0x20
 #define DCE110_DIG_FE_SOURCE_SELECT_DIGG 0x40
 
-/* Minimum pixel clock, in KHz. For TMDS signal is 25.00 MHz */
-#define TMDS_MIN_PIXEL_CLOCK 25000
-/* Maximum pixel clock, in KHz. For TMDS signal is 165.00 MHz */
-#define TMDS_MAX_PIXEL_CLOCK 165000
-/* For current ASICs pixel clock - 600MHz */
-#define MAX_ENCODER_CLOCK 600000
-
 enum {
        DP_MST_UPDATE_MAX_RETRY = 50
 };
@@ -683,6 +676,7 @@ void dce110_link_encoder_construct(
 {
        struct bp_encoder_cap_info bp_cap_info = {0};
        const struct dc_vbios_funcs *bp_funcs = init_data->ctx->dc_bios->funcs;
+       enum bp_result result = BP_RESULT_OK;
 
        enc110->base.funcs = &dce110_lnk_enc_funcs;
        enc110->base.ctx = init_data->ctx;
@@ -757,15 +751,24 @@ void dce110_link_encoder_construct(
                enc110->base.preferred_engine = ENGINE_ID_UNKNOWN;
        }
 
+       /* default to one to mirror Windows behavior */
+       enc110->base.features.flags.bits.HDMI_6GB_EN = 1;
+
+       result = bp_funcs->get_encoder_cap_info(enc110->base.ctx->dc_bios,
+                                               enc110->base.id, &bp_cap_info);
+
        /* Override features with DCE-specific values */
-       if (BP_RESULT_OK == bp_funcs->get_encoder_cap_info(
-                       enc110->base.ctx->dc_bios, enc110->base.id,
-                       &bp_cap_info)) {
+       if (BP_RESULT_OK == result) {
                enc110->base.features.flags.bits.IS_HBR2_CAPABLE =
                                bp_cap_info.DP_HBR2_EN;
                enc110->base.features.flags.bits.IS_HBR3_CAPABLE =
                                bp_cap_info.DP_HBR3_EN;
                enc110->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
+       } else {
+               dm_logger_write(enc110->base.ctx->logger, LOG_WARNING,
+                               "%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
+                               __func__,
+                               result);
        }
 }
 
@@ -904,8 +907,7 @@ void dce110_link_encoder_enable_tmds_output(
        struct link_encoder *enc,
        enum clock_source_id clock_source,
        enum dc_color_depth color_depth,
-       bool hdmi,
-       bool dual_link,
+       enum signal_type signal,
        uint32_t pixel_clock)
 {
        struct dce110_link_encoder *enc110 = TO_DCE110_LINK_ENC(enc);
@@ -919,16 +921,12 @@ void dce110_link_encoder_enable_tmds_output(
        cntl.engine_id = enc->preferred_engine;
        cntl.transmitter = enc110->base.transmitter;
        cntl.pll_id = clock_source;
-       if (hdmi) {
-               cntl.signal = SIGNAL_TYPE_HDMI_TYPE_A;
-               cntl.lanes_number = 4;
-       } else if (dual_link) {
-               cntl.signal = SIGNAL_TYPE_DVI_DUAL_LINK;
+       cntl.signal = signal;
+       if (cntl.signal == SIGNAL_TYPE_DVI_DUAL_LINK)
                cntl.lanes_number = 8;
-       } else {
-               cntl.signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
+       else
                cntl.lanes_number = 4;
-       }
+
        cntl.hpd_sel = enc110->base.hpd_source;
 
        cntl.pixel_clock = pixel_clock;
index 8ca9afe47a2b268af39b502cc8547f5a0ba7e3cc..0ec3433d34b622cdc9ebd6251cbf75a0c3261002 100644 (file)
@@ -210,8 +210,7 @@ void dce110_link_encoder_enable_tmds_output(
        struct link_encoder *enc,
        enum clock_source_id clock_source,
        enum dc_color_depth color_depth,
-       bool hdmi,
-       bool dual_link,
+       enum signal_type signal,
        uint32_t pixel_clock);
 
 /* enables DP PHY output */
index 3931412ab6d32e139a9653a12fe5c2d6de8c9337..87093894ea9e73f8e4a4b1f46787b6a30a2bcb5a 100644 (file)
@@ -128,23 +128,22 @@ static void set_truncation(
                return;
        }
        /* on other format-to do */
-       if (params->flags.TRUNCATE_ENABLED == 0 ||
-                       params->flags.TRUNCATE_DEPTH == 2)
+       if (params->flags.TRUNCATE_ENABLED == 0)
                return;
        /*Set truncation depth and Enable truncation*/
        REG_UPDATE_3(FMT_BIT_DEPTH_CONTROL,
                                FMT_TRUNCATE_EN, 1,
                                FMT_TRUNCATE_DEPTH,
-                               params->flags.TRUNCATE_MODE,
+                               params->flags.TRUNCATE_DEPTH,
                                FMT_TRUNCATE_MODE,
-                               params->flags.TRUNCATE_DEPTH);
+                               params->flags.TRUNCATE_MODE);
 }
 
 
 /**
  *     set_spatial_dither
  *     1) set spatial dithering mode: pattern of seed
- *     2) set spatical dithering depth: 0 for 18bpp or 1 for 24bpp
+ *     2) set spatial dithering depth: 0 for 18bpp or 1 for 24bpp
  *     3) set random seed
  *     4) set random mode
  *             lfsr is reset every frame or not reset
index 3ea43e2a9450ce562cd01b8a5c18a9ee060d2a62..442dd2d93618d57eeff5c21ae576750f86c50416 100644 (file)
@@ -852,6 +852,7 @@ static bool construct(
        dc->caps.max_downscale_ratio = 200;
        dc->caps.i2c_speed_in_khz = 40;
        dc->caps.max_cursor_size = 128;
+       dc->caps.dual_link_dvi = true;
 
        for (i = 0; i < pool->base.pipe_count; i++) {
                pool->base.timing_generators[i] =
index 86cdd7b4811fb7f1195ce7d8e73db9e8e42c6c3a..6f382a3ac90f19a3c210ef33c97dca43ac55df42 100644 (file)
@@ -688,15 +688,22 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx)
        struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
        struct dc_link *link = pipe_ctx->stream->sink->link;
 
-       /* 1. update AVI info frame (HDMI, DP)
-        * we always need to update info frame
-       */
+
        uint32_t active_total_with_borders;
        uint32_t early_control = 0;
        struct timing_generator *tg = pipe_ctx->stream_res.tg;
 
-       /* TODOFPGA may change to hwss.update_info_frame */
+       /* For MST, multiple streams go to only one link.
+        * Connect DIG back_end to front_end during enable_stream and
+        * disconnect them during disable_stream.
+        * This keeps the stream and link logic cleanly separated. */
+       link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc,
+                                                   pipe_ctx->stream_res.stream_enc->id, true);
+
+       /* update AVI info frame (HDMI, DP)*/
+       /* TODO: FPGA may change to hwss.update_info_frame */
        dce110_update_info_frame(pipe_ctx);
+
        /* enable early control to avoid corruption on DP monitor*/
        active_total_with_borders =
                        timing->h_addressable
@@ -717,12 +724,8 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx)
                        pipe_ctx->stream_res.stream_enc->funcs->dp_audio_enable(pipe_ctx->stream_res.stream_enc);
        }
 
-       /* For MST, there are multiply stream go to only one link.
-        * connect DIG back_end to front_end while enable_stream and
-        * disconnect them during disable_stream
-        * BY this, it is logic clean to separate stream and link */
-       link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc,
-                                                   pipe_ctx->stream_res.stream_enc->id, true);
+
+
 
 }
 
@@ -1690,9 +1693,13 @@ static void apply_min_clocks(
  *  Check if FBC can be enabled
  */
 static bool should_enable_fbc(struct dc *dc,
-                             struct dc_state *context)
+                             struct dc_state *context,
+                             uint32_t *pipe_idx)
 {
-       struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[0];
+       uint32_t i;
+       struct pipe_ctx *pipe_ctx = NULL;
+       struct resource_context *res_ctx = &context->res_ctx;
+
 
        ASSERT(dc->fbc_compressor);
 
@@ -1704,6 +1711,14 @@ static bool should_enable_fbc(struct dc *dc,
        if (context->stream_count != 1)
                return false;
 
+       for (i = 0; i < dc->res_pool->pipe_count; i++) {
+               if (res_ctx->pipe_ctx[i].stream) {
+                       pipe_ctx = &res_ctx->pipe_ctx[i];
+                       *pipe_idx = i;
+                       break;
+               }
+       }
+
        /* Only supports eDP */
        if (pipe_ctx->stream->sink->link->connector_signal != SIGNAL_TYPE_EDP)
                return false;
@@ -1729,11 +1744,14 @@ static bool should_enable_fbc(struct dc *dc,
 static void enable_fbc(struct dc *dc,
                       struct dc_state *context)
 {
-       if (should_enable_fbc(dc, context)) {
+       uint32_t pipe_idx = 0;
+
+       if (should_enable_fbc(dc, context, &pipe_idx)) {
                /* Program GRPH COMPRESSED ADDRESS and PITCH */
                struct compr_addr_and_pitch_params params = {0, 0, 0};
                struct compressor *compr = dc->fbc_compressor;
-               struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[0];
+               struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];
+
 
                params.source_view_width = pipe_ctx->stream->timing.h_addressable;
                params.source_view_height = pipe_ctx->stream->timing.v_addressable;
@@ -2915,6 +2933,49 @@ static void program_csc_matrix(struct pipe_ctx *pipe_ctx,
        }
 }
 
+void dce110_set_cursor_position(struct pipe_ctx *pipe_ctx)
+{
+       struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position;
+       struct input_pixel_processor *ipp = pipe_ctx->plane_res.ipp;
+       struct mem_input *mi = pipe_ctx->plane_res.mi;
+       struct dc_cursor_mi_param param = {
+               .pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_khz,
+               .ref_clk_khz = pipe_ctx->stream->ctx->dc->res_pool->ref_clock_inKhz,
+               .viewport_x_start = pipe_ctx->plane_res.scl_data.viewport.x,
+               .viewport_width = pipe_ctx->plane_res.scl_data.viewport.width,
+               .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz
+       };
+
+       if (pipe_ctx->plane_state->address.type
+                       == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE)
+               pos_cpy.enable = false;
+
+       if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state)
+               pos_cpy.enable = false;
+
+       if (ipp->funcs->ipp_cursor_set_position)
+               ipp->funcs->ipp_cursor_set_position(ipp, &pos_cpy, &param);
+       if (mi->funcs->set_cursor_position)
+               mi->funcs->set_cursor_position(mi, &pos_cpy, &param);
+}
+
+void dce110_set_cursor_attribute(struct pipe_ctx *pipe_ctx)
+{
+       struct dc_cursor_attributes *attributes = &pipe_ctx->stream->cursor_attributes;
+
+       if (pipe_ctx->plane_res.ipp->funcs->ipp_cursor_set_attributes)
+               pipe_ctx->plane_res.ipp->funcs->ipp_cursor_set_attributes(
+                               pipe_ctx->plane_res.ipp, attributes);
+
+       if (pipe_ctx->plane_res.mi->funcs->set_cursor_attributes)
+               pipe_ctx->plane_res.mi->funcs->set_cursor_attributes(
+                               pipe_ctx->plane_res.mi, attributes);
+
+       if (pipe_ctx->plane_res.xfm->funcs->set_cursor_attributes)
+               pipe_ctx->plane_res.xfm->funcs->set_cursor_attributes(
+                               pipe_ctx->plane_res.xfm, attributes);
+}
+
 static void ready_shared_resources(struct dc *dc, struct dc_state *context) {}
 
 static void optimize_shared_resources(struct dc *dc) {}
@@ -2957,6 +3018,8 @@ static const struct hw_sequencer_funcs dce110_funcs = {
        .edp_backlight_control = hwss_edp_backlight_control,
        .edp_power_control = hwss_edp_power_control,
        .edp_wait_for_hpd_ready = hwss_edp_wait_for_hpd_ready,
+       .set_cursor_position = dce110_set_cursor_position,
+       .set_cursor_attribute = dce110_set_cursor_attribute
 };
 
 void dce110_hw_sequencer_construct(struct dc *dc)
index 7c4779578fb76528caef31a1a531f73214c86059..00f18c485e1e7b5a7d38e7bfb50df0d92d207a71 100644 (file)
@@ -846,6 +846,16 @@ static bool dce110_validate_bandwidth(
        return result;
 }
 
+enum dc_status dce110_validate_plane(const struct dc_plane_state *plane_state,
+                                    struct dc_caps *caps)
+{
+       if (((plane_state->dst_rect.width * 2) < plane_state->src_rect.width) ||
+           ((plane_state->dst_rect.height * 2) < plane_state->src_rect.height))
+               return DC_FAIL_SURFACE_VALIDATE;
+
+       return DC_OK;
+}
+
 static bool dce110_validate_surface_sets(
                struct dc_state *context)
 {
@@ -869,6 +879,13 @@ static bool dce110_validate_surface_sets(
                                        plane->src_rect.height > 1080))
                                        return false;
 
+                               /* we don't have the logic to support underlay
+                                * only yet so block the use case where we get
+                                * NV12 plane as top layer
+                                */
+                               if (j == 0)
+                                       return false;
+
                                /* irrespective of plane format,
                                 * stream should be RGB encoded
                                 */
@@ -1021,6 +1038,7 @@ static const struct resource_funcs dce110_res_pool_funcs = {
        .link_enc_create = dce110_link_encoder_create,
        .validate_guaranteed = dce110_validate_guaranteed,
        .validate_bandwidth = dce110_validate_bandwidth,
+       .validate_plane = dce110_validate_plane,
        .acquire_idle_pipe_for_layer = dce110_acquire_underlay,
        .add_stream_to_ctx = dce110_add_stream_to_ctx,
        .validate_global = dce110_validate_global
index 663e0a047a4becc5bae59b672ad951df24cedcfc..98d9cd0109e1f3cf5dbfa7b9111353c163aa4ad4 100644 (file)
@@ -1103,6 +1103,8 @@ static bool construct(
        dc->caps.max_downscale_ratio = 200;
        dc->caps.i2c_speed_in_khz = 100;
        dc->caps.max_cursor_size = 128;
+       dc->caps.dual_link_dvi = true;
+
 
        /*************************************************
         *  Create resources                             *
index 57cd67359567b5bd80a61bf2df3697a765d92dd5..5aab01db28ee78e1cd1c83fef2b240a3911cca52 100644 (file)
@@ -835,6 +835,8 @@ static bool construct(
        dc->caps.max_downscale_ratio = 200;
        dc->caps.i2c_speed_in_khz = 100;
        dc->caps.max_cursor_size = 128;
+       dc->caps.dual_link_dvi = true;
+
        dc->debug = debug_defaults;
 
        /*************************************************
index 8f2bd56f3461d665e8dd7c15e6280b32b0f135c1..25d7eb1567aeb10bc61d23df101f6b3303fe107c 100644 (file)
@@ -793,6 +793,7 @@ static bool dce80_construct(
        dc->caps.max_downscale_ratio = 200;
        dc->caps.i2c_speed_in_khz = 40;
        dc->caps.max_cursor_size = 128;
+       dc->caps.dual_link_dvi = true;
 
        /*************************************************
         *  Create resources                             *
index 82572863acab747759e3d8c2787a1e2cffc1f8df..dc1e010725c13d7f56bde96c1047626ef25745c0 100644 (file)
@@ -238,10 +238,34 @@ static void enable_power_gating_plane(
 static void disable_vga(
        struct dce_hwseq *hws)
 {
+       unsigned int in_vga1_mode = 0;
+       unsigned int in_vga2_mode = 0;
+       unsigned int in_vga3_mode = 0;
+       unsigned int in_vga4_mode = 0;
+
+       REG_GET(D1VGA_CONTROL, D1VGA_MODE_ENABLE, &in_vga1_mode);
+       REG_GET(D2VGA_CONTROL, D2VGA_MODE_ENABLE, &in_vga2_mode);
+       REG_GET(D3VGA_CONTROL, D3VGA_MODE_ENABLE, &in_vga3_mode);
+       REG_GET(D4VGA_CONTROL, D4VGA_MODE_ENABLE, &in_vga4_mode);
+
+       if (in_vga1_mode == 0 && in_vga2_mode == 0 &&
+                       in_vga3_mode == 0 && in_vga4_mode == 0)
+               return;
+
        REG_WRITE(D1VGA_CONTROL, 0);
        REG_WRITE(D2VGA_CONTROL, 0);
        REG_WRITE(D3VGA_CONTROL, 0);
        REG_WRITE(D4VGA_CONTROL, 0);
+
+       /* HW Engineer's Notes:
+        *  During switch from vga->extended, if we set the VGA_TEST_ENABLE and
+        *  then hit the VGA_TEST_RENDER_START, then the DCHUBP timing gets updated correctly.
+        *
+        *  Then vBIOS will have it poll for the VGA_TEST_RENDER_DONE and unset
+        *  VGA_TEST_ENABLE, to leave it in the same state as before.
+        */
+       REG_UPDATE(VGA_TEST_CONTROL, VGA_TEST_ENABLE, 1);
+       REG_UPDATE(VGA_TEST_CONTROL, VGA_TEST_RENDER_START, 1);
 }
 
 static void dpp_pg_control(
@@ -1761,6 +1785,11 @@ static void update_dchubp_dpp(
                        &pipe_ctx->plane_res.scl_data.viewport_c);
        }
 
+       if (pipe_ctx->stream->cursor_attributes.address.quad_part != 0) {
+               dc->hwss.set_cursor_position(pipe_ctx);
+               dc->hwss.set_cursor_attribute(pipe_ctx);
+       }
+
        if (plane_state->update_flags.bits.full_update) {
                /*gamut remap*/
                program_gamut_remap(pipe_ctx);
@@ -2296,7 +2325,7 @@ static bool dcn10_dummy_display_power_gating(
        return true;
 }
 
-void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx)
+static void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx)
 {
        struct dc_plane_state *plane_state = pipe_ctx->plane_state;
        struct timing_generator *tg = pipe_ctx->stream_res.tg;
@@ -2316,12 +2345,46 @@ void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx)
        }
 }
 
-void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data)
+static void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data)
 {
        if (hws->ctx->dc->res_pool->hubbub != NULL)
                hubbub1_update_dchub(hws->ctx->dc->res_pool->hubbub, dh_data);
 }
 
+static void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
+{
+       struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position;
+       struct hubp *hubp = pipe_ctx->plane_res.hubp;
+       struct dpp *dpp = pipe_ctx->plane_res.dpp;
+       struct dc_cursor_mi_param param = {
+               .pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_khz,
+               .ref_clk_khz = pipe_ctx->stream->ctx->dc->res_pool->ref_clock_inKhz,
+               .viewport_x_start = pipe_ctx->plane_res.scl_data.viewport.x,
+               .viewport_width = pipe_ctx->plane_res.scl_data.viewport.width,
+               .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz
+       };
+
+       if (pipe_ctx->plane_state->address.type
+                       == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE)
+               pos_cpy.enable = false;
+
+       if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state)
+               pos_cpy.enable = false;
+
+       hubp->funcs->set_cursor_position(hubp, &pos_cpy, &param);
+       dpp->funcs->set_cursor_position(dpp, &pos_cpy, &param, hubp->curs_attr.width);
+}
+
+static void dcn10_set_cursor_attribute(struct pipe_ctx *pipe_ctx)
+{
+       struct dc_cursor_attributes *attributes = &pipe_ctx->stream->cursor_attributes;
+
+       pipe_ctx->plane_res.hubp->funcs->set_cursor_attributes(
+                       pipe_ctx->plane_res.hubp, attributes);
+       pipe_ctx->plane_res.dpp->funcs->set_cursor_attributes(
+               pipe_ctx->plane_res.dpp, attributes->color_format);
+}
+
 static const struct hw_sequencer_funcs dcn10_funcs = {
        .program_gamut_remap = program_gamut_remap,
        .program_csc_matrix = program_csc_matrix,
@@ -2362,6 +2425,8 @@ static const struct hw_sequencer_funcs dcn10_funcs = {
        .edp_backlight_control = hwss_edp_backlight_control,
        .edp_power_control = hwss_edp_power_control,
        .edp_wait_for_hpd_ready = hwss_edp_wait_for_hpd_ready,
+       .set_cursor_position = dcn10_set_cursor_position,
+       .set_cursor_attribute = dcn10_set_cursor_attribute
 };
 
 
index 0fd329deacd8a047c6302e092075972698191fff..54d8a13861423483dfc01050a78edc1d190424da 100644 (file)
@@ -123,8 +123,7 @@ struct link_encoder_funcs {
        void (*enable_tmds_output)(struct link_encoder *enc,
                enum clock_source_id clock_source,
                enum dc_color_depth color_depth,
-               bool hdmi,
-               bool dual_link,
+               enum signal_type signal,
                uint32_t pixel_clock);
        void (*enable_dp_output)(struct link_encoder *enc,
                const struct dc_link_settings *link_settings,
index 4c0aa56f7bae255e42b18db495efba17b9868971..379c6ecd271a5919b87640dff21edb96caf36933 100644 (file)
@@ -198,6 +198,9 @@ struct hw_sequencer_funcs {
                        bool enable);
        void (*edp_wait_for_hpd_ready)(struct dc_link *link, bool power_up);
 
+       void (*set_cursor_position)(struct pipe_ctx *pipe);
+       void (*set_cursor_attribute)(struct pipe_ctx *pipe);
+
 };
 
 void color_space_to_black_color(
index f7e40b292dfbbbdc5b81dfd8a444a63f42542a5b..d3e1923b01a8d0eaa076b6617ba659c459c4810e 100644 (file)
@@ -217,7 +217,7 @@ bool dce110_vblank_set(
                        core_dc->current_state->res_ctx.pipe_ctx[pipe_offset].stream_res.tg;
 
        if (enable) {
-               if (!tg->funcs->arm_vert_intr(tg, 2)) {
+               if (!tg || !tg->funcs->arm_vert_intr(tg, 2)) {
                        DC_ERROR("Failed to get VBLANK!\n");
                        return false;
                }
index 57a54a7b89e5f73d5f67dcb8177368ccc0f8acd0..1c079ba37c3006cd7f51c49ea74599575ed46e05 100644 (file)
@@ -42,8 +42,7 @@ static void virtual_link_encoder_enable_tmds_output(
        struct link_encoder *enc,
        enum clock_source_id clock_source,
        enum dc_color_depth color_depth,
-       bool hdmi,
-       bool dual_link,
+       enum signal_type signal,
        uint32_t pixel_clock) {}
 
 static void virtual_link_encoder_enable_dp_output(
index 7a9b43f84a31636bbb958597a3b66cbabf792810..36bbad5942674bcda382624f2d13b2fe3f6b304e 100644 (file)
@@ -419,11 +419,6 @@ struct bios_event_info {
        bool backlight_changed;
 };
 
-enum {
-       HDMI_PIXEL_CLOCK_IN_KHZ_297 = 297000,
-       TMDS_PIXEL_CLOCK_IN_KHZ_165 = 165000
-};
-
 /*
  * DFS-bypass flag
  */
index b5ebde642207431080f62db9adbbf18491f6a33b..199c5db67cbca1e0ed7aeba6a721c92d4ba25468 100644 (file)
 #ifndef __DC_SIGNAL_TYPES_H__
 #define __DC_SIGNAL_TYPES_H__
 
+/* Minimum pixel clock, in KHz. For TMDS signal is 25.00 MHz */
+#define TMDS_MIN_PIXEL_CLOCK 25000
+/* Maximum pixel clock, in KHz. For TMDS signal is 165.00 MHz */
+#define TMDS_MAX_PIXEL_CLOCK 165000
+
 enum signal_type {
        SIGNAL_TYPE_NONE                = 0L,           /* no signal */
        SIGNAL_TYPE_DVI_SINGLE_LINK     = (1 << 0),
index 4c3223a4d62b04d0264686e2eb6e493e6daf7c23..adb6e7b9280ce48650b4bb287a1b122e4c06002b 100644 (file)
@@ -162,7 +162,7 @@ static int pp_hw_init(void *handle)
                if(hwmgr->smumgr_funcs->start_smu(pp_handle->hwmgr)) {
                        pr_err("smc start failed\n");
                        hwmgr->smumgr_funcs->smu_fini(pp_handle->hwmgr);
-                       return -EINVAL;;
+                       return -EINVAL;
                }
                if (ret == PP_DPM_DISABLED)
                        goto exit;
index 41e42beff213905837c15db1841800b2021ec502..08e8a793714f26dc38e571a51b52f8421bb72444 100644 (file)
@@ -2756,10 +2756,13 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
                                    PHM_PlatformCaps_DisableMclkSwitchingForFrameLock);
 
 
-       disable_mclk_switching = ((1 < info.display_count) ||
-                                 disable_mclk_switching_for_frame_lock ||
-                                 smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us) ||
-                                 (mode_info.refresh_rate > 120));
+       if (info.display_count == 0)
+               disable_mclk_switching = false;
+       else
+               disable_mclk_switching = ((1 < info.display_count) ||
+                                         disable_mclk_switching_for_frame_lock ||
+                                         smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us) ||
+                                         (mode_info.refresh_rate > 120));
 
        sclk = smu7_ps->performance_levels[0].engine_clock;
        mclk = smu7_ps->performance_levels[0].memory_clock;
@@ -4534,13 +4537,6 @@ static int smu7_set_power_profile_state(struct pp_hwmgr *hwmgr,
        int tmp_result, result = 0;
        uint32_t sclk_mask = 0, mclk_mask = 0;
 
-       if (hwmgr->chip_id == CHIP_FIJI) {
-               if (request->type == AMD_PP_GFX_PROFILE)
-                       smu7_enable_power_containment(hwmgr);
-               else if (request->type == AMD_PP_COMPUTE_PROFILE)
-                       smu7_disable_power_containment(hwmgr);
-       }
-
        if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_AUTO)
                return -EINVAL;
 
index 2d55dabc77d4112d0b1c50bf1e1ae38ac0505dd0..5f9c3efb532f6c48471b75a0e5510369b98fffff 100644 (file)
@@ -3168,10 +3168,13 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
        disable_mclk_switching_for_vr = PP_CAP(PHM_PlatformCaps_DisableMclkSwitchForVR);
        force_mclk_high = PP_CAP(PHM_PlatformCaps_ForceMclkHigh);
 
-       disable_mclk_switching = (info.display_count > 1) ||
-                                   disable_mclk_switching_for_frame_lock ||
-                                   disable_mclk_switching_for_vr ||
-                                   force_mclk_high;
+       if (info.display_count == 0)
+               disable_mclk_switching = false;
+       else
+               disable_mclk_switching = (info.display_count > 1) ||
+                       disable_mclk_switching_for_frame_lock ||
+                       disable_mclk_switching_for_vr ||
+                       force_mclk_high;
 
        sclk = vega10_ps->performance_levels[0].gfx_clock;
        mclk = vega10_ps->performance_levels[0].mem_clock;
index 5f4c2e833a650dd6be2e6afb5e9835cf7e434e17..d665dd5af5dd80f2348dd1290c41ecb4756ab9d7 100644 (file)
@@ -97,7 +97,7 @@ static const struct ast_vbios_dclk_info dclk_table[] = {
        {0x67, 0x22, 0x00},                     /* 0E: VCLK157_5        */
        {0x6A, 0x22, 0x00},                     /* 0F: VCLK162          */
        {0x4d, 0x4c, 0x80},                     /* 10: VCLK154          */
-       {0xa7, 0x78, 0x80},                     /* 11: VCLK83.5         */
+       {0x68, 0x6f, 0x80},                     /* 11: VCLK83.5         */
        {0x28, 0x49, 0x80},                     /* 12: VCLK106.5        */
        {0x37, 0x49, 0x80},                     /* 13: VCLK146.25       */
        {0x1f, 0x45, 0x80},                     /* 14: VCLK148.5        */
@@ -127,7 +127,7 @@ static const struct ast_vbios_dclk_info dclk_table_ast2500[] = {
        {0x67, 0x22, 0x00},                     /* 0E: VCLK157_5        */
        {0x6A, 0x22, 0x00},                     /* 0F: VCLK162          */
        {0x4d, 0x4c, 0x80},                     /* 10: VCLK154          */
-       {0xa7, 0x78, 0x80},                     /* 11: VCLK83.5         */
+       {0x68, 0x6f, 0x80},                     /* 11: VCLK83.5         */
        {0x28, 0x49, 0x80},                     /* 12: VCLK106.5        */
        {0x37, 0x49, 0x80},                     /* 13: VCLK146.25       */
        {0x1f, 0x45, 0x80},                     /* 14: VCLK148.5        */
index 5a13ff29f4f04af99c61fa07261a35a44ac0dd8e..2dc5e8bed17214f187fdbe93e28b72cceeb6b376 100644 (file)
@@ -121,6 +121,10 @@ int drm_mode_addfb(struct drm_device *dev,
        r.pixel_format = drm_mode_legacy_fb_format(or->bpp, or->depth);
        r.handles[0] = or->handle;
 
+       if (r.pixel_format == DRM_FORMAT_XRGB2101010 &&
+           dev->driver->driver_features & DRIVER_PREFER_XBGR_30BPP)
+               r.pixel_format = DRM_FORMAT_XBGR2101010;
+
        ret = drm_mode_addfb2(dev, &r, file_priv);
        if (ret)
                return ret;
@@ -457,6 +461,12 @@ int drm_mode_getfb(struct drm_device *dev,
        if (!fb)
                return -ENOENT;
 
+       /* Multi-planar framebuffers need getfb2. */
+       if (fb->format->num_planes > 1) {
+               ret = -EINVAL;
+               goto out;
+       }
+
        r->height = fb->height;
        r->width = fb->width;
        r->depth = fb->format->depth;
@@ -480,6 +490,7 @@ int drm_mode_getfb(struct drm_device *dev,
                ret = -ENODEV;
        }
 
+out:
        drm_framebuffer_put(fb);
 
        return ret;
index c8454ac43fae0fbdece898d38abc9c8e7059582b..db6b94dda5dfaede1ebb97cc21b17ea301fdd275 100644 (file)
@@ -471,6 +471,7 @@ struct parser_exec_state {
         * used when ret from 2nd level batch buffer
         */
        int saved_buf_addr_type;
+       bool is_ctx_wa;
 
        struct cmd_info *info;
 
@@ -1715,6 +1716,11 @@ static int perform_bb_shadow(struct parser_exec_state *s)
        bb->accessing = true;
        bb->bb_start_cmd_va = s->ip_va;
 
+       if ((s->buf_type == BATCH_BUFFER_INSTRUCTION) && (!s->is_ctx_wa))
+               bb->bb_offset = s->ip_va - s->rb_va;
+       else
+               bb->bb_offset = 0;
+
        /*
         * ip_va saves the virtual address of the shadow batch buffer, while
         * ip_gma saves the graphics address of the original batch buffer.
@@ -2571,6 +2577,7 @@ static int scan_workload(struct intel_vgpu_workload *workload)
        s.ring_tail = gma_tail;
        s.rb_va = workload->shadow_ring_buffer_va;
        s.workload = workload;
+       s.is_ctx_wa = false;
 
        if ((bypass_scan_mask & (1 << workload->ring_id)) ||
                gma_head == gma_tail)
@@ -2624,6 +2631,7 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
        s.ring_tail = gma_tail;
        s.rb_va = wa_ctx->indirect_ctx.shadow_va;
        s.workload = workload;
+       s.is_ctx_wa = true;
 
        if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) {
                ret = -EINVAL;
index 256f1bb522b7a2edb5490c81be29aa46774e3ed1..152df3d0291e543fa9ae9eb923e773df62cb10ef 100644 (file)
@@ -394,9 +394,11 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
         * performace for batch mmio read/write, so we need
         * handle forcewake mannually.
         */
+       intel_runtime_pm_get(dev_priv);
        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
        switch_mmio(pre, next, ring_id);
        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+       intel_runtime_pm_put(dev_priv);
 }
 
 /**
index b55b3580ca1dd00402374354f3d12918817de378..d74d6f05c62c4e60d4b1604707cd79ffa992d041 100644 (file)
@@ -52,6 +52,54 @@ static void set_context_pdp_root_pointer(
                pdp_pair[i].val = pdp[7 - i];
 }
 
+/*
+ * Save or restore the OA-related registers of a shadow ring context.
+ *
+ * When populating the shadow ctx from the guest, we should not override the
+ * OA related registers, so that they are not overwritten by guest OA configs.
+ * This makes it possible to capture OA data from the host for both host and
+ * guests.
+ *
+ * @workload:  workload whose oactxctrl/flex_mmio fields hold the saved values
+ * @reg_state: register state page of the shadow ring context
+ * @save:      true copies the values from @reg_state into @workload; false
+ *             writes the register offsets and the saved values back into
+ *             @reg_state
+ *
+ * Does nothing if @workload or @reg_state is NULL, or if the workload is not
+ * on the RCS ring.
+ */
+static void sr_oa_regs(struct intel_vgpu_workload *workload,
+               u32 *reg_state, bool save)
+{
+       struct drm_i915_private *dev_priv;
+       u32 ctx_oactxctrl, ctx_flexeu0;
+       int i;
+       u32 flex_mmio[] = {
+               i915_mmio_reg_offset(EU_PERF_CNTL0),
+               i915_mmio_reg_offset(EU_PERF_CNTL1),
+               i915_mmio_reg_offset(EU_PERF_CNTL2),
+               i915_mmio_reg_offset(EU_PERF_CNTL3),
+               i915_mmio_reg_offset(EU_PERF_CNTL4),
+               i915_mmio_reg_offset(EU_PERF_CNTL5),
+               i915_mmio_reg_offset(EU_PERF_CNTL6),
+       };
+
+       /* Validate the arguments before dereferencing workload below. */
+       if (!workload || !reg_state || workload->ring_id != RCS)
+               return;
+
+       dev_priv = workload->vgpu->gvt->dev_priv;
+       ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
+       ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
+
+       if (save) {
+               workload->oactxctrl = reg_state[ctx_oactxctrl + 1];
+
+               for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+                       u32 state_offset = ctx_flexeu0 + i * 2;
+
+                       workload->flex_mmio[i] = reg_state[state_offset + 1];
+               }
+       } else {
+               reg_state[ctx_oactxctrl] =
+                       i915_mmio_reg_offset(GEN8_OACTXCONTROL);
+               reg_state[ctx_oactxctrl + 1] = workload->oactxctrl;
+
+               for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+                       u32 state_offset = ctx_flexeu0 + i * 2;
+                       u32 mmio = flex_mmio[i];
+
+                       reg_state[state_offset] = mmio;
+                       reg_state[state_offset + 1] = workload->flex_mmio[i];
+               }
+       }
+}
+
 static int populate_shadow_context(struct intel_vgpu_workload *workload)
 {
        struct intel_vgpu *vgpu = workload->vgpu;
@@ -98,6 +146,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
        page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
        shadow_ring_context = kmap(page);
 
+       sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
 #define COPY_REG(name) \
        intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
                + RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
@@ -122,6 +171,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
                        sizeof(*shadow_ring_context),
                        I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
 
+       sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
        kunmap(page);
        return 0;
 }
@@ -376,6 +426,17 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
                        goto err;
                }
 
+               /* For a privileged batch buffer (not wa_ctx), bb_start_cmd_va
+                * was only updated in ring_scan_buffer, not at the real ring
+                * address allocated later in copy_workload_to_ring_buffer.
+                * Note that shadow_ring_buffer_va now points to the real ring
+                * buffer va set in copy_workload_to_ring_buffer.
+                */
+
+               if (bb->bb_offset)
+                       bb->bb_start_cmd_va = workload->shadow_ring_buffer_va
+                               + bb->bb_offset;
+
                /* relocate shadow batch buffer */
                bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
                if (gmadr_bytes == 8)
@@ -1044,10 +1105,12 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 
        bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
 
-       s->workloads = kmem_cache_create("gvt-g_vgpu_workload",
-                       sizeof(struct intel_vgpu_workload), 0,
-                       SLAB_HWCACHE_ALIGN,
-                       NULL);
+       s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload",
+                                                 sizeof(struct intel_vgpu_workload), 0,
+                                                 SLAB_HWCACHE_ALIGN,
+                                                 offsetof(struct intel_vgpu_workload, rb_tail),
+                                                 sizeof_field(struct intel_vgpu_workload, rb_tail),
+                                                 NULL);
 
        if (!s->workloads) {
                ret = -ENOMEM;
index ff175a98b19ed41c6cdd8405a8872d4bdb6003dd..a79a4f60637ebbe451f7f2f0442ca663deba3bdf 100644 (file)
@@ -110,6 +110,10 @@ struct intel_vgpu_workload {
        /* shadow batch buffer */
        struct list_head shadow_bb;
        struct intel_shadow_wa_ctx wa_ctx;
+
+       /* oa registers */
+       u32 oactxctrl;
+       u32 flex_mmio[7];
 };
 
 struct intel_vgpu_shadow_bb {
@@ -120,6 +124,7 @@ struct intel_vgpu_shadow_bb {
        u32 *bb_start_cmd_va;
        unsigned int clflush;
        bool accessing;
+       unsigned long bb_offset;
 };
 
 #define workload_q_head(vgpu, ring_id) \
index dd89abd2263d20334403ae4c4b1ef61af5482135..6ff5d655c20249b27926dea99b4243a8ed2f122a 100644 (file)
@@ -434,20 +434,28 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
                        dma_fence_put(shared[i]);
                kfree(shared);
 
+               /*
+                * If both shared fences and an exclusive fence exist,
+                * then by construction the shared fences must be later
+                * than the exclusive fence. If we successfully wait for
+                * all the shared fences, we know that the exclusive fence
+                * must also be signaled. If all the shared fences are
+                * signaled, we can prune the array and recover the
+                * floating references on the fences/requests.
+                */
                prune_fences = count && timeout >= 0;
        } else {
                excl = reservation_object_get_excl_rcu(resv);
        }
 
-       if (excl && timeout >= 0) {
+       if (excl && timeout >= 0)
                timeout = i915_gem_object_wait_fence(excl, flags, timeout,
                                                     rps_client);
-               prune_fences = timeout >= 0;
-       }
 
        dma_fence_put(excl);
 
-       /* Oportunistically prune the fences iff we know they have *all* been
+       /*
+        * Opportunistically prune the fences iff we know they have *all* been
         * signaled and that the reservation object has not been changed (i.e.
         * no new fences have been added).
         */
@@ -3205,8 +3213,10 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
         * rolling the global seqno forward (since this would complete requests
         * for which we haven't set the fence error to EIO yet).
         */
-       for_each_engine(engine, i915, id)
+       for_each_engine(engine, i915, id) {
+               i915_gem_reset_prepare_engine(engine);
                engine->submit_request = nop_submit_request;
+       }
 
        /*
         * Make sure no one is running the old callback before we proceed with
@@ -3244,6 +3254,8 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
                intel_engine_init_global_seqno(engine,
                                               intel_engine_last_submit(engine));
                spin_unlock_irqrestore(&engine->timeline->lock, flags);
+
+               i915_gem_reset_finish_engine(engine);
        }
 
        set_bit(I915_WEDGED, &i915->gpu_error.flags);
index 4401068ff468ad36aef1d5df5277e2f008f01bb3..3ab1ace2a6bdd8a4be32828e58262d9ef9e20ae7 100644 (file)
@@ -505,6 +505,8 @@ eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
                list_add_tail(&vma->exec_link, &eb->unbound);
                if (drm_mm_node_allocated(&vma->node))
                        err = i915_vma_unbind(vma);
+               if (unlikely(err))
+                       vma->exec_flags = NULL;
        }
        return err;
 }
@@ -2410,7 +2412,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
        if (out_fence) {
                if (err == 0) {
                        fd_install(out_fence_fd, out_fence->file);
-                       args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */
+                       args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
                        args->rsvd2 |= (u64)out_fence_fd << 32;
                        out_fence_fd = -1;
                } else {
index e09d18df8b7f18ca0b17f65797e5cbdc4baec072..a3e93d46316a267571f199cfcbd665434f41003f 100644 (file)
@@ -476,8 +476,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
        GEM_BUG_ON(!irqs_disabled());
        lockdep_assert_held(&engine->timeline->lock);
 
-       trace_i915_gem_request_execute(request);
-
        /* Transfer from per-context onto the global per-engine timeline */
        timeline = engine->timeline;
        GEM_BUG_ON(timeline == request->timeline);
@@ -501,6 +499,8 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
        list_move_tail(&request->link, &timeline->requests);
        spin_unlock(&request->timeline->lock);
 
+       trace_i915_gem_request_execute(request);
+
        wake_up_all(&request->execute);
 }
 
index 0be50e43507de0e15ef06245335940eed75d3f6b..f8fe5ffcdcfff8cb27667efdcec2914eb3b7b06b 100644 (file)
@@ -1303,9 +1303,8 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
         */
        mutex_lock(&dev_priv->drm.struct_mutex);
        dev_priv->perf.oa.exclusive_stream = NULL;
-       mutex_unlock(&dev_priv->drm.struct_mutex);
-
        dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
+       mutex_unlock(&dev_priv->drm.struct_mutex);
 
        free_oa_buffer(dev_priv);
 
@@ -1756,22 +1755,13 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr
  * Note: it's only the RCS/Render context that has any OA state.
  */
 static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
-                                      const struct i915_oa_config *oa_config,
-                                      bool interruptible)
+                                      const struct i915_oa_config *oa_config)
 {
        struct i915_gem_context *ctx;
        int ret;
        unsigned int wait_flags = I915_WAIT_LOCKED;
 
-       if (interruptible) {
-               ret = i915_mutex_lock_interruptible(&dev_priv->drm);
-               if (ret)
-                       return ret;
-
-               wait_flags |= I915_WAIT_INTERRUPTIBLE;
-       } else {
-               mutex_lock(&dev_priv->drm.struct_mutex);
-       }
+       lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
        /* Switch away from any user context. */
        ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
@@ -1819,8 +1809,6 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
        }
 
  out:
-       mutex_unlock(&dev_priv->drm.struct_mutex);
-
        return ret;
 }
 
@@ -1863,7 +1851,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
         * to make sure all slices/subslices are ON before writing to NOA
         * registers.
         */
-       ret = gen8_configure_all_contexts(dev_priv, oa_config, true);
+       ret = gen8_configure_all_contexts(dev_priv, oa_config);
        if (ret)
                return ret;
 
@@ -1878,7 +1866,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
 static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
 {
        /* Reset all contexts' slices/subslices configurations. */
-       gen8_configure_all_contexts(dev_priv, NULL, false);
+       gen8_configure_all_contexts(dev_priv, NULL);
 
        I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
                                      ~GT_NOA_ENABLE));
@@ -1888,7 +1876,7 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
 static void gen10_disable_metric_set(struct drm_i915_private *dev_priv)
 {
        /* Reset all contexts' slices/subslices configurations. */
-       gen8_configure_all_contexts(dev_priv, NULL, false);
+       gen8_configure_all_contexts(dev_priv, NULL);
 
        /* Make sure we disable noa to save power. */
        I915_WRITE(RPM_CONFIG1,
@@ -2138,6 +2126,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
        if (ret)
                goto err_oa_buf_alloc;
 
+       ret = i915_mutex_lock_interruptible(&dev_priv->drm);
+       if (ret)
+               goto err_lock;
+
        ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
                                                      stream->oa_config);
        if (ret)
@@ -2145,23 +2137,17 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 
        stream->ops = &i915_oa_stream_ops;
 
-       /* Lock device for exclusive_stream access late because
-        * enable_metric_set() might lock as well on gen8+.
-        */
-       ret = i915_mutex_lock_interruptible(&dev_priv->drm);
-       if (ret)
-               goto err_lock;
-
        dev_priv->perf.oa.exclusive_stream = stream;
 
        mutex_unlock(&dev_priv->drm.struct_mutex);
 
        return 0;
 
-err_lock:
+err_enable:
        dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
+       mutex_unlock(&dev_priv->drm.struct_mutex);
 
-err_enable:
+err_lock:
        free_oa_buffer(dev_priv);
 
 err_oa_buf_alloc:
index a2108e35c599982831cab1eab39c6b6721af1d80..33eb0c5b1d3244d944a9e80fa883352952cb5ac0 100644 (file)
@@ -2027,7 +2027,7 @@ enum i915_power_well_id {
 #define _CNL_PORT_TX_DW5_LN0_AE                0x162454
 #define _CNL_PORT_TX_DW5_LN0_B         0x162654
 #define _CNL_PORT_TX_DW5_LN0_C         0x162C54
-#define _CNL_PORT_TX_DW5_LN0_D         0x162ED4
+#define _CNL_PORT_TX_DW5_LN0_D         0x162E54
 #define _CNL_PORT_TX_DW5_LN0_F         0x162854
 #define CNL_PORT_TX_DW5_GRP(port)      _MMIO_PORT6(port, \
                                                    _CNL_PORT_TX_DW5_GRP_AE, \
@@ -2058,7 +2058,7 @@ enum i915_power_well_id {
 #define _CNL_PORT_TX_DW7_LN0_AE                0x16245C
 #define _CNL_PORT_TX_DW7_LN0_B         0x16265C
 #define _CNL_PORT_TX_DW7_LN0_C         0x162C5C
-#define _CNL_PORT_TX_DW7_LN0_D         0x162EDC
+#define _CNL_PORT_TX_DW7_LN0_D         0x162E5C
 #define _CNL_PORT_TX_DW7_LN0_F         0x16285C
 #define CNL_PORT_TX_DW7_GRP(port)      _MMIO_PORT6(port, \
                                                    _CNL_PORT_TX_DW7_GRP_AE, \
index b33d2158c234df68fc0a983b58daa37d74a74575..e5e6f6bb2b05a62039652c260a2accd51e2b2c95 100644 (file)
@@ -304,8 +304,9 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
 {
        struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
        struct intel_rps *rps = &dev_priv->gt_pm.rps;
-       u32 val;
+       bool boost = false;
        ssize_t ret;
+       u32 val;
 
        ret = kstrtou32(buf, 0, &val);
        if (ret)
@@ -317,8 +318,13 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
                return -EINVAL;
 
        mutex_lock(&dev_priv->pcu_lock);
-       rps->boost_freq = val;
+       if (val != rps->boost_freq) {
+               rps->boost_freq = val;
+               boost = atomic_read(&rps->num_waiters);
+       }
        mutex_unlock(&dev_priv->pcu_lock);
+       if (boost)
+               schedule_work(&rps->work);
 
        return count;
 }
index 522d54fecb53489193eb2b72dbf9fdd41009ac67..4a01f62a392dd18569ca0f3503c87bd1bbbe59b9 100644 (file)
@@ -779,11 +779,11 @@ static struct intel_encoder *get_saved_enc(struct drm_i915_private *dev_priv,
 {
        struct intel_encoder *encoder;
 
-       if (WARN_ON(pipe >= ARRAY_SIZE(dev_priv->av_enc_map)))
-               return NULL;
-
        /* MST */
        if (pipe >= 0) {
+               if (WARN_ON(pipe >= ARRAY_SIZE(dev_priv->av_enc_map)))
+                       return NULL;
+
                encoder = dev_priv->av_enc_map[pipe];
                /*
                 * when bootup, audio driver may not know it is
index f51645a08dcaf489e0668af616fe39d421822a38..6aff9d096e13d08addb7b0bdf143aa9e73c2223b 100644 (file)
@@ -2175,8 +2175,7 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
                intel_prepare_dp_ddi_buffers(encoder, crtc_state);
 
        intel_ddi_init_dp_buf_reg(encoder);
-       if (!is_mst)
-               intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
+       intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
        intel_dp_start_link_train(intel_dp);
        if (port != PORT_A || INTEL_GEN(dev_priv) >= 9)
                intel_dp_stop_link_train(intel_dp);
@@ -2274,14 +2273,12 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder,
        struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
        struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
        struct intel_dp *intel_dp = &dig_port->dp;
-       bool is_mst = intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST);
 
        /*
         * Power down sink before disabling the port, otherwise we end
         * up getting interrupts from the sink on detecting link loss.
         */
-       if (!is_mst)
-               intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
+       intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
 
        intel_disable_ddi_buf(encoder);
 
index 35c5299feab6862659023ce54f3d99dd45aacbec..a29868cd30c740feec642e801838b5eb5e9a0e95 100644 (file)
@@ -620,19 +620,15 @@ static int
 bxt_power_sequencer_idx(struct intel_dp *intel_dp)
 {
        struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp));
+       int backlight_controller = dev_priv->vbt.backlight.controller;
 
        lockdep_assert_held(&dev_priv->pps_mutex);
 
        /* We should never land here with regular DP ports */
        WARN_ON(!intel_dp_is_edp(intel_dp));
 
-       /*
-        * TODO: BXT has 2 PPS instances. The correct port->PPS instance
-        * mapping needs to be retrieved from VBT, for now just hard-code to
-        * use instance #0 always.
-        */
        if (!intel_dp->pps_reset)
-               return 0;
+               return backlight_controller;
 
        intel_dp->pps_reset = false;
 
@@ -642,7 +638,7 @@ bxt_power_sequencer_idx(struct intel_dp *intel_dp)
         */
        intel_dp_init_panel_power_sequencer_registers(intel_dp, false);
 
-       return 0;
+       return backlight_controller;
 }
 
 typedef bool (*vlv_pipe_check)(struct drm_i915_private *dev_priv,
index 348a4f7ffb674b435bdb77089f9c229abe171ab5..53747318f4a7162fe79f2217108ad889e138b3e7 100644 (file)
@@ -246,7 +246,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
         */
        tmp = I915_READ_CTL(engine);
        if (tmp & RING_WAIT) {
-               i915_handle_error(dev_priv, 0,
+               i915_handle_error(dev_priv, BIT(engine->id),
                                  "Kicking stuck wait on %s",
                                  engine->name);
                I915_WRITE_CTL(engine, tmp);
@@ -258,7 +258,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
                default:
                        return ENGINE_DEAD;
                case 1:
-                       i915_handle_error(dev_priv, 0,
+                       i915_handle_error(dev_priv, ALL_ENGINES,
                                          "Kicking stuck semaphore on %s",
                                          engine->name);
                        I915_WRITE_CTL(engine, tmp);
index 7ece2f061b9e8087ce7f5b22179377435d50af9b..e0fca035ff789be49a9811eade75cd92a67eea3a 100644 (file)
@@ -719,6 +719,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
        struct rb_node *rb;
        unsigned long flags;
 
+       GEM_TRACE("%s\n", engine->name);
+
        spin_lock_irqsave(&engine->timeline->lock, flags);
 
        /* Cancel the requests on the HW and clear the ELSP tracker. */
@@ -765,6 +767,9 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
         */
        clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 
+       /* Mark all CS interrupts as complete */
+       execlists->active = 0;
+
        spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
index 9a9961802f5c39ce7270217903c550b2f01ed92d..e83af0f2be869a105036bf49c4f1b612acab83ee 100644 (file)
@@ -225,7 +225,11 @@ static void ipu_crtc_atomic_begin(struct drm_crtc *crtc,
                                  struct drm_crtc_state *old_crtc_state)
 {
        drm_crtc_vblank_on(crtc);
+}
 
+static void ipu_crtc_atomic_flush(struct drm_crtc *crtc,
+                                 struct drm_crtc_state *old_crtc_state)
+{
        spin_lock_irq(&crtc->dev->event_lock);
        if (crtc->state->event) {
                WARN_ON(drm_crtc_vblank_get(crtc));
@@ -293,6 +297,7 @@ static const struct drm_crtc_helper_funcs ipu_helper_funcs = {
        .mode_set_nofb = ipu_crtc_mode_set_nofb,
        .atomic_check = ipu_crtc_atomic_check,
        .atomic_begin = ipu_crtc_atomic_begin,
+       .atomic_flush = ipu_crtc_atomic_flush,
        .atomic_disable = ipu_crtc_atomic_disable,
        .atomic_enable = ipu_crtc_atomic_enable,
 };
index 57ed56d8623fcb67133f1fe79f390ad45c38c257..d9113faaa62f56400e5e974a5005edc928d0fae3 100644 (file)
@@ -22,6 +22,7 @@
 #include <drm/drm_plane_helper.h>
 
 #include "video/imx-ipu-v3.h"
+#include "imx-drm.h"
 #include "ipuv3-plane.h"
 
 struct ipu_plane_state {
@@ -272,7 +273,7 @@ static void ipu_plane_destroy(struct drm_plane *plane)
        kfree(ipu_plane);
 }
 
-void ipu_plane_state_reset(struct drm_plane *plane)
+static void ipu_plane_state_reset(struct drm_plane *plane)
 {
        struct ipu_plane_state *ipu_state;
 
@@ -292,7 +293,8 @@ void ipu_plane_state_reset(struct drm_plane *plane)
        plane->state = &ipu_state->base;
 }
 
-struct drm_plane_state *ipu_plane_duplicate_state(struct drm_plane *plane)
+static struct drm_plane_state *
+ipu_plane_duplicate_state(struct drm_plane *plane)
 {
        struct ipu_plane_state *state;
 
@@ -306,8 +308,8 @@ struct drm_plane_state *ipu_plane_duplicate_state(struct drm_plane *plane)
        return &state->base;
 }
 
-void ipu_plane_destroy_state(struct drm_plane *plane,
-                            struct drm_plane_state *state)
+static void ipu_plane_destroy_state(struct drm_plane *plane,
+                                   struct drm_plane_state *state)
 {
        struct ipu_plane_state *ipu_state = to_ipu_plane_state(state);
 
index 3e9bba4d66246b10a2f4a914fbddf97513681de0..6d8e3a9a6fc093164adc20e37e7b70db54cfe7bf 100644 (file)
@@ -680,7 +680,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev)
        } else {
                dev_info(&pdev->dev,
                         "no iommu, fallback to phys contig buffers for scanout\n");
-               aspace = NULL;;
+               aspace = NULL;
        }
 
        pm_runtime_put_sync(&pdev->dev);
index 380f340204e8a844be6541adc8b7c506844c9ba0..debbbf0fd4bdda619732c67952c772f9957c4166 100644 (file)
@@ -134,7 +134,7 @@ nv50_get_intensity(struct backlight_device *bd)
        struct nouveau_encoder *nv_encoder = bl_get_data(bd);
        struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
        struct nvif_object *device = &drm->client.device.object;
-       int or = nv_encoder->or;
+       int or = ffs(nv_encoder->dcb->or) - 1;
        u32 div = 1025;
        u32 val;
 
@@ -149,7 +149,7 @@ nv50_set_intensity(struct backlight_device *bd)
        struct nouveau_encoder *nv_encoder = bl_get_data(bd);
        struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
        struct nvif_object *device = &drm->client.device.object;
-       int or = nv_encoder->or;
+       int or = ffs(nv_encoder->dcb->or) - 1;
        u32 div = 1025;
        u32 val = (bd->props.brightness * div) / 100;
 
@@ -170,7 +170,7 @@ nva3_get_intensity(struct backlight_device *bd)
        struct nouveau_encoder *nv_encoder = bl_get_data(bd);
        struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
        struct nvif_object *device = &drm->client.device.object;
-       int or = nv_encoder->or;
+       int or = ffs(nv_encoder->dcb->or) - 1;
        u32 div, val;
 
        div  = nvif_rd32(device, NV50_PDISP_SOR_PWM_DIV(or));
@@ -188,7 +188,7 @@ nva3_set_intensity(struct backlight_device *bd)
        struct nouveau_encoder *nv_encoder = bl_get_data(bd);
        struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
        struct nvif_object *device = &drm->client.device.object;
-       int or = nv_encoder->or;
+       int or = ffs(nv_encoder->dcb->or) - 1;
        u32 div, val;
 
        div = nvif_rd32(device, NV50_PDISP_SOR_PWM_DIV(or));
@@ -228,7 +228,7 @@ nv50_backlight_init(struct drm_connector *connector)
                        return -ENODEV;
        }
 
-       if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(nv_encoder->or)))
+       if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(ffs(nv_encoder->dcb->or) - 1)))
                return 0;
 
        if (drm->client.device.info.chipset <= 0xa0 ||
@@ -268,13 +268,13 @@ nouveau_backlight_init(struct drm_device *dev)
        struct nvif_device *device = &drm->client.device;
        struct drm_connector *connector;
 
+       INIT_LIST_HEAD(&drm->bl_connectors);
+
        if (apple_gmux_present()) {
                NV_INFO(drm, "Apple GMUX detected: not registering Nouveau backlight interface\n");
                return 0;
        }
 
-       INIT_LIST_HEAD(&drm->bl_connectors);
-
        list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
                if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS &&
                    connector->connector_type != DRM_MODE_CONNECTOR_eDP)
index dd8d4352ed998e7f09d36a981098edae84830808..caddce88d2d8b3e5eaf0528c609fc2a190c8294f 100644 (file)
@@ -4477,6 +4477,7 @@ nv50_display_create(struct drm_device *dev)
        nouveau_display(dev)->fini = nv50_display_fini;
        disp->disp = &nouveau_display(dev)->disp;
        dev->mode_config.funcs = &nv50_disp_func;
+       dev->driver->driver_features |= DRIVER_PREFER_XBGR_30BPP;
        if (nouveau_atomic)
                dev->driver->driver_features |= DRIVER_ATOMIC;
 
index 93946dcee3191be1418ecf332ef7135854849a09..1c12e58f44c2684b5ce0d84a18e286b55a01235f 100644 (file)
@@ -1354,7 +1354,7 @@ nvkm_vmm_get_locked(struct nvkm_vmm *vmm, bool getref, bool mapref, bool sparse,
 
                tail = this->addr + this->size;
                if (vmm->func->page_block && next && next->page != p)
-                       tail = ALIGN_DOWN(addr, vmm->func->page_block);
+                       tail = ALIGN_DOWN(tail, vmm->func->page_block);
 
                if (addr <= tail && tail - addr >= size) {
                        rb_erase(&this->tree, &vmm->free);
index d3045a371a557261776ff32a88aca99b8836c538..7c73bc7e2f854e815aeb249bb4f80dbb932794f4 100644 (file)
@@ -3221,35 +3221,8 @@ static void cik_gpu_init(struct radeon_device *rdev)
        case CHIP_KAVERI:
                rdev->config.cik.max_shader_engines = 1;
                rdev->config.cik.max_tile_pipes = 4;
-               if ((rdev->pdev->device == 0x1304) ||
-                   (rdev->pdev->device == 0x1305) ||
-                   (rdev->pdev->device == 0x130C) ||
-                   (rdev->pdev->device == 0x130F) ||
-                   (rdev->pdev->device == 0x1310) ||
-                   (rdev->pdev->device == 0x1311) ||
-                   (rdev->pdev->device == 0x131C)) {
-                       rdev->config.cik.max_cu_per_sh = 8;
-                       rdev->config.cik.max_backends_per_se = 2;
-               } else if ((rdev->pdev->device == 0x1309) ||
-                          (rdev->pdev->device == 0x130A) ||
-                          (rdev->pdev->device == 0x130D) ||
-                          (rdev->pdev->device == 0x1313) ||
-                          (rdev->pdev->device == 0x131D)) {
-                       rdev->config.cik.max_cu_per_sh = 6;
-                       rdev->config.cik.max_backends_per_se = 2;
-               } else if ((rdev->pdev->device == 0x1306) ||
-                          (rdev->pdev->device == 0x1307) ||
-                          (rdev->pdev->device == 0x130B) ||
-                          (rdev->pdev->device == 0x130E) ||
-                          (rdev->pdev->device == 0x1315) ||
-                          (rdev->pdev->device == 0x1318) ||
-                          (rdev->pdev->device == 0x131B)) {
-                       rdev->config.cik.max_cu_per_sh = 4;
-                       rdev->config.cik.max_backends_per_se = 1;
-               } else {
-                       rdev->config.cik.max_cu_per_sh = 3;
-                       rdev->config.cik.max_backends_per_se = 1;
-               }
+               rdev->config.cik.max_cu_per_sh = 8;
+               rdev->config.cik.max_backends_per_se = 2;
                rdev->config.cik.max_sh_per_se = 1;
                rdev->config.cik.max_texture_channel_caches = 4;
                rdev->config.cik.max_gprs = 256;
index 2e2ca3c6b47d374fc6650d15bfab5d7934740a2f..df9469a8fdb1689ec2a5f8b4c3e1a5995b4e4e82 100644 (file)
@@ -90,25 +90,18 @@ void radeon_connector_hotplug(struct drm_connector *connector)
                /* don't do anything if sink is not display port, i.e.,
                 * passive dp->(dvi|hdmi) adaptor
                 */
-               if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) {
-                       int saved_dpms = connector->dpms;
-                       /* Only turn off the display if it's physically disconnected */
-                       if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) {
-                               drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
-                       } else if (radeon_dp_needs_link_train(radeon_connector)) {
-                               /* Don't try to start link training before we
-                                * have the dpcd */
-                               if (!radeon_dp_getdpcd(radeon_connector))
-                                       return;
-
-                               /* set it to OFF so that drm_helper_connector_dpms()
-                                * won't return immediately since the current state
-                                * is ON at this point.
-                                */
-                               connector->dpms = DRM_MODE_DPMS_OFF;
-                               drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
-                       }
-                       connector->dpms = saved_dpms;
+               if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT &&
+                   radeon_hpd_sense(rdev, radeon_connector->hpd.hpd) &&
+                   radeon_dp_needs_link_train(radeon_connector)) {
+                       /* Don't start link training before we have the DPCD */
+                       if (!radeon_dp_getdpcd(radeon_connector))
+                               return;
+
+                       /* Turn the connector off and back on immediately, which
+                        * will trigger link training
+                        */
+                       drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+                       drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
                }
        }
 }
index 8d3e3d2e0090938c2ec895d0836871622fad0bd7..7828a5e1062999b1bae507440b6dd8847095546b 100644 (file)
@@ -1365,6 +1365,10 @@ int radeon_device_init(struct radeon_device *rdev,
        if ((rdev->flags & RADEON_IS_PCI) &&
            (rdev->family <= CHIP_RS740))
                rdev->need_dma32 = true;
+#ifdef CONFIG_PPC64
+       if (rdev->family == CHIP_CEDAR)
+               rdev->need_dma32 = true;
+#endif
 
        dma_bits = rdev->need_dma32 ? 32 : 40;
        r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(dma_bits));
index a9962ffba720b784b24de5f5f117f330b19e5710..27d8e7dd2d0676c4f369041be3bcb5b95774a1b3 100644 (file)
@@ -34,8 +34,6 @@ void radeon_gem_object_free(struct drm_gem_object *gobj)
        struct radeon_bo *robj = gem_to_radeon_bo(gobj);
 
        if (robj) {
-               if (robj->gem_base.import_attach)
-                       drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg);
                radeon_mn_unregister(robj);
                radeon_bo_unref(&robj);
        }
index 15404af9d740612d6882f832c7373eea04ec9f71..31f5ad605e59f694bad78c5f18ffa211c422dfef 100644 (file)
@@ -82,6 +82,8 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
        mutex_unlock(&bo->rdev->gem.mutex);
        radeon_bo_clear_surface_reg(bo);
        WARN_ON_ONCE(!list_empty(&bo->va));
+       if (bo->gem_base.import_attach)
+               drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg);
        drm_gem_object_release(&bo->gem_base);
        kfree(bo);
 }
index 326ad068c15aa63ef38a6c85fbcc368ad52bd801..4b6542538ff91581272deadb6e971c50f2429033 100644 (file)
@@ -47,7 +47,6 @@ static bool radeon_pm_in_vbl(struct radeon_device *rdev);
 static bool radeon_pm_debug_check_in_vbl(struct radeon_device *rdev, bool finish);
 static void radeon_pm_update_profile(struct radeon_device *rdev);
 static void radeon_pm_set_clocks(struct radeon_device *rdev);
-static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev);
 
 int radeon_pm_get_type_index(struct radeon_device *rdev,
                             enum radeon_pm_state_type ps_type,
@@ -80,8 +79,6 @@ void radeon_pm_acpi_event_handler(struct radeon_device *rdev)
                                radeon_dpm_enable_bapm(rdev, rdev->pm.dpm.ac_power);
                }
                mutex_unlock(&rdev->pm.mutex);
-               /* allow new DPM state to be picked */
-               radeon_pm_compute_clocks_dpm(rdev);
        } else if (rdev->pm.pm_method == PM_METHOD_PROFILE) {
                if (rdev->pm.profile == PM_PROFILE_AUTO) {
                        mutex_lock(&rdev->pm.mutex);
@@ -885,8 +882,7 @@ static struct radeon_ps *radeon_dpm_pick_power_state(struct radeon_device *rdev,
                dpm_state = POWER_STATE_TYPE_INTERNAL_3DPERF;
        /* balanced states don't exist at the moment */
        if (dpm_state == POWER_STATE_TYPE_BALANCED)
-               dpm_state = rdev->pm.dpm.ac_power ?
-                       POWER_STATE_TYPE_PERFORMANCE : POWER_STATE_TYPE_BATTERY;
+               dpm_state = POWER_STATE_TYPE_PERFORMANCE;
 
 restart_search:
        /* Pick the best power state based on current conditions */
index 2c18996d59c58e6247a24936da9dc155f76d78cf..0d95888ccc3e778bb9752376682c5cccba525e7b 100644 (file)
@@ -461,7 +461,7 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_jo
 {
        struct drm_sched_job *s_job;
        struct drm_sched_entity *entity, *tmp;
-       int i;;
+       int i;
 
        spin_lock(&sched->job_list_lock);
        list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
index 5decae0069d0bfda0f45a0c78ea033f1186282c6..78cbc3145e44063426ed40c09b5fadbb35bbdd75 100644 (file)
@@ -93,6 +93,8 @@ static void sun4i_crtc_atomic_disable(struct drm_crtc *crtc,
 
        DRM_DEBUG_DRIVER("Disabling the CRTC\n");
 
+       drm_crtc_vblank_off(crtc);
+
        sun4i_tcon_set_status(scrtc->tcon, encoder, false);
 
        if (crtc->state->event && !crtc->state->active) {
@@ -113,6 +115,8 @@ static void sun4i_crtc_atomic_enable(struct drm_crtc *crtc,
        DRM_DEBUG_DRIVER("Enabling the CRTC\n");
 
        sun4i_tcon_set_status(scrtc->tcon, encoder, true);
+
+       drm_crtc_vblank_on(crtc);
 }
 
 static void sun4i_crtc_mode_set_nofb(struct drm_crtc *crtc)
index 023f39bda633de70825243b3a28335e2686091ed..e36004fbe45360deb9487fa80cdd564c33fa030e 100644 (file)
@@ -132,10 +132,13 @@ static int sun4i_dclk_get_phase(struct clk_hw *hw)
 static int sun4i_dclk_set_phase(struct clk_hw *hw, int degrees)
 {
        struct sun4i_dclk *dclk = hw_to_dclk(hw);
+       u32 val = degrees / 120;
+
+       val <<= 28;
 
        regmap_update_bits(dclk->regmap, SUN4I_TCON0_IO_POL_REG,
                           GENMASK(29, 28),
-                          degrees / 120);
+                          val);
 
        return 0;
 }
index 4570da0227b4e49523b7f9c697fce54777dacd7e..d9a71f361b1440fd6a84c4dcbd9f295c7e873a39 100644 (file)
@@ -111,7 +111,7 @@ static int sun4i_drv_bind(struct device *dev)
        /* drm_vblank_init calls kcalloc, which can fail */
        ret = drm_vblank_init(drm, drm->mode_config.num_crtc);
        if (ret)
-               goto free_mem_region;
+               goto cleanup_mode_config;
 
        drm->irq_enabled = true;
 
@@ -139,7 +139,6 @@ static int sun4i_drv_bind(struct device *dev)
        sun4i_framebuffer_free(drm);
 cleanup_mode_config:
        drm_mode_config_cleanup(drm);
-free_mem_region:
        of_reserved_mem_device_release(dev);
 free_drm:
        drm_dev_unref(drm);
index 500b6fb3e0284d2fdfc71265a64f0d5b51fe4f99..fa4bcd092eaf20f9f04faaaf49ca9339f134f385 100644 (file)
@@ -538,7 +538,8 @@ static int sun4i_hdmi_bind(struct device *dev, struct device *master,
                                             &sun4i_hdmi_regmap_config);
        if (IS_ERR(hdmi->regmap)) {
                dev_err(dev, "Couldn't create HDMI encoder regmap\n");
-               return PTR_ERR(hdmi->regmap);
+               ret = PTR_ERR(hdmi->regmap);
+               goto err_disable_mod_clk;
        }
 
        ret = sun4i_tmds_create(hdmi);
@@ -551,7 +552,8 @@ static int sun4i_hdmi_bind(struct device *dev, struct device *master,
                hdmi->ddc_parent_clk = devm_clk_get(dev, "ddc");
                if (IS_ERR(hdmi->ddc_parent_clk)) {
                        dev_err(dev, "Couldn't get the HDMI DDC clock\n");
-                       return PTR_ERR(hdmi->ddc_parent_clk);
+                       ret = PTR_ERR(hdmi->ddc_parent_clk);
+                       goto err_disable_mod_clk;
                }
        } else {
                hdmi->ddc_parent_clk = hdmi->tmds_clk;
index 832f8f9bc47fd046baebde3af7c4d406b301a40c..b8da5a50a61d3b820d8ee5c8badc26132efe0b2b 100644 (file)
@@ -92,6 +92,8 @@ static int sun4i_rgb_mode_valid(struct drm_connector *connector,
 
        DRM_DEBUG_DRIVER("Vertical parameters OK\n");
 
+       tcon->dclk_min_div = 6;
+       tcon->dclk_max_div = 127;
        rounded_rate = clk_round_rate(tcon->dclk, rate);
        if (rounded_rate < rate)
                return MODE_CLOCK_LOW;
index 3c15cf24b50360918253ed5bcff4d4a73a1e6cd0..a818ca4916051ade239efa0f4789d5c3cab36165 100644 (file)
@@ -101,10 +101,13 @@ static void sun4i_tcon_channel_set_status(struct sun4i_tcon *tcon, int channel,
                return;
        }
 
-       if (enabled)
+       if (enabled) {
                clk_prepare_enable(clk);
-       else
+               clk_rate_exclusive_get(clk);
+       } else {
+               clk_rate_exclusive_put(clk);
                clk_disable_unprepare(clk);
+       }
 }
 
 static void sun4i_tcon_lvds_set_status(struct sun4i_tcon *tcon,
@@ -335,6 +338,9 @@ static void sun4i_tcon0_mode_set_lvds(struct sun4i_tcon *tcon,
        regmap_update_bits(tcon->regs, SUN4I_TCON_GCTL_REG,
                           SUN4I_TCON_GCTL_IOMAP_MASK,
                           SUN4I_TCON_GCTL_IOMAP_TCON0);
+
+       /* Enable the output on the pins */
+       regmap_write(tcon->regs, SUN4I_TCON0_IO_TRI_REG, 0xe0000000);
 }
 
 static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
@@ -870,52 +876,56 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
                return ret;
        }
 
-       /*
-        * This can only be made optional since we've had DT nodes
-        * without the LVDS reset properties.
-        *
-        * If the property is missing, just disable LVDS, and print a
-        * warning.
-        */
-       tcon->lvds_rst = devm_reset_control_get_optional(dev, "lvds");
-       if (IS_ERR(tcon->lvds_rst)) {
-               dev_err(dev, "Couldn't get our reset line\n");
-               return PTR_ERR(tcon->lvds_rst);
-       } else if (tcon->lvds_rst) {
-               has_lvds_rst = true;
-               reset_control_reset(tcon->lvds_rst);
-       } else {
-               has_lvds_rst = false;
-       }
+       if (tcon->quirks->supports_lvds) {
+               /*
+                * This can only be made optional since we've had DT
+                * nodes without the LVDS reset properties.
+                *
+                * If the property is missing, just disable LVDS, and
+                * print a warning.
+                */
+               tcon->lvds_rst = devm_reset_control_get_optional(dev, "lvds");
+               if (IS_ERR(tcon->lvds_rst)) {
+                       dev_err(dev, "Couldn't get our reset line\n");
+                       return PTR_ERR(tcon->lvds_rst);
+               } else if (tcon->lvds_rst) {
+                       has_lvds_rst = true;
+                       reset_control_reset(tcon->lvds_rst);
+               } else {
+                       has_lvds_rst = false;
+               }
 
-       /*
-        * This can only be made optional since we've had DT nodes
-        * without the LVDS reset properties.
-        *
-        * If the property is missing, just disable LVDS, and print a
-        * warning.
-        */
-       if (tcon->quirks->has_lvds_alt) {
-               tcon->lvds_pll = devm_clk_get(dev, "lvds-alt");
-               if (IS_ERR(tcon->lvds_pll)) {
-                       if (PTR_ERR(tcon->lvds_pll) == -ENOENT) {
-                               has_lvds_alt = false;
+               /*
+                * This can only be made optional since we've had DT
+                * nodes without the LVDS reset properties.
+                *
+                * If the property is missing, just disable LVDS, and
+                * print a warning.
+                */
+               if (tcon->quirks->has_lvds_alt) {
+                       tcon->lvds_pll = devm_clk_get(dev, "lvds-alt");
+                       if (IS_ERR(tcon->lvds_pll)) {
+                               if (PTR_ERR(tcon->lvds_pll) == -ENOENT) {
+                                       has_lvds_alt = false;
+                               } else {
+                                       dev_err(dev, "Couldn't get the LVDS PLL\n");
+                                       return PTR_ERR(tcon->lvds_pll);
+                               }
                        } else {
-                               dev_err(dev, "Couldn't get the LVDS PLL\n");
-                               return PTR_ERR(tcon->lvds_pll);
+                               has_lvds_alt = true;
                        }
-               } else {
-                       has_lvds_alt = true;
                }
-       }
 
-       if (!has_lvds_rst || (tcon->quirks->has_lvds_alt && !has_lvds_alt)) {
-               dev_warn(dev,
-                        "Missing LVDS properties, Please upgrade your DT\n");
-               dev_warn(dev, "LVDS output disabled\n");
-               can_lvds = false;
+               if (!has_lvds_rst ||
+                   (tcon->quirks->has_lvds_alt && !has_lvds_alt)) {
+                       dev_warn(dev, "Missing LVDS properties, Please upgrade your DT\n");
+                       dev_warn(dev, "LVDS output disabled\n");
+                       can_lvds = false;
+               } else {
+                       can_lvds = true;
+               }
        } else {
-               can_lvds = true;
+               can_lvds = false;
        }
 
        ret = sun4i_tcon_init_clocks(dev, tcon);
@@ -1134,7 +1144,7 @@ static const struct sun4i_tcon_quirks sun8i_a33_quirks = {
 };
 
 static const struct sun4i_tcon_quirks sun8i_a83t_lcd_quirks = {
-       /* nothing is supported */
+       .supports_lvds          = true,
 };
 
 static const struct sun4i_tcon_quirks sun8i_v3s_quirks = {
index b761c7b823c560536b0f44a3c6eab6337d3e32d7..278700c7bf9f6f71ec3c85686c81919362aa40fa 100644 (file)
@@ -175,6 +175,7 @@ struct sun4i_tcon_quirks {
        bool    has_channel_1;  /* a33 does not have channel 1 */
        bool    has_lvds_alt;   /* Does the LVDS clock have a parent other than the TCON clock? */
        bool    needs_de_be_mux; /* sun6i needs mux to select backend */
+       bool    supports_lvds;   /* Does the TCON support an LVDS output? */
 
        /* callback to handle tcon muxing options */
        int     (*set_mux)(struct sun4i_tcon *, const struct drm_encoder *);
index b8403ed48285288c277d224e253285caebed3adb..fbffe1948b3bb2d5311ecbee000ee879fbd6ce28 100644 (file)
@@ -1903,8 +1903,12 @@ static int tegra_dc_init(struct host1x_client *client)
        if (!IS_ERR(primary))
                drm_plane_cleanup(primary);
 
-       if (group && tegra->domain) {
-               iommu_detach_group(tegra->domain, group);
+       if (group && dc->domain) {
+               if (group == tegra->group) {
+                       iommu_detach_group(dc->domain, group);
+                       tegra->group = NULL;
+               }
+
                dc->domain = NULL;
        }
 
@@ -1913,8 +1917,10 @@ static int tegra_dc_init(struct host1x_client *client)
 
 static int tegra_dc_exit(struct host1x_client *client)
 {
+       struct drm_device *drm = dev_get_drvdata(client->parent);
        struct iommu_group *group = iommu_group_get(client->dev);
        struct tegra_dc *dc = host1x_client_to_dc(client);
+       struct tegra_drm *tegra = drm->dev_private;
        int err;
 
        devm_free_irq(dc->dev, dc->irq, dc);
@@ -1926,7 +1932,11 @@ static int tegra_dc_exit(struct host1x_client *client)
        }
 
        if (group && dc->domain) {
-               iommu_detach_group(dc->domain, group);
+               if (group == tegra->group) {
+                       iommu_detach_group(dc->domain, group);
+                       tegra->group = NULL;
+               }
+
                dc->domain = NULL;
        }
 
index d50bddb2e4474e456b8105d6b90b47488ff21263..7fcf4a24284088ba798296016797032f70e5fa96 100644 (file)
@@ -250,6 +250,7 @@ static void tegra_drm_unload(struct drm_device *drm)
 
        drm_kms_helper_poll_fini(drm);
        tegra_drm_fb_exit(drm);
+       drm_atomic_helper_shutdown(drm);
        drm_mode_config_cleanup(drm);
 
        err = host1x_device_exit(device);
index 4d2ed966f9e3248a074ab25dcdfb6156e9c93200..87c5d89bc9baf3cd09012f5a2385510b645ebe1b 100644 (file)
@@ -1072,7 +1072,6 @@ static int tegra_dsi_exit(struct host1x_client *client)
        struct tegra_dsi *dsi = host1x_client_to_dsi(client);
 
        tegra_output_exit(&dsi->output);
-       regulator_disable(dsi->vdd);
 
        return 0;
 }
index 36a06a99369821aed88bbf9701550848b420493e..94dac79ac3c9641b84aa1af53093e104e0661ca4 100644 (file)
@@ -297,6 +297,10 @@ int tegra_plane_format_get_alpha(unsigned int opaque, unsigned int *alpha)
        case WIN_COLOR_DEPTH_B8G8R8X8:
                *alpha = WIN_COLOR_DEPTH_B8G8R8A8;
                return 0;
+
+       case WIN_COLOR_DEPTH_B5G6R5:
+               *alpha = opaque;
+               return 0;
        }
 
        return -EINVAL;
@@ -330,9 +334,6 @@ void tegra_plane_check_dependent(struct tegra_plane *tegra,
        unsigned int zpos[2];
        unsigned int i;
 
-       for (i = 0; i < 3; i++)
-               state->dependent[i] = false;
-
        for (i = 0; i < 2; i++)
                zpos[i] = 0;
 
@@ -346,6 +347,8 @@ void tegra_plane_check_dependent(struct tegra_plane *tegra,
 
                index = tegra_plane_get_overlap_index(tegra, p);
 
+               state->dependent[index] = false;
+
                /*
                 * If any of the other planes is on top of this plane and uses
                 * a format with an alpha component, mark this plane as being
index b5b335c9b2bbe504fdddf47246820e1e64199d18..2ebdc6d5a76e60a33d6a271ff158258a61b7908c 100644 (file)
@@ -159,10 +159,15 @@ static int udl_fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 {
        unsigned long start = vma->vm_start;
        unsigned long size = vma->vm_end - vma->vm_start;
-       unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+       unsigned long offset;
        unsigned long page, pos;
 
-       if (offset + size > info->fix.smem_len)
+       if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT))
+               return -EINVAL;
+
+       offset = vma->vm_pgoff << PAGE_SHIFT;
+
+       if (offset > info->fix.smem_len || size > info->fix.smem_len - offset)
                return -EINVAL;
 
        pos = (unsigned long)info->fix.smem_start + offset;
index 5720a0d4ac0a9ebf242755921bc1ac6fea8c2630..677ac16c8a6dea750e80ec914973344d972bb765 100644 (file)
@@ -197,6 +197,9 @@ static int virtio_gpu_getparam_ioctl(struct drm_device *dev, void *data,
        case VIRTGPU_PARAM_3D_FEATURES:
                value = vgdev->has_virgl_3d == true ? 1 : 0;
                break;
+       case VIRTGPU_PARAM_CAPSET_QUERY_FIX:
+               value = 1;
+               break;
        default:
                return -EINVAL;
        }
@@ -472,7 +475,7 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev,
 {
        struct virtio_gpu_device *vgdev = dev->dev_private;
        struct drm_virtgpu_get_caps *args = data;
-       int size;
+       unsigned size, host_caps_size;
        int i;
        int found_valid = -1;
        int ret;
@@ -481,6 +484,10 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev,
        if (vgdev->num_capsets == 0)
                return -ENOSYS;
 
+       /* don't allow userspace to pass 0 */
+       if (args->size == 0)
+               return -EINVAL;
+
        spin_lock(&vgdev->display_info_lock);
        for (i = 0; i < vgdev->num_capsets; i++) {
                if (vgdev->capsets[i].id == args->cap_set_id) {
@@ -496,11 +503,9 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev,
                return -EINVAL;
        }
 
-       size = vgdev->capsets[found_valid].max_size;
-       if (args->size > size) {
-               spin_unlock(&vgdev->display_info_lock);
-               return -EINVAL;
-       }
+       host_caps_size = vgdev->capsets[found_valid].max_size;
+       /* only copy to user the minimum of the host caps size or the guest caps size */
+       size = min(args->size, host_caps_size);
 
        list_for_each_entry(cache_ent, &vgdev->cap_cache, head) {
                if (cache_ent->id == args->cap_set_id &&
index 184340d486c377d38a2f21bb66cc56385a52d37b..86d25f18aa992745e9c4ebb3c89e2bd3199212b9 100644 (file)
@@ -1337,6 +1337,19 @@ static void __vmw_svga_disable(struct vmw_private *dev_priv)
  */
 void vmw_svga_disable(struct vmw_private *dev_priv)
 {
+       /*
+        * Disabling SVGA will turn off device modesetting capabilities, so
+        * notify KMS about that so that it doesn't cache atomic state that
+        * isn't valid anymore, for example crtcs turned on.
+        * Strictly we'd want to do this under the SVGA lock (or an SVGA mutex),
+        * but vmw_kms_lost_device() takes the reservation sem and thus we'll
+        * end up with lock order reversal. Thus, a master may actually perform
+        * a new modeset just after we call vmw_kms_lost_device() and race with
+        * vmw_svga_disable(), but that should at worst cause atomic KMS state
+        * to be inconsistent with the device, causing modesetting problems.
+        *
+        */
+       vmw_kms_lost_device(dev_priv->dev);
        ttm_write_lock(&dev_priv->reservation_sem, false);
        spin_lock(&dev_priv->svga_lock);
        if (dev_priv->bdev.man[TTM_PL_VRAM].use_type) {
index d08753e8fd94f077b3815f2d64f5ac63d0cdd7f2..9116fe8baebcab24575a0b6dc329578ac4db0b40 100644 (file)
@@ -938,6 +938,7 @@ int vmw_kms_present(struct vmw_private *dev_priv,
 int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data,
                                struct drm_file *file_priv);
 void vmw_kms_legacy_hotspot_clear(struct vmw_private *dev_priv);
+void vmw_kms_lost_device(struct drm_device *dev);
 
 int vmw_dumb_create(struct drm_file *file_priv,
                    struct drm_device *dev,
index ead61015cd79ceb0a2602615c72cddf7020da85d..3c824fd7cbf36d64e72e758bdb7c5b5e3b270dd6 100644 (file)
@@ -31,7 +31,6 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_rect.h>
 
-
 /* Might need a hrtimer here? */
 #define VMWGFX_PRESENT_RATE ((HZ / 60 > 0) ? HZ / 60 : 1)
 
@@ -2517,9 +2516,12 @@ void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv,
  * Helper to be used if an error forces the caller to undo the actions of
  * vmw_kms_helper_resource_prepare.
  */
-void vmw_kms_helper_resource_revert(struct vmw_resource *res)
+void vmw_kms_helper_resource_revert(struct vmw_validation_ctx *ctx)
 {
-       vmw_kms_helper_buffer_revert(res->backup);
+       struct vmw_resource *res = ctx->res;
+
+       vmw_kms_helper_buffer_revert(ctx->buf);
+       vmw_dmabuf_unreference(&ctx->buf);
        vmw_resource_unreserve(res, false, NULL, 0);
        mutex_unlock(&res->dev_priv->cmdbuf_mutex);
 }
@@ -2536,10 +2538,14 @@ void vmw_kms_helper_resource_revert(struct vmw_resource *res)
  * interrupted by a signal.
  */
 int vmw_kms_helper_resource_prepare(struct vmw_resource *res,
-                                   bool interruptible)
+                                   bool interruptible,
+                                   struct vmw_validation_ctx *ctx)
 {
        int ret = 0;
 
+       ctx->buf = NULL;
+       ctx->res = res;
+
        if (interruptible)
                ret = mutex_lock_interruptible(&res->dev_priv->cmdbuf_mutex);
        else
@@ -2558,6 +2564,8 @@ int vmw_kms_helper_resource_prepare(struct vmw_resource *res,
                                                    res->dev_priv->has_mob);
                if (ret)
                        goto out_unreserve;
+
+               ctx->buf = vmw_dmabuf_reference(res->backup);
        }
        ret = vmw_resource_validate(res);
        if (ret)
@@ -2565,7 +2573,7 @@ int vmw_kms_helper_resource_prepare(struct vmw_resource *res,
        return 0;
 
 out_revert:
-       vmw_kms_helper_buffer_revert(res->backup);
+       vmw_kms_helper_buffer_revert(ctx->buf);
 out_unreserve:
        vmw_resource_unreserve(res, false, NULL, 0);
 out_unlock:
@@ -2581,11 +2589,13 @@ int vmw_kms_helper_resource_prepare(struct vmw_resource *res,
  * @out_fence: Optional pointer to a fence pointer. If non-NULL, a
  * ref-counted fence pointer is returned here.
  */
-void vmw_kms_helper_resource_finish(struct vmw_resource *res,
-                            struct vmw_fence_obj **out_fence)
+void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx,
+                                   struct vmw_fence_obj **out_fence)
 {
-       if (res->backup || out_fence)
-               vmw_kms_helper_buffer_finish(res->dev_priv, NULL, res->backup,
+       struct vmw_resource *res = ctx->res;
+
+       if (ctx->buf || out_fence)
+               vmw_kms_helper_buffer_finish(res->dev_priv, NULL, ctx->buf,
                                             out_fence, NULL);
 
        vmw_resource_unreserve(res, false, NULL, 0);
@@ -2851,3 +2861,14 @@ int vmw_kms_set_config(struct drm_mode_set *set,
 
        return drm_atomic_helper_set_config(set, ctx);
 }
+
+
+/**
+ * vmw_kms_lost_device - Notify kms that modesetting capabilities will be lost
+ *
+ * @dev: Pointer to the drm device
+ */
+void vmw_kms_lost_device(struct drm_device *dev)
+{
+       drm_atomic_helper_shutdown(dev);
+}
index cd9da2dd79af1a062d4aaa3d6b6ded468bd4207e..3d2ca280eaa72ee1a5f8b216bfe7b54467e3d842 100644 (file)
@@ -240,6 +240,11 @@ struct vmw_display_unit {
        int set_gui_y;
 };
 
+struct vmw_validation_ctx {
+       struct vmw_resource *res;
+       struct vmw_dma_buffer *buf;
+};
+
 #define vmw_crtc_to_du(x) \
        container_of(x, struct vmw_display_unit, crtc)
 #define vmw_connector_to_du(x) \
@@ -296,9 +301,10 @@ void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv,
                                  struct drm_vmw_fence_rep __user *
                                  user_fence_rep);
 int vmw_kms_helper_resource_prepare(struct vmw_resource *res,
-                                   bool interruptible);
-void vmw_kms_helper_resource_revert(struct vmw_resource *res);
-void vmw_kms_helper_resource_finish(struct vmw_resource *res,
+                                   bool interruptible,
+                                   struct vmw_validation_ctx *ctx);
+void vmw_kms_helper_resource_revert(struct vmw_validation_ctx *ctx);
+void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx,
                                    struct vmw_fence_obj **out_fence);
 int vmw_kms_readback(struct vmw_private *dev_priv,
                     struct drm_file *file_priv,
@@ -439,5 +445,4 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv,
 
 int vmw_kms_set_config(struct drm_mode_set *set,
                       struct drm_modeset_acquire_ctx *ctx);
-
 #endif
index 63a4cd794b73a12821ea8adbfb72e997367c5193..3ec9eae831b8f15295a6da11a9a267e3e2b4fcb8 100644 (file)
@@ -909,12 +909,13 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv,
        struct vmw_framebuffer_surface *vfbs =
                container_of(framebuffer, typeof(*vfbs), base);
        struct vmw_kms_sou_surface_dirty sdirty;
+       struct vmw_validation_ctx ctx;
        int ret;
 
        if (!srf)
                srf = &vfbs->surface->res;
 
-       ret = vmw_kms_helper_resource_prepare(srf, true);
+       ret = vmw_kms_helper_resource_prepare(srf, true, &ctx);
        if (ret)
                return ret;
 
@@ -933,7 +934,7 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv,
        ret = vmw_kms_helper_dirty(dev_priv, framebuffer, clips, vclips,
                                   dest_x, dest_y, num_clips, inc,
                                   &sdirty.base);
-       vmw_kms_helper_resource_finish(srf, out_fence);
+       vmw_kms_helper_resource_finish(&ctx, out_fence);
 
        return ret;
 }
index b68d74888ab1100be82f8a2a9fdc3234a4e04293..6b969e5dea2a862b392153822649217783ac661b 100644 (file)
@@ -980,12 +980,13 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv,
        struct vmw_framebuffer_surface *vfbs =
                container_of(framebuffer, typeof(*vfbs), base);
        struct vmw_stdu_dirty sdirty;
+       struct vmw_validation_ctx ctx;
        int ret;
 
        if (!srf)
                srf = &vfbs->surface->res;
 
-       ret = vmw_kms_helper_resource_prepare(srf, true);
+       ret = vmw_kms_helper_resource_prepare(srf, true, &ctx);
        if (ret)
                return ret;
 
@@ -1008,7 +1009,7 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv,
                                   dest_x, dest_y, num_clips, inc,
                                   &sdirty.base);
 out_finish:
-       vmw_kms_helper_resource_finish(srf, out_fence);
+       vmw_kms_helper_resource_finish(&ctx, out_fence);
 
        return ret;
 }
index 97b99500153d3e1477e40e3c0c4d5958c37a9c71..83f9dd934a5dc37ecb95f57b91d19af0c08f9e51 100644 (file)
@@ -250,10 +250,14 @@ void ipu_prg_channel_disable(struct ipuv3_channel *ipu_chan)
 {
        int prg_chan = ipu_prg_ipu_to_prg_chan(ipu_chan->num);
        struct ipu_prg *prg = ipu_chan->ipu->prg_priv;
-       struct ipu_prg_channel *chan = &prg->chan[prg_chan];
+       struct ipu_prg_channel *chan;
        u32 val;
 
-       if (!chan->enabled || prg_chan < 0)
+       if (prg_chan < 0)
+               return;
+
+       chan = &prg->chan[prg_chan];
+       if (!chan->enabled)
                return;
 
        pm_runtime_get_sync(prg->dev);
@@ -280,13 +284,15 @@ int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan,
 {
        int prg_chan = ipu_prg_ipu_to_prg_chan(ipu_chan->num);
        struct ipu_prg *prg = ipu_chan->ipu->prg_priv;
-       struct ipu_prg_channel *chan = &prg->chan[prg_chan];
+       struct ipu_prg_channel *chan;
        u32 val;
        int ret;
 
        if (prg_chan < 0)
                return prg_chan;
 
+       chan = &prg->chan[prg_chan];
+
        if (chan->enabled) {
                ipu_pre_update(prg->pres[chan->used_pre], *eba);
                return 0;
index 1d87757990568c40b9cebe4df7dda54fe21918f7..d9607905dc2f1dd0db15c6ca457ebc445921d829 100644 (file)
@@ -233,6 +233,7 @@ static int octeon_i2c_check_status(struct octeon_i2c *i2c, int final_read)
                return -EOPNOTSUPP;
 
        case STAT_TXDATA_NAK:
+       case STAT_BUS_ERROR:
                return -EIO;
        case STAT_TXADDR_NAK:
        case STAT_RXADDR_NAK:
index a7ef19855bb8fa9b0670d7406652e1aa214e76c6..9bb9f64fdda0392364638ecbaafe3fab5612baf6 100644 (file)
@@ -43,7 +43,7 @@
 #define TWSI_CTL_AAK           0x04    /* Assert ACK */
 
 /* Status values */
-#define STAT_ERROR             0x00
+#define STAT_BUS_ERROR         0x00
 #define STAT_START             0x08
 #define STAT_REP_START         0x10
 #define STAT_TXADDR_ACK                0x18
index 17fd55af4d9247ee89c0b581c5de3ab3437860a2..caa20eb5f26b03c7e02569e54dbb599fd2e73ec1 100644 (file)
@@ -928,7 +928,7 @@ static int exact_lock(dev_t dev, void *data)
 {
        struct gendisk *p = data;
 
-       if (!get_disk(p))
+       if (!get_disk_and_module(p))
                return -1;
        return 0;
 }
index a5b4cf030c11b74291baa6859ee370295fd0c42f..9183d148d644484c11f4e26fc87ff332c4fbb4eb 100644 (file)
@@ -550,18 +550,13 @@ static int addr_resolve(struct sockaddr *src_in,
                dst_release(dst);
        }
 
-       if (ndev->flags & IFF_LOOPBACK) {
-               ret = rdma_translate_ip(dst_in, addr);
-               /*
-                * Put the loopback device and get the translated
-                * device instead.
-                */
+       if (ndev) {
+               if (ndev->flags & IFF_LOOPBACK)
+                       ret = rdma_translate_ip(dst_in, addr);
+               else
+                       addr->bound_dev_if = ndev->ifindex;
                dev_put(ndev);
-               ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
-       } else {
-               addr->bound_dev_if = ndev->ifindex;
        }
-       dev_put(ndev);
 
        return ret;
 }
index e66963ca58bddaf02813c4e080f48ad4cf18be00..66f203730e80b8ea3a9657320a46585d56e1ea3d 100644 (file)
@@ -1334,7 +1334,7 @@ static bool validate_ipv6_net_dev(struct net_device *net_dev,
                           IPV6_ADDR_LINKLOCAL;
        struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
                                         &src_addr->sin6_addr, net_dev->ifindex,
-                                        strict);
+                                        NULL, strict);
        bool ret;
 
        if (!rt)
@@ -3069,7 +3069,8 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list,
                        continue;
 
                /* different dest port -> unique */
-               if (!cma_any_port(cur_daddr) &&
+               if (!cma_any_port(daddr) &&
+                   !cma_any_port(cur_daddr) &&
                    (dport != cur_dport))
                        continue;
 
@@ -3080,7 +3081,8 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list,
                        continue;
 
                /* different dst address -> unique */
-               if (!cma_any_addr(cur_daddr) &&
+               if (!cma_any_addr(daddr) &&
+                   !cma_any_addr(cur_daddr) &&
                    cma_addr_cmp(daddr, cur_daddr))
                        continue;
 
@@ -3378,13 +3380,13 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
                }
 #endif
        }
+       daddr = cma_dst_addr(id_priv);
+       daddr->sa_family = addr->sa_family;
+
        ret = cma_get_port(id_priv);
        if (ret)
                goto err2;
 
-       daddr = cma_dst_addr(id_priv);
-       daddr->sa_family = addr->sa_family;
-
        return 0;
 err2:
        if (id_priv->cma_dev)
@@ -4173,6 +4175,9 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
        struct cma_multicast *mc;
        int ret;
 
+       if (!id->device)
+               return -EINVAL;
+
        id_priv = container_of(id, struct rdma_id_private, id);
        if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
            !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
@@ -4549,6 +4554,7 @@ static struct pernet_operations cma_pernet_operations = {
        .exit = cma_exit_net,
        .id = &cma_pernet_id,
        .size = sizeof(struct cma_pernet),
+       .async = true,
 };
 
 static int __init cma_init(void)
index bc79ca8215d7c25ff22c2a901db04b209b49052a..af5ad6a56ae404d1cd2aae64f95e59d7ccded0ac 100644 (file)
@@ -17,6 +17,7 @@
 
 /* # of WCs to poll for with a single call to ib_poll_cq */
 #define IB_POLL_BATCH                  16
+#define IB_POLL_BATCH_DIRECT           8
 
 /* # of WCs to iterate over before yielding */
 #define IB_POLL_BUDGET_IRQ             256
 #define IB_POLL_FLAGS \
        (IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
 
-static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *poll_wc)
+static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
+                          int batch)
 {
        int i, n, completed = 0;
-       struct ib_wc *wcs = poll_wc ? : cq->wc;
 
        /*
         * budget might be (-1) if the caller does not
         * want to bound this call, thus we need unsigned
         * minimum here.
         */
-       while ((n = ib_poll_cq(cq, min_t(u32, IB_POLL_BATCH,
-                       budget - completed), wcs)) > 0) {
+       while ((n = ib_poll_cq(cq, min_t(u32, batch,
+                                        budget - completed), wcs)) > 0) {
                for (i = 0; i < n; i++) {
                        struct ib_wc *wc = &wcs[i];
 
@@ -48,8 +49,7 @@ static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *poll_wc)
 
                completed += n;
 
-               if (n != IB_POLL_BATCH ||
-                   (budget != -1 && completed >= budget))
+               if (n != batch || (budget != -1 && completed >= budget))
                        break;
        }
 
@@ -72,9 +72,9 @@ static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *poll_wc)
  */
 int ib_process_cq_direct(struct ib_cq *cq, int budget)
 {
-       struct ib_wc wcs[IB_POLL_BATCH];
+       struct ib_wc wcs[IB_POLL_BATCH_DIRECT];
 
-       return __ib_process_cq(cq, budget, wcs);
+       return __ib_process_cq(cq, budget, wcs, IB_POLL_BATCH_DIRECT);
 }
 EXPORT_SYMBOL(ib_process_cq_direct);
 
@@ -88,7 +88,7 @@ static int ib_poll_handler(struct irq_poll *iop, int budget)
        struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
        int completed;
 
-       completed = __ib_process_cq(cq, budget, NULL);
+       completed = __ib_process_cq(cq, budget, cq->wc, IB_POLL_BATCH);
        if (completed < budget) {
                irq_poll_complete(&cq->iop);
                if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
@@ -108,7 +108,8 @@ static void ib_cq_poll_work(struct work_struct *work)
        struct ib_cq *cq = container_of(work, struct ib_cq, work);
        int completed;
 
-       completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE, NULL);
+       completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE, cq->wc,
+                                   IB_POLL_BATCH);
        if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
            ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
                queue_work(ib_comp_wq, &cq->work);
index e8010e73a1cf4fe27ec9e5d3c25190003d55107a..bb065c9449be46617bd82e2b26d7648aa00212a3 100644 (file)
@@ -536,14 +536,14 @@ int ib_register_device(struct ib_device *device,
        ret = device->query_device(device, &device->attrs, &uhw);
        if (ret) {
                pr_warn("Couldn't query the device attributes\n");
-               goto cache_cleanup;
+               goto cg_cleanup;
        }
 
        ret = ib_device_register_sysfs(device, port_callback);
        if (ret) {
                pr_warn("Couldn't register device %s with driver model\n",
                        device->name);
-               goto cache_cleanup;
+               goto cg_cleanup;
        }
 
        device->reg_state = IB_DEV_REGISTERED;
@@ -559,6 +559,8 @@ int ib_register_device(struct ib_device *device,
        mutex_unlock(&device_mutex);
        return 0;
 
+cg_cleanup:
+       ib_device_unregister_rdmacg(device);
 cache_cleanup:
        ib_cache_cleanup_one(device);
        ib_cache_release_one(device);
index 8cf15d4a8ac438db82971ad3d3bc16649a5ed6da..9f029a1ca5ea9555b2ab366012d5eabd04168e3c 100644 (file)
@@ -1291,10 +1291,9 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
 
                resolved_dev = dev_get_by_index(dev_addr.net,
                                                dev_addr.bound_dev_if);
-               if (resolved_dev->flags & IFF_LOOPBACK) {
-                       dev_put(resolved_dev);
-                       resolved_dev = idev;
-                       dev_hold(resolved_dev);
+               if (!resolved_dev) {
+                       dev_put(idev);
+                       return -ENODEV;
                }
                ndev = ib_get_ndev_from_path(rec);
                rcu_read_lock();
index f015f1bf88c9c8c52c2a739bb130c2949348f0e4..e5a1e7d813265fc3f0947d3a6aa643367745d9cc 100644 (file)
@@ -132,7 +132,7 @@ static inline struct ucma_context *_ucma_find_context(int id,
        ctx = idr_find(&ctx_idr, id);
        if (!ctx)
                ctx = ERR_PTR(-ENOENT);
-       else if (ctx->file != file)
+       else if (ctx->file != file || !ctx->cm_id)
                ctx = ERR_PTR(-EINVAL);
        return ctx;
 }
@@ -456,6 +456,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
        struct rdma_ucm_create_id cmd;
        struct rdma_ucm_create_id_resp resp;
        struct ucma_context *ctx;
+       struct rdma_cm_id *cm_id;
        enum ib_qp_type qp_type;
        int ret;
 
@@ -476,10 +477,10 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
                return -ENOMEM;
 
        ctx->uid = cmd.uid;
-       ctx->cm_id = rdma_create_id(current->nsproxy->net_ns,
-                                   ucma_event_handler, ctx, cmd.ps, qp_type);
-       if (IS_ERR(ctx->cm_id)) {
-               ret = PTR_ERR(ctx->cm_id);
+       cm_id = rdma_create_id(current->nsproxy->net_ns,
+                              ucma_event_handler, ctx, cmd.ps, qp_type);
+       if (IS_ERR(cm_id)) {
+               ret = PTR_ERR(cm_id);
                goto err1;
        }
 
@@ -489,14 +490,19 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
                ret = -EFAULT;
                goto err2;
        }
+
+       ctx->cm_id = cm_id;
        return 0;
 
 err2:
-       rdma_destroy_id(ctx->cm_id);
+       rdma_destroy_id(cm_id);
 err1:
        mutex_lock(&mut);
        idr_remove(&ctx_idr, ctx->id);
        mutex_unlock(&mut);
+       mutex_lock(&file->mut);
+       list_del(&ctx->list);
+       mutex_unlock(&file->mut);
        kfree(ctx);
        return ret;
 }
@@ -664,19 +670,23 @@ static ssize_t ucma_resolve_ip(struct ucma_file *file,
                               int in_len, int out_len)
 {
        struct rdma_ucm_resolve_ip cmd;
+       struct sockaddr *src, *dst;
        struct ucma_context *ctx;
        int ret;
 
        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;
 
+       src = (struct sockaddr *) &cmd.src_addr;
+       dst = (struct sockaddr *) &cmd.dst_addr;
+       if (!rdma_addr_size(src) || !rdma_addr_size(dst))
+               return -EINVAL;
+
        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
-       ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
-                               (struct sockaddr *) &cmd.dst_addr,
-                               cmd.timeout_ms);
+       ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms);
        ucma_put_ctx(ctx);
        return ret;
 }
@@ -1149,6 +1159,9 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;
 
+       if (cmd.qp_state > IB_QPS_ERR)
+               return -EINVAL;
+
        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
@@ -1294,6 +1307,12 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
+       /* Bound the option length (not the user pointer); drop ctx on error */
+       if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE)) {
+               ucma_put_ctx(ctx);
+               return -EINVAL;
+       }
+
        optval = memdup_user((void __user *) (unsigned long) cmd.optval,
                             cmd.optlen);
        if (IS_ERR(optval)) {
@@ -1343,7 +1359,7 @@ static ssize_t ucma_process_join(struct ucma_file *file,
                return -ENOSPC;
 
        addr = (struct sockaddr *) &cmd->addr;
-       if (!cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr)))
+       if (cmd->addr_size != rdma_addr_size(addr))
                return -EINVAL;
 
        if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
@@ -1411,6 +1427,9 @@ static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
        join_cmd.uid = cmd.uid;
        join_cmd.id = cmd.id;
        join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr);
+       if (!join_cmd.addr_size)
+               return -EINVAL;
+
        join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
        memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);
 
@@ -1426,6 +1445,9 @@ static ssize_t ucma_join_multicast(struct ucma_file *file,
        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;
 
+       if (!rdma_addr_size((struct sockaddr *)&cmd.addr))
+               return -EINVAL;
+
        return ucma_process_join(file, &cmd, out_len);
 }
 
index 3eb7a8387116d4653cda1afc75a1ab8cabd29f65..96f76896488da6c0414411fea440520e75bf2843 100644 (file)
@@ -57,8 +57,8 @@
 #define BNXT_RE_PAGE_SIZE_8M           BIT(BNXT_RE_PAGE_SHIFT_8M)
 #define BNXT_RE_PAGE_SIZE_1G           BIT(BNXT_RE_PAGE_SHIFT_1G)
 
-#define BNXT_RE_MAX_MR_SIZE_LOW                BIT(BNXT_RE_PAGE_SHIFT_1G)
-#define BNXT_RE_MAX_MR_SIZE_HIGH       BIT(39)
+#define BNXT_RE_MAX_MR_SIZE_LOW                BIT_ULL(BNXT_RE_PAGE_SHIFT_1G)
+#define BNXT_RE_MAX_MR_SIZE_HIGH       BIT_ULL(39)
 #define BNXT_RE_MAX_MR_SIZE            BNXT_RE_MAX_MR_SIZE_HIGH
 
 #define BNXT_RE_MAX_QPC_COUNT          (64 * 1024)
index 643174d949a8c979a1f5878a258933ba7a939645..8301d7e5fa8c4c97364ee9fb834beca84319027b 100644 (file)
@@ -785,7 +785,7 @@ int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
        return 0;
 }
 
-static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
+unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
        __acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock)
 {
        unsigned long flags;
@@ -799,8 +799,8 @@ static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
        return flags;
 }
 
-static void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp,
-                              unsigned long flags)
+void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp,
+                       unsigned long flags)
        __releases(&qp->scq->cq_lock) __releases(&qp->rcq->cq_lock)
 {
        if (qp->rcq != qp->scq)
@@ -1606,6 +1606,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
        int status;
        union ib_gid sgid;
        struct ib_gid_attr sgid_attr;
+       unsigned long flags; /* value returned by bnxt_re_lock_cqs() */
        u8 nw_type;
 
        qp->qplib_qp.modify_flags = 0;
@@ -1634,14 +1635,18 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
                        dev_dbg(rdev_to_dev(rdev),
                                "Move QP = %p to flush list\n",
                                qp);
+                       flags = bnxt_re_lock_cqs(qp);
                        bnxt_qplib_add_flush_qp(&qp->qplib_qp);
+                       bnxt_re_unlock_cqs(qp, flags);
                }
                if (!qp->sumem &&
                    qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_RESET) {
                        dev_dbg(rdev_to_dev(rdev),
                                "Move QP = %p out of flush list\n",
                                qp);
+                       flags = bnxt_re_lock_cqs(qp);
                        bnxt_qplib_clean_qp(&qp->qplib_qp);
+                       bnxt_re_unlock_cqs(qp, flags);
                }
        }
        if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
@@ -2227,10 +2232,13 @@ static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr,
        wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV;
        wqe->local_inv.inv_l_key = wr->ex.invalidate_rkey;
 
+       /* Need unconditional fence for local invalidate
+        * opcode to work as expected.
+        */
+       wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+
        if (wr->send_flags & IB_SEND_SIGNALED)
                wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
-       if (wr->send_flags & IB_SEND_FENCE)
-               wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
        if (wr->send_flags & IB_SEND_SOLICITED)
                wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
 
@@ -2251,8 +2259,12 @@ static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr,
        wqe->frmr.levels = qplib_frpl->hwq.level + 1;
        wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR;
 
-       if (wr->wr.send_flags & IB_SEND_FENCE)
-               wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+       /* Need unconditional fence for reg_mr
+        * opcode to function as expected.
+        */
+
+       wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+
        if (wr->wr.send_flags & IB_SEND_SIGNALED)
                wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
 
@@ -3586,7 +3598,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
        int umem_pgs, page_shift, rc;
 
        if (length > BNXT_RE_MAX_MR_SIZE) {
-               dev_err(rdev_to_dev(rdev), "MR Size: %lld > Max supported:%ld\n",
+               dev_err(rdev_to_dev(rdev), "MR Size: %lld > Max supported:%lld\n",
                        length, BNXT_RE_MAX_MR_SIZE);
                return ERR_PTR(-ENOMEM);
        }
index b88a48d43a9dddd49c210d0a16779f3d4d0369b9..e62b7c2c7da6a1953605fbe7d089a3d9b6f9a50a 100644 (file)
@@ -222,4 +222,7 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
                                           struct ib_udata *udata);
 int bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
 int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp);
+void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, unsigned long flags);
 #endif /* __BNXT_RE_IB_VERBS_H__ */
index 33a448036c2ebd99217a40b9bb9d6372a798af0d..f6e361750466f50acab01a4f1a397260e3a84472 100644 (file)
@@ -730,6 +730,13 @@ static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
                                         struct bnxt_re_qp *qp)
 {
        struct ib_event event;
+       unsigned long flags; /* value returned by bnxt_re_lock_cqs() */
+
+       if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+               flags = bnxt_re_lock_cqs(qp);
+               bnxt_qplib_add_flush_qp(&qp->qplib_qp);
+               bnxt_re_unlock_cqs(qp, flags);
+       }
 
        memset(&event, 0, sizeof(event));
        if (qp->qplib_qp.srq) {
@@ -1416,9 +1423,12 @@ static void bnxt_re_task(struct work_struct *work)
        switch (re_work->event) {
        case NETDEV_REGISTER:
                rc = bnxt_re_ib_reg(rdev);
-               if (rc)
+               if (rc) {
                        dev_err(rdev_to_dev(rdev),
                                "Failed to register with IB: %#x", rc);
+                       bnxt_re_remove_one(rdev);
+                       bnxt_re_dev_unreg(rdev);
+               }
                break;
        case NETDEV_UP:
                bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
index 3ea5b9624f6b79c3f54c6d069b4af7ab133d1f31..3a78faba8d91b255bd6526f47ed9c5c3fbd8026b 100644 (file)
@@ -88,75 +88,35 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
        }
 }
 
-void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp,
-                                unsigned long *flags)
-       __acquires(&qp->scq->hwq.lock) __acquires(&qp->rcq->hwq.lock)
+static void bnxt_qplib_acquire_cq_flush_locks(struct bnxt_qplib_qp *qp,
+                                      unsigned long *flags)
+       __acquires(&qp->scq->flush_lock) __acquires(&qp->rcq->flush_lock)
 {
-       spin_lock_irqsave(&qp->scq->hwq.lock, *flags);
+       spin_lock_irqsave(&qp->scq->flush_lock, *flags);
        if (qp->scq == qp->rcq)
-               __acquire(&qp->rcq->hwq.lock);
+               __acquire(&qp->rcq->flush_lock);
        else
-               spin_lock(&qp->rcq->hwq.lock);
+               spin_lock(&qp->rcq->flush_lock);
 }
 
-void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp,
-                                unsigned long *flags)
-       __releases(&qp->scq->hwq.lock) __releases(&qp->rcq->hwq.lock)
+static void bnxt_qplib_release_cq_flush_locks(struct bnxt_qplib_qp *qp,
+                                      unsigned long *flags)
+       __releases(&qp->scq->flush_lock) __releases(&qp->rcq->flush_lock)
 {
        if (qp->scq == qp->rcq)
-               __release(&qp->rcq->hwq.lock);
+               __release(&qp->rcq->flush_lock);
        else
-               spin_unlock(&qp->rcq->hwq.lock);
-       spin_unlock_irqrestore(&qp->scq->hwq.lock, *flags);
-}
-
-static struct bnxt_qplib_cq *bnxt_qplib_find_buddy_cq(struct bnxt_qplib_qp *qp,
-                                                     struct bnxt_qplib_cq *cq)
-{
-       struct bnxt_qplib_cq *buddy_cq = NULL;
-
-       if (qp->scq == qp->rcq)
-               buddy_cq = NULL;
-       else if (qp->scq == cq)
-               buddy_cq = qp->rcq;
-       else
-               buddy_cq = qp->scq;
-       return buddy_cq;
-}
-
-static void bnxt_qplib_lock_buddy_cq(struct bnxt_qplib_qp *qp,
-                                    struct bnxt_qplib_cq *cq)
-       __acquires(&buddy_cq->hwq.lock)
-{
-       struct bnxt_qplib_cq *buddy_cq = NULL;
-
-       buddy_cq = bnxt_qplib_find_buddy_cq(qp, cq);
-       if (!buddy_cq)
-               __acquire(&cq->hwq.lock);
-       else
-               spin_lock(&buddy_cq->hwq.lock);
-}
-
-static void bnxt_qplib_unlock_buddy_cq(struct bnxt_qplib_qp *qp,
-                                      struct bnxt_qplib_cq *cq)
-       __releases(&buddy_cq->hwq.lock)
-{
-       struct bnxt_qplib_cq *buddy_cq = NULL;
-
-       buddy_cq = bnxt_qplib_find_buddy_cq(qp, cq);
-       if (!buddy_cq)
-               __release(&cq->hwq.lock);
-       else
-               spin_unlock(&buddy_cq->hwq.lock);
+               spin_unlock(&qp->rcq->flush_lock);
+       spin_unlock_irqrestore(&qp->scq->flush_lock, *flags);
 }
 
 void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
 {
        unsigned long flags;
 
-       bnxt_qplib_acquire_cq_locks(qp, &flags);
+       bnxt_qplib_acquire_cq_flush_locks(qp, &flags);
        __bnxt_qplib_add_flush_qp(qp);
-       bnxt_qplib_release_cq_locks(qp, &flags);
+       bnxt_qplib_release_cq_flush_locks(qp, &flags);
 }
 
 static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp)
@@ -177,7 +137,7 @@ void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp)
 {
        unsigned long flags;
 
-       bnxt_qplib_acquire_cq_locks(qp, &flags);
+       bnxt_qplib_acquire_cq_flush_locks(qp, &flags);
        __clean_cq(qp->scq, (u64)(unsigned long)qp);
        qp->sq.hwq.prod = 0;
        qp->sq.hwq.cons = 0;
@@ -186,7 +146,7 @@ void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp)
        qp->rq.hwq.cons = 0;
 
        __bnxt_qplib_del_flush_qp(qp);
-       bnxt_qplib_release_cq_locks(qp, &flags);
+       bnxt_qplib_release_cq_flush_locks(qp, &flags);
 }
 
 static void bnxt_qpn_cqn_sched_task(struct work_struct *work)
@@ -283,7 +243,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
        u32 sw_cons, raw_cons;
        u16 type;
        int budget = nq->budget;
-       u64 q_handle;
+       uintptr_t q_handle;
 
        /* Service the NQ until empty */
        raw_cons = hwq->cons;
@@ -566,7 +526,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
 
        /* Configure the request */
        req.dpi = cpu_to_le32(srq->dpi->dpi);
-       req.srq_handle = cpu_to_le64(srq);
+       req.srq_handle = cpu_to_le64((uintptr_t)srq);
 
        req.srq_size = cpu_to_le16((u16)srq->hwq.max_elements);
        pbl = &srq->hwq.pbl[PBL_LVL_0];
@@ -2107,9 +2067,6 @@ void bnxt_qplib_mark_qp_error(void *qp_handle)
        /* Must block new posting of SQ and RQ */
        qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
        bnxt_qplib_cancel_phantom_processing(qp);
-
-       /* Add qp to flush list of the CQ */
-       __bnxt_qplib_add_flush_qp(qp);
 }
 
 /* Note: SQE is valid from sw_sq_cons up to cqe_sq_cons (exclusive)
@@ -2285,9 +2242,9 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
                                sw_sq_cons, cqe->wr_id, cqe->status);
                        cqe++;
                        (*budget)--;
-                       bnxt_qplib_lock_buddy_cq(qp, cq);
                        bnxt_qplib_mark_qp_error(qp);
-                       bnxt_qplib_unlock_buddy_cq(qp, cq);
+                       /* Add qp to flush list of the CQ */
+                       bnxt_qplib_add_flush_qp(qp);
                } else {
                        if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
                                /* Before we complete, do WA 9060 */
@@ -2403,9 +2360,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
                if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
                        qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
                        /* Add qp to flush list of the CQ */
-                       bnxt_qplib_lock_buddy_cq(qp, cq);
-                       __bnxt_qplib_add_flush_qp(qp);
-                       bnxt_qplib_unlock_buddy_cq(qp, cq);
+                       bnxt_qplib_add_flush_qp(qp);
                }
        }
 
@@ -2489,9 +2444,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
                if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
                        qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
                        /* Add qp to flush list of the CQ */
-                       bnxt_qplib_lock_buddy_cq(qp, cq);
-                       __bnxt_qplib_add_flush_qp(qp);
-                       bnxt_qplib_unlock_buddy_cq(qp, cq);
+                       bnxt_qplib_add_flush_qp(qp);
                }
        }
 done:
@@ -2501,11 +2454,9 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
 bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
 {
        struct cq_base *hw_cqe, **hw_cqe_ptr;
-       unsigned long flags;
        u32 sw_cons, raw_cons;
        bool rc = true;
 
-       spin_lock_irqsave(&cq->hwq.lock, flags);
        raw_cons = cq->hwq.cons;
        sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
        hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
@@ -2513,7 +2464,6 @@ bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
 
         /* Check for Valid bit. If the CQE is valid, return false */
        rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements);
-       spin_unlock_irqrestore(&cq->hwq.lock, flags);
        return rc;
 }
 
@@ -2602,9 +2552,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
                if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
                        qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
                        /* Add qp to flush list of the CQ */
-                       bnxt_qplib_lock_buddy_cq(qp, cq);
-                       __bnxt_qplib_add_flush_qp(qp);
-                       bnxt_qplib_unlock_buddy_cq(qp, cq);
+                       bnxt_qplib_add_flush_qp(qp);
                }
        }
 
@@ -2719,9 +2667,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
         */
 
        /* Add qp to flush list of the CQ */
-       bnxt_qplib_lock_buddy_cq(qp, cq);
-       __bnxt_qplib_add_flush_qp(qp);
-       bnxt_qplib_unlock_buddy_cq(qp, cq);
+       bnxt_qplib_add_flush_qp(qp);
 done:
        return rc;
 }
@@ -2750,7 +2696,7 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
        u32 budget = num_cqes;
        unsigned long flags;
 
-       spin_lock_irqsave(&cq->hwq.lock, flags);
+       spin_lock_irqsave(&cq->flush_lock, flags);
        list_for_each_entry(qp, &cq->sqf_head, sq_flush) {
                dev_dbg(&cq->hwq.pdev->dev,
                        "QPLIB: FP: Flushing SQ QP= %p",
@@ -2764,7 +2710,7 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
                        qp);
                __flush_rq(&qp->rq, qp, &cqe, &budget);
        }
-       spin_unlock_irqrestore(&cq->hwq.lock, flags);
+       spin_unlock_irqrestore(&cq->flush_lock, flags);
 
        return num_cqes - budget;
 }
@@ -2773,11 +2719,9 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
                       int num_cqes, struct bnxt_qplib_qp **lib_qp)
 {
        struct cq_base *hw_cqe, **hw_cqe_ptr;
-       unsigned long flags;
        u32 sw_cons, raw_cons;
        int budget, rc = 0;
 
-       spin_lock_irqsave(&cq->hwq.lock, flags);
        raw_cons = cq->hwq.cons;
        budget = num_cqes;
 
@@ -2853,20 +2797,15 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
                bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ);
        }
 exit:
-       spin_unlock_irqrestore(&cq->hwq.lock, flags);
        return num_cqes - budget;
 }
 
 void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&cq->hwq.lock, flags);
        if (arm_type)
                bnxt_qplib_arm_cq(cq, arm_type);
        /* Using cq->arm_state variable to track whether to issue cq handler */
        atomic_set(&cq->arm_state, 1);
-       spin_unlock_irqrestore(&cq->hwq.lock, flags);
 }
 
 void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp)
index ca0a2ffa3509093a7953a80ea310f3f9314ace03..ade9f13c0fd1bb2e8cc04a8a1a85c9da1221db41 100644 (file)
@@ -389,6 +389,18 @@ struct bnxt_qplib_cq {
        struct list_head                sqf_head, rqf_head;
        atomic_t                        arm_state;
        spinlock_t                      compl_lock; /* synch CQ handlers */
+/* Locking Notes:
+ * QP can move to error state from modify_qp, async error event or error
+ * CQE as part of poll_cq. When QP is moved to error state, it gets added
+ * to two flush lists, one each for SQ and RQ.
+ * Each flush list is protected by qplib_cq->flush_lock. Both scq and rcq
+ * flush_locks should be acquired when QP is moved to error. The control path
+ * operations (modify_qp and async error events) are synchronized with poll_cq
+ * using upper level CQ locks (bnxt_re_cq->cq_lock) of both SCQ and RCQ.
+ * The qplib_cq->flush_lock is required to synchronize two instances of poll_cq
+ * of the same QP while manipulating the flush list.
+ */
+       spinlock_t                      flush_lock; /* QP flush management */
 };
 
 #define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE        sizeof(struct xrrq_irrq)
index 8329ec6a794696ddbc4892bf255ea22150cd18e7..80027a494730df22944bfbdc678ca36deadcec51 100644 (file)
@@ -305,9 +305,8 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
                        err_event->res_err_state_reason);
                if (!qp)
                        break;
-               bnxt_qplib_acquire_cq_locks(qp, &flags);
                bnxt_qplib_mark_qp_error(qp);
-               bnxt_qplib_release_cq_locks(qp, &flags);
+               rcfw->aeq_handler(rcfw, qp_event, qp);
                break;
        default:
                /* Command Response */
@@ -460,7 +459,11 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
        int rc;
 
        RCFW_CMD_PREP(req, INITIALIZE_FW, cmd_flags);
-
+       /* Supply (log-base-2-of-host-page-size - base-page-shift)
+        * to bono to adjust the doorbell page sizes.
+        */
+       req.log2_dbr_pg_size = cpu_to_le16(PAGE_SHIFT -
+                                          RCFW_DBR_BASE_PAGE_SHIFT);
        /*
         * VFs need not setup the HW context area, PF
         * shall setup this area for VF. Skipping the
index 6bee6e3636ea400d5bb1be21a1b064af77475f37..c7cce2e4185e687d2572f3cedc875566a5b3e4ff 100644 (file)
@@ -49,6 +49,7 @@
 #define RCFW_COMM_SIZE                 0x104
 
 #define RCFW_DBR_PCI_BAR_REGION                2
+#define RCFW_DBR_BASE_PAGE_SHIFT       12
 
 #define RCFW_CMD_PREP(req, CMD, cmd_flags)                             \
        do {                                                            \
index 03057983341f78b251370888095a6c9492112040..ee98e5efef84652f4accdc05231dccbf935f029d 100644 (file)
@@ -139,7 +139,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
        attr->max_pkey = le32_to_cpu(sb->max_pkeys);
 
        attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
-       attr->l2_db_size = (sb->l2_db_space_size + 1) * PAGE_SIZE;
+       attr->l2_db_size = (sb->l2_db_space_size + 1) *
+                           (0x01 << RCFW_DBR_BASE_PAGE_SHIFT);
        attr->max_sgid = le32_to_cpu(sb->max_gid);
 
        bnxt_qplib_query_version(rcfw, attr->fw_ver);
index 2d7ea096a247478392acf58b4a234112ea35f54a..3e5a4f760d0eb6332032f824bf3a9367d1bbd07a 100644 (file)
@@ -1761,7 +1761,30 @@ struct cmdq_initialize_fw {
        #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_2M               (0x3UL << 4)
        #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_8M               (0x4UL << 4)
        #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_1G               (0x5UL << 4)
-       __le16 reserved16;
+       /* This value is (log-base-2-of-DBR-page-size - 12).
+        * 0 for 4KB. HW supported values are enumerated below.
+        */
+       __le16  log2_dbr_pg_size;
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_MASK        0xfUL
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_SFT         0
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_4K       0x0UL
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_8K       0x1UL
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_16K      0x2UL
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_32K      0x3UL
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_64K      0x4UL
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128K     0x5UL
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_256K     0x6UL
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_512K     0x7UL
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_1M       0x8UL
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_2M       0x9UL
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_4M       0xaUL
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_8M       0xbUL
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_16M      0xcUL
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_32M      0xdUL
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_64M      0xeUL
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128M     0xfUL
+       #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_LAST                \
+                       CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128M
        __le64 qpc_page_dir;
        __le64 mrw_page_dir;
        __le64 srq_page_dir;
index 7a9d0de89d6a3f1afc0560a08e43918a2f539c1c..e96771ddc9a7274ba4174c3c68206be1626cd10d 100644 (file)
@@ -1217,6 +1217,7 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
                if (ctx->dev)
                        c4iw_remove(ctx);
                break;
+       case CXGB4_STATE_FATAL_ERROR:
        case CXGB4_STATE_START_RECOVERY:
                pr_info("%s: Fatal Error\n", pci_name(ctx->lldi.pdev));
                if (ctx->dev) {
index 9a566ee3ceffeff4b76b90800f732727c1902ebd..82adc0d1d30ef39dfb716758164269a4a0702d52 100644 (file)
@@ -601,6 +601,7 @@ static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct
        wc->dlid_path_bits = 0;
 
        if (is_eth) {
+               wc->slid = 0;
                wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid);
                memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4);
                memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2);
@@ -851,7 +852,6 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
                        }
                }
 
-               wc->slid           = be16_to_cpu(cqe->rlid);
                g_mlpath_rqpn      = be32_to_cpu(cqe->g_mlpath_rqpn);
                wc->src_qp         = g_mlpath_rqpn & 0xffffff;
                wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
@@ -860,6 +860,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
                wc->wc_flags      |= mlx4_ib_ipoib_csum_ok(cqe->status,
                                        cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
                if (is_eth) {
+                       wc->slid = 0;
                        wc->sl  = be16_to_cpu(cqe->sl_vid) >> 13;
                        if (be32_to_cpu(cqe->vlan_my_qpn) &
                                        MLX4_CQE_CVLAN_PRESENT_MASK) {
@@ -871,6 +872,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
                        memcpy(wc->smac, cqe->smac, ETH_ALEN);
                        wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
                } else {
+                       wc->slid = be16_to_cpu(cqe->rlid);
                        wc->sl  = be16_to_cpu(cqe->sl_vid) >> 12;
                        wc->vlan_id = 0xffff;
                }
index 8d2ee9322f2e04448cfbdbf72112432f8554c963..5a0e4fc4785aa0164fa9d25c7a11a8bbda8f1da5 100644 (file)
@@ -219,8 +219,6 @@ static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
                        gid_tbl[i].version = 2;
                        if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
                                gid_tbl[i].type = 1;
-                       else
-                               memset(&gid_tbl[i].gid, 0, 12);
                }
        }
 
@@ -366,8 +364,13 @@ static int mlx4_ib_del_gid(struct ib_device *device,
                if (!gids) {
                        ret = -ENOMEM;
                } else {
-                       for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
-                               memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+                       for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
+                               memcpy(&gids[i].gid,
+                                      &port_gid_table->gids[i].gid,
+                                      sizeof(union ib_gid));
+                               gids[i].gid_type =
+                                   port_gid_table->gids[i].gid_type;
+                       }
                }
        }
        spin_unlock_bh(&iboe->lock);
index bc6299697ddae9ff3152ac8338402d91cd92ac6e..d42b922bede87255be13a15158e54d0e8d12c844 100644 (file)
@@ -2,3 +2,4 @@ obj-$(CONFIG_MLX5_INFINIBAND)   += mlx5_ib.o
 
 mlx5_ib-y :=   main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
+mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
index c4c7b82f4ac12ee23f63db99fac6346113d0eeca..94a27d89a303942141fdde0a6789348755951678 100644 (file)
@@ -221,7 +221,6 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
                wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
                break;
        }
-       wc->slid           = be16_to_cpu(cqe->slid);
        wc->src_qp         = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
        wc->dlid_path_bits = cqe->ml_path;
        g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
@@ -236,10 +235,12 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
        }
 
        if (ll != IB_LINK_LAYER_ETHERNET) {
+               wc->slid = be16_to_cpu(cqe->slid);
                wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
                return;
        }
 
+       wc->slid = 0;
        vlan_present = cqe->l4_l3_hdr_type & 0x1;
        roce_packet_type   = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3;
        if (vlan_present) {
@@ -1188,7 +1189,12 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
        if (ucmd.reserved0 || ucmd.reserved1)
                return -EINVAL;
 
-       umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
+       /* check multiplication overflow */
+       if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
+               return -EINVAL;
+
+       umem = ib_umem_get(context, ucmd.buf_addr,
+                          (size_t)ucmd.cqe_size * entries,
                           IB_ACCESS_LOCAL_WRITE, 1);
        if (IS_ERR(umem)) {
                err = PTR_ERR(umem);
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
new file mode 100644 (file)
index 0000000..0e04fdd
--- /dev/null
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#include "ib_rep.h"
+
+/*
+ * Stage table handed to __mlx5_ib_add() when an IB device is created for a
+ * vport representor (see mlx5_ib_vport_rep_load()).  Each STAGE_CREATE()
+ * entry appears to pair a stage id with an init and a cleanup callback;
+ * NULL means no callback of that kind is supplied for the stage.
+ * NOTE(review): STAGE_CREATE() is defined elsewhere - confirm argument order.
+ *
+ * The FLOW_DB, NON_DEFAULT_CB and ROCE stages use the *_rep_* callbacks;
+ * the remaining stages use the common mlx5_ib_stage_* callbacks.
+ */
+static const struct mlx5_ib_profile rep_profile = {
+       STAGE_CREATE(MLX5_IB_STAGE_INIT,
+                    mlx5_ib_stage_init_init,
+                    mlx5_ib_stage_init_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+                    mlx5_ib_stage_rep_flow_db_init,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+                    mlx5_ib_stage_caps_init,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+                    mlx5_ib_stage_rep_non_default_cb,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+                    mlx5_ib_stage_rep_roce_init,
+                    mlx5_ib_stage_rep_roce_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+                    mlx5_ib_stage_dev_res_init,
+                    mlx5_ib_stage_dev_res_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+                    mlx5_ib_stage_counters_init,
+                    mlx5_ib_stage_counters_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+                    mlx5_ib_stage_bfrag_init,
+                    mlx5_ib_stage_bfrag_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
+                    NULL,
+                    mlx5_ib_stage_pre_ib_reg_umr_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+                    mlx5_ib_stage_ib_reg_init,
+                    mlx5_ib_stage_ib_reg_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
+                    mlx5_ib_stage_post_ib_reg_umr_init,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+                    mlx5_ib_stage_class_attr_init,
+                    NULL),
+};
+
+/*
+ * Load callback registered for vport 0 by mlx5_ib_register_vport_reps().
+ * Performs no work and always reports success; both arguments are unused.
+ */
+static int
+mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+       return 0;
+}
+
+/*
+ * Unload callback registered for vport 0 by mlx5_ib_register_vport_reps().
+ * Only clears the REP_IB private pointer of @rep; nothing is torn down here.
+ */
+static void
+mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+       rep->rep_if[REP_IB].priv = NULL;
+}
+
+/*
+ * Load callback for the vports registered by mlx5_ib_rep_register_vf_vports():
+ * allocates an mlx5_ib_dev for @rep, brings it up with rep_profile and
+ * publishes it in rep->rep_if[REP_IB].priv.
+ *
+ * Returns 0 on success, -ENOMEM if the IB device cannot be allocated, or
+ * -EINVAL if __mlx5_ib_add() returns NULL.
+ */
+static int
+mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+       struct mlx5_ib_dev *ibdev;
+
+       ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev));
+       if (!ibdev)
+               return -ENOMEM;
+
+       ibdev->rep = rep;
+       ibdev->mdev = dev;
+       /* Use the larger of the two port-count capabilities. */
+       ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports),
+                              MLX5_CAP_GEN(dev, num_vhca_ports));
+       /*
+        * NOTE(review): ibdev is not released on this failure path; presumably
+        * __mlx5_ib_add() frees it itself when it fails - confirm.
+        */
+       if (!__mlx5_ib_add(ibdev, &rep_profile))
+               return -EINVAL;
+
+       /* Publish only after the device has been added successfully. */
+       rep->rep_if[REP_IB].priv = ibdev;
+
+       return 0;
+}
+
+/*
+ * Unload callback for the vports registered by
+ * mlx5_ib_rep_register_vf_vports().  Does nothing when no IB device is
+ * attached to @rep; otherwise removes the device via __mlx5_ib_remove()
+ * using the device's own profile and clears the REP_IB private pointer.
+ */
+static void
+mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+       struct mlx5_ib_dev *dev;
+
+       /* Nothing was loaded (or it was already unloaded). */
+       if (!rep->rep_if[REP_IB].priv)
+               return;
+
+       dev = mlx5_ib_rep_to_dev(rep);
+       __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+       rep->rep_if[REP_IB].priv = NULL;
+}
+
+/*
+ * get_proto_dev callback for the REP_IB interface: returns the mlx5_ib_dev
+ * stored in rep->rep_if[REP_IB].priv (may be NULL) as an untyped pointer.
+ */
+static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+       return mlx5_ib_rep_to_dev(rep);
+}
+
+/*
+ * Register the REP_IB load/unload/get_proto_dev callbacks with the e-switch
+ * for every vport from 1 up to (but excluding) MLX5_TOTAL_VPORTS(dev->mdev).
+ * Vport 0 is handled separately by mlx5_ib_register_vport_reps().
+ */
+static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch;
+       int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+       int vport;
+
+       for (vport = 1; vport < total_vfs; vport++) {
+               /*
+                * NOTE(review): rep_if lives on the stack; presumably
+                * mlx5_eswitch_register_vport_rep() copies it - confirm.
+                */
+               struct mlx5_eswitch_rep_if rep_if = {};
+
+               rep_if.load = mlx5_ib_vport_rep_load;
+               rep_if.unload = mlx5_ib_vport_rep_unload;
+               rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+               mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB);
+       }
+}
+
+/*
+ * Counterpart of mlx5_ib_rep_register_vf_vports(): unregisters the REP_IB
+ * interface for the same vport range (1 .. MLX5_TOTAL_VPORTS - 1).
+ */
+static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch;
+       int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+       int vport;
+
+       for (vport = 1; vport < total_vfs; vport++)
+               mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB);
+}
+
+/*
+ * Register the REP_IB interface for all vports of @dev's e-switch.
+ *
+ * Vport 0 gets the "nic" load/unload callbacks and has @dev itself stored as
+ * its private pointer; the remaining vports are registered through
+ * mlx5_ib_rep_register_vf_vports().
+ */
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+       struct mlx5_eswitch_rep_if rep_if = {};
+
+       rep_if.load = mlx5_ib_nic_rep_load;
+       rep_if.unload = mlx5_ib_nic_rep_unload;
+       rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+       /* The existing device serves vport 0; no new IB device is created. */
+       rep_if.priv = dev;
+
+       mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB);
+
+       mlx5_ib_rep_register_vf_vports(dev);
+}
+
+/*
+ * Undo mlx5_ib_register_vport_reps(), in reverse order: first vports
+ * 1 .. MLX5_TOTAL_VPORTS - 1, then vport 0.
+ */
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch;
+
+       mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */
+       mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF */
+}
+
+/* Thin wrapper: returns whatever mlx5_eswitch_mode() reports for @esw. */
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+       return mlx5_eswitch_mode(esw);
+}
+
+/*
+ * Look up the REP_IB protocol device of vport @vport_index through
+ * mlx5_eswitch_get_proto_dev() and return it as an mlx5_ib_dev pointer.
+ */
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+                                         int vport_index)
+{
+       return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB);
+}
+
+/*
+ * Look up the REP_ETH (not REP_IB) protocol device of vport @vport_index
+ * through mlx5_eswitch_get_proto_dev() and return it as a net_device pointer.
+ */
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+                                         int vport_index)
+{
+       return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH);
+}
+
+/*
+ * Return the REP_IB protocol device of the uplink, as reported by
+ * mlx5_eswitch_uplink_get_proto_dev().
+ */
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+       return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB);
+}
+
+/* Thin wrapper: returns mlx5_eswitch_vport_rep() for @vport on @esw. */
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport)
+{
+       return mlx5_eswitch_vport_rep(esw, vport);
+}
+
+/*
+ * Install an e-switch "send to vport" rule for send queue @sq.
+ *
+ * When @dev is not a representor device (dev->rep is NULL) nothing is done
+ * and 0 is returned.  Otherwise a rule is added for the representor's vport
+ * and the SQ's QP number, and its handle is stored in sq->flow_rule.
+ *
+ * Returns 0 on success or the error encoded in the pointer returned by
+ * mlx5_eswitch_add_send_to_vport_rule().
+ */
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+                             struct mlx5_ib_sq *sq)
+{
+       struct mlx5_flow_handle *flow_rule;
+       struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+       if (!dev->rep)
+               return 0;
+
+       flow_rule =
+               mlx5_eswitch_add_send_to_vport_rule(esw,
+                                                   dev->rep->vport,
+                                                   sq->base.mqp.qpn);
+       if (IS_ERR(flow_rule))
+               return PTR_ERR(flow_rule);
+       sq->flow_rule = flow_rule;
+
+       return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
new file mode 100644 (file)
index 0000000..046fd94
--- /dev/null
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLX5_IB_REP_H__
+#define __MLX5_IB_REP_H__
+
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_ib.h"
+
+/*
+ * Representor helpers.  With CONFIG_MLX5_ESWITCH enabled these are the real
+ * functions (ib_rep.c is only built in that configuration); otherwise the
+ * inline stubs below are used, which do nothing and return
+ * SRIOV_NONE / NULL / 0.
+ */
+#ifdef CONFIG_MLX5_ESWITCH
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+                                         int vport_index);
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw);
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+                                          int vport_index);
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev);
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev);
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+                             struct mlx5_ib_sq *sq);
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+                                         int vport_index);
+#else /* CONFIG_MLX5_ESWITCH */
+/* Stub: without e-switch support the mode is always SRIOV_NONE. */
+static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+       return SRIOV_NONE;
+}
+
+/* Stub: there is never a representor IB device. */
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+                                         int vport_index)
+{
+       return NULL;
+}
+
+/* Stub: there is never an uplink IB device. */
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+       return NULL;
+}
+
+/* Stub: there is never a vport representor. */
+static inline
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+                                          int vport_index)
+{
+       return NULL;
+}
+
+/* Stubs: registration is a no-op and rule creation trivially succeeds. */
+static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+                                           struct mlx5_ib_sq *sq)
+{
+       return 0;
+}
+
+/* Stub: there is never a representor net device. */
+static inline
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+                                         int vport_index)
+{
+       return NULL;
+}
+#endif /* CONFIG_MLX5_ESWITCH */
+
+/*
+ * Return the REP_IB private pointer of @rep as an mlx5_ib_dev.  The result
+ * is NULL when no IB device is currently attached to the representor.
+ * @rep itself must not be NULL; it is dereferenced unconditionally.
+ */
+static inline
+struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
+{
+       return (struct mlx5_ib_dev *)rep->rep_if[REP_IB].priv;
+}
+#endif /* __MLX5_IB_REP_H__ */
index 4236c80868200822d30cfc3d8adea4c97ce9afc2..390e4375647ee2c5d58e894f7e00972a10e09fb3 100644 (file)
@@ -57,7 +57,9 @@
 #include <linux/in.h>
 #include <linux/etherdevice.h>
 #include "mlx5_ib.h"
+#include "ib_rep.h"
 #include "cmd.h"
+#include <linux/mlx5/fs_helpers.h>
 
 #define DRIVER_NAME "mlx5_ib"
 #define DRIVER_VERSION "5.0-0"
@@ -130,7 +132,7 @@ static int get_port_state(struct ib_device *ibdev,
        int ret;
 
        memset(&attr, 0, sizeof(attr));
-       ret = mlx5_ib_query_port(ibdev, port_num, &attr);
+       ret = ibdev->query_port(ibdev, port_num, &attr);
        if (!ret)
                *state = attr.state;
        return ret;
@@ -154,10 +156,19 @@ static int mlx5_netdev_event(struct notifier_block *this,
        case NETDEV_REGISTER:
        case NETDEV_UNREGISTER:
                write_lock(&roce->netdev_lock);
-
-               if (ndev->dev.parent == &mdev->pdev->dev)
-                       roce->netdev = (event == NETDEV_UNREGISTER) ?
+               if (ibdev->rep) {
+                       struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch;
+                       struct net_device *rep_ndev;
+
+                       rep_ndev = mlx5_ib_get_rep_netdev(esw,
+                                                         ibdev->rep->vport);
+                       if (rep_ndev == ndev)
+                               roce->netdev = (event == NETDEV_UNREGISTER) ?
                                        NULL : ndev;
+               } else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) {
+                       roce->netdev = (event == NETDEV_UNREGISTER) ?
+                               NULL : ndev;
+               }
                write_unlock(&roce->netdev_lock);
                break;
 
@@ -245,12 +256,16 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
        struct mlx5_ib_multiport_info *mpi;
        struct mlx5_ib_port *port;
 
+       if (!mlx5_core_mp_enabled(ibdev->mdev) ||
+           ll != IB_LINK_LAYER_ETHERNET) {
+               if (native_port_num)
+                       *native_port_num = ib_port_num;
+               return ibdev->mdev;
+       }
+
        if (native_port_num)
                *native_port_num = 1;
 
-       if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
-               return ibdev->mdev;
-
        port = &ibdev->port[ib_port_num - 1];
        if (!port)
                return NULL;
@@ -1268,6 +1283,22 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
        return ret;
 }
 
+/*
+ * query_port callback installed for representor devices by
+ * mlx5_ib_stage_rep_non_default_cb().  Delegates to mlx5_query_port_roce()
+ * and, on success with a non-NULL @props, reports an empty GID table.
+ *
+ * Returns the value returned by mlx5_query_port_roce().
+ */
+static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port,
+                                 struct ib_port_attr *props)
+{
+       int ret;
+
+       /* Only link layer == ethernet is valid for representors */
+       ret = mlx5_query_port_roce(ibdev, port, props);
+       if (ret || !props)
+               return ret;
+
+       /* We don't support GIDS */
+       props->gid_tbl_len = 0;
+
+       return ret;
+}
+
 static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
                             union ib_gid *gid)
 {
@@ -2286,11 +2317,9 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
                   offsetof(typeof(filter), field) -\
                   sizeof(filter.field))
 
-#define IPV4_VERSION 4
-#define IPV6_VERSION 6
 static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
                           u32 *match_v, const union ib_flow_spec *ib_spec,
-                          u32 *tag_id, bool *is_drop)
+                          struct mlx5_flow_act *action)
 {
        void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
                                           misc_parameters);
@@ -2373,7 +2402,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 ip_version, 0xf);
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-                                ip_version, IPV4_VERSION);
+                                ip_version, MLX5_FS_IPV4_VERSION);
                } else {
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 ethertype, 0xffff);
@@ -2412,7 +2441,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 ip_version, 0xf);
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-                                ip_version, IPV6_VERSION);
+                                ip_version, MLX5_FS_IPV6_VERSION);
                } else {
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 ethertype, 0xffff);
@@ -2508,13 +2537,14 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
                if (ib_spec->flow_tag.tag_id >= BIT(24))
                        return -EINVAL;
 
-               *tag_id = ib_spec->flow_tag.tag_id;
+               action->flow_tag = ib_spec->flow_tag.tag_id;
+               action->has_flow_tag = true;
                break;
        case IB_FLOW_SPEC_ACTION_DROP:
                if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
                                         LAST_DROP_FIELD))
                        return -EOPNOTSUPP;
-               *is_drop = true;
+               action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
                break;
        default:
                return -EINVAL;
@@ -2631,7 +2661,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
                                                          ibflow);
        struct mlx5_ib_flow_handler *iter, *tmp;
 
-       mutex_lock(&dev->flow_db.lock);
+       mutex_lock(&dev->flow_db->lock);
 
        list_for_each_entry_safe(iter, tmp, &handler->list, list) {
                mlx5_del_flow_rules(iter->rule);
@@ -2642,7 +2672,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
 
        mlx5_del_flow_rules(handler->rule);
        put_flow_table(dev, handler->prio, true);
-       mutex_unlock(&dev->flow_db.lock);
+       mutex_unlock(&dev->flow_db->lock);
 
        kfree(handler);
 
@@ -2691,7 +2721,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
                                             MLX5_FLOW_NAMESPACE_BYPASS);
                num_entries = MLX5_FS_MAX_ENTRIES;
                num_groups = MLX5_FS_MAX_TYPES;
-               prio = &dev->flow_db.prios[priority];
+               prio = &dev->flow_db->prios[priority];
        } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
                   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
                ns = mlx5_get_flow_namespace(dev->mdev,
@@ -2699,7 +2729,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
                build_leftovers_ft_param(&priority,
                                         &num_entries,
                                         &num_groups);
-               prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+               prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
        } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
                if (!MLX5_CAP_FLOWTABLE(dev->mdev,
                                        allow_sniffer_and_nic_rx_shared_tir))
@@ -2709,7 +2739,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
                                             MLX5_FLOW_NAMESPACE_SNIFFER_RX :
                                             MLX5_FLOW_NAMESPACE_SNIFFER_TX);
 
-               prio = &dev->flow_db.sniffer[ft_type];
+               prio = &dev->flow_db->sniffer[ft_type];
                priority = 0;
                num_entries = 1;
                num_groups = 1;
@@ -2767,13 +2797,11 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 {
        struct mlx5_flow_table  *ft = ft_prio->flow_table;
        struct mlx5_ib_flow_handler *handler;
-       struct mlx5_flow_act flow_act = {0};
+       struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
        struct mlx5_flow_spec *spec;
        struct mlx5_flow_destination *rule_dst = dst;
        const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
        unsigned int spec_index;
-       u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
-       bool is_drop = false;
        int err = 0;
        int dest_num = 1;
 
@@ -2792,7 +2820,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
        for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
                err = parse_flow_attr(dev->mdev, spec->match_criteria,
                                      spec->match_value,
-                                     ib_flow, &flow_tag, &is_drop);
+                                     ib_flow, &flow_act);
                if (err < 0)
                        goto free;
 
@@ -2802,9 +2830,20 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
        if (!flow_is_multicast_only(flow_attr))
                set_underlay_qp(dev, spec, underlay_qpn);
 
+       if (dev->rep) {
+               void *misc;
+
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                                   misc_parameters);
+               MLX5_SET(fte_match_set_misc, misc, source_port,
+                        dev->rep->vport);
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+                                   misc_parameters);
+               MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+       }
+
        spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
-       if (is_drop) {
-               flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+       if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
                rule_dst = NULL;
                dest_num = 0;
        } else {
@@ -2812,15 +2851,14 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
                    MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
        }
 
-       if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
+       if (flow_act.has_flow_tag &&
            (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
             flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
                mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
-                            flow_tag, flow_attr->type);
+                            flow_act.flow_tag, flow_attr->type);
                err = -EINVAL;
                goto free;
        }
-       flow_act.flow_tag = flow_tag;
        handler->rule = mlx5_add_flow_rules(ft, spec,
                                            &flow_act,
                                            rule_dst, dest_num);
@@ -2999,7 +3037,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
        if (!dst)
                return ERR_PTR(-ENOMEM);
 
-       mutex_lock(&dev->flow_db.lock);
+       mutex_lock(&dev->flow_db->lock);
 
        ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
        if (IS_ERR(ft_prio)) {
@@ -3048,7 +3086,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
                goto destroy_ft;
        }
 
-       mutex_unlock(&dev->flow_db.lock);
+       mutex_unlock(&dev->flow_db->lock);
        kfree(dst);
 
        return &handler->ibflow;
@@ -3058,7 +3096,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
        if (ft_prio_tx)
                put_flow_table(dev, ft_prio_tx, false);
 unlock:
-       mutex_unlock(&dev->flow_db.lock);
+       mutex_unlock(&dev->flow_db->lock);
        kfree(dst);
        kfree(handler);
        return ERR_PTR(err);
@@ -3263,7 +3301,7 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
        struct mlx5_ib_dev *ibdev;
        struct ib_event ibev;
        bool fatal = false;
-       u8 port = 0;
+       u8 port = (u8)work->param;
 
        if (mlx5_core_is_mp_slave(work->dev)) {
                ibdev = mlx5_ib_get_ibdev_from_mpi(work->context);
@@ -3283,8 +3321,6 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
        case MLX5_DEV_EVENT_PORT_UP:
        case MLX5_DEV_EVENT_PORT_DOWN:
        case MLX5_DEV_EVENT_PORT_INITIALIZED:
-               port = (u8)work->param;
-
                /* In RoCE, port up/down events are handled in
                 * mlx5_netdev_event().
                 */
@@ -3298,24 +3334,19 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
 
        case MLX5_DEV_EVENT_LID_CHANGE:
                ibev.event = IB_EVENT_LID_CHANGE;
-               port = (u8)work->param;
                break;
 
        case MLX5_DEV_EVENT_PKEY_CHANGE:
                ibev.event = IB_EVENT_PKEY_CHANGE;
-               port = (u8)work->param;
-
                schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
                break;
 
        case MLX5_DEV_EVENT_GUID_CHANGE:
                ibev.event = IB_EVENT_GID_CHANGE;
-               port = (u8)work->param;
                break;
 
        case MLX5_DEV_EVENT_CLIENT_REREG:
                ibev.event = IB_EVENT_CLIENT_REREGISTER;
-               port = (u8)work->param;
                break;
        case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
                schedule_work(&ibdev->delay_drop.delay_drop_work);
@@ -3327,7 +3358,7 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
        ibev.device           = &ibdev->ib_dev;
        ibev.element.port_num = port;
 
-       if (port < 1 || port > ibdev->num_ports) {
+       if (!rdma_is_port_valid(&ibdev->ib_dev, port)) {
                mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
                goto out;
        }
@@ -3772,6 +3803,25 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
        return 0;
 }
 
+/*
+ * get_port_immutable callback installed for representor devices by
+ * mlx5_ib_stage_rep_non_default_cb().  Advertises the port as
+ * RDMA_CORE_PORT_RAW_PACKET and copies the pkey/gid table lengths obtained
+ * from ib_query_port().
+ *
+ * Returns 0 on success or the error returned by ib_query_port().
+ */
+static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num,
+                                  struct ib_port_immutable *immutable)
+{
+       struct ib_port_attr attr;
+       int err;
+
+       /*
+        * NOTE(review): core_cap_flags is assigned the same value again
+        * after the query below; one of the two stores looks redundant
+        * unless the query path reads this field - confirm before removing.
+        */
+       immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+       err = ib_query_port(ibdev, port_num, &attr);
+       if (err)
+               return err;
+
+       immutable->pkey_tbl_len = attr.pkey_tbl_len;
+       immutable->gid_tbl_len = attr.gid_tbl_len;
+       immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+       return 0;
+}
+
 static void get_dev_fw_str(struct ib_device *ibdev, char *str)
 {
        struct mlx5_ib_dev *dev =
@@ -3802,7 +3852,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
                goto err_destroy_vport_lag;
        }
 
-       dev->flow_db.lag_demux_ft = ft;
+       dev->flow_db->lag_demux_ft = ft;
        return 0;
 
 err_destroy_vport_lag:
@@ -3814,9 +3864,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
 {
        struct mlx5_core_dev *mdev = dev->mdev;
 
-       if (dev->flow_db.lag_demux_ft) {
-               mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft);
-               dev->flow_db.lag_demux_ft = NULL;
+       if (dev->flow_db->lag_demux_ft) {
+               mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
+               dev->flow_db->lag_demux_ft = NULL;
 
                mlx5_cmd_destroy_vport_lag(mdev);
        }
@@ -3848,14 +3898,10 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
 {
        int err;
 
-       err = mlx5_add_netdev_notifier(dev, port_num);
-       if (err)
-               return err;
-
        if (MLX5_CAP_GEN(dev->mdev, roce)) {
                err = mlx5_nic_vport_enable_roce(dev->mdev);
                if (err)
-                       goto err_unregister_netdevice_notifier;
+                       return err;
        }
 
        err = mlx5_eth_lag_init(dev);
@@ -3868,8 +3914,6 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
        if (MLX5_CAP_GEN(dev->mdev, roce))
                mlx5_nic_vport_disable_roce(dev->mdev);
 
-err_unregister_netdevice_notifier:
-       mlx5_remove_netdev_notifier(dev, port_num);
        return err;
 }
 
@@ -4503,7 +4547,7 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
        mlx5_nic_vport_disable_roce(dev->mdev);
 }
 
-static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 {
        mlx5_ib_cleanup_multiport_master(dev);
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
@@ -4512,7 +4556,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
        kfree(dev->port);
 }
 
-static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 {
        struct mlx5_core_dev *mdev = dev->mdev;
        const char *name;
@@ -4534,8 +4578,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
                goto err_free_port;
 
        if (!mlx5_core_mp_enabled(mdev)) {
-               int i;
-
                for (i = 1; i <= dev->num_ports; i++) {
                        err = get_port_caps(dev, i);
                        if (err)
@@ -4564,7 +4606,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
                dev->mdev->priv.eq_table.num_comp_vectors;
        dev->ib_dev.dev.parent          = &mdev->pdev->dev;
 
-       mutex_init(&dev->flow_db.lock);
        mutex_init(&dev->cap_mask_mutex);
        INIT_LIST_HEAD(&dev->qp_list);
        spin_lock_init(&dev->reset_flow_resource_lock);
@@ -4585,7 +4626,38 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
        return -ENOMEM;
 }
 
-static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
+/*
+ * FLOW_DB stage init: allocate a zeroed flow database for @dev and
+ * initialise its lock.  Returns 0 on success or -ENOMEM.
+ */
+static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
+{
+       dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+       if (!dev->flow_db)
+               return -ENOMEM;
+
+       mutex_init(&dev->flow_db->lock);
+
+       return 0;
+}
+
+/*
+ * FLOW_DB stage init for representor devices: instead of allocating a flow
+ * database, point @dev at the one owned by the uplink IB device, so both
+ * share the same object (and its lock).
+ *
+ * Returns 0 on success or -EINVAL when no uplink IB device is available.
+ */
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_ib_dev *nic_dev;
+
+       nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch);
+
+       if (!nic_dev)
+               return -EINVAL;
+
+       /* Borrowed pointer: the representor must not free it. */
+       dev->flow_db = nic_dev->flow_db;
+
+       return 0;
+}
+
+/*
+ * FLOW_DB stage cleanup: free the flow database allocated by
+ * mlx5_ib_stage_flow_db_init().
+ */
+static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
+{
+       kfree(dev->flow_db);
+}
+
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 {
        struct mlx5_core_dev *mdev = dev->mdev;
        int err;
@@ -4626,7 +4698,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
                (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
 
        dev->ib_dev.query_device        = mlx5_ib_query_device;
-       dev->ib_dev.query_port          = mlx5_ib_query_port;
        dev->ib_dev.get_link_layer      = mlx5_ib_port_link_layer;
        dev->ib_dev.query_gid           = mlx5_ib_query_gid;
        dev->ib_dev.add_gid             = mlx5_ib_add_gid;
@@ -4669,7 +4740,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
        dev->ib_dev.alloc_mr            = mlx5_ib_alloc_mr;
        dev->ib_dev.map_mr_sg           = mlx5_ib_map_mr_sg;
        dev->ib_dev.check_mr_status     = mlx5_ib_check_mr_status;
-       dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
        dev->ib_dev.get_dev_fw_str      = get_dev_fw_str;
        dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
        if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
@@ -4720,6 +4790,80 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
        return 0;
 }
 
+/*
+ * NON_DEFAULT_CB stage init: install the regular get_port_immutable and
+ * query_port callbacks on the IB device.  Always returns 0.
+ */
+static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
+{
+       dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
+       dev->ib_dev.query_port          = mlx5_ib_query_port;
+
+       return 0;
+}
+
+/*
+ * NON_DEFAULT_CB stage init for representor devices: install the
+ * representor variants of get_port_immutable and query_port.
+ * Always returns 0.
+ */
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
+{
+       dev->ib_dev.get_port_immutable  = mlx5_port_rep_immutable;
+       dev->ib_dev.query_port          = mlx5_ib_rep_query_port;
+
+       return 0;
+}
+
+/*
+ * RoCE setup shared by the regular and representor ROCE stages.
+ *
+ * Initialises the per-port roce[] entries, installs the netdev/WQ/RWQ
+ * indirection-table callbacks, enables the matching extended uverbs commands
+ * and finally registers the netdev notifier for @port_num.
+ *
+ * Returns the result of mlx5_add_netdev_notifier().
+ */
+static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev,
+                                         u8 port_num)
+{
+       int i;
+
+       for (i = 0; i < dev->num_ports; i++) {
+               dev->roce[i].dev = dev;
+               /* roce[] is indexed from 0, native port numbers from 1. */
+               dev->roce[i].native_port_num = i + 1;
+               dev->roce[i].last_port_state = IB_PORT_DOWN;
+       }
+
+       dev->ib_dev.get_netdev  = mlx5_ib_get_netdev;
+       dev->ib_dev.create_wq    = mlx5_ib_create_wq;
+       dev->ib_dev.modify_wq    = mlx5_ib_modify_wq;
+       dev->ib_dev.destroy_wq   = mlx5_ib_destroy_wq;
+       dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
+       dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
+
+       dev->ib_dev.uverbs_ex_cmd_mask |=
+                       (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+                       (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+                       (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+                       (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+                       (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+
+       return mlx5_add_netdev_notifier(dev, port_num);
+}
+
+/*
+ * Undo the notifier registration done by mlx5_ib_stage_common_roce_init().
+ * The port index is recomputed here (native port number minus one) rather
+ * than passed in, matching how the ROCE stage init functions derive it.
+ */
+static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+       u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
+       mlx5_remove_netdev_notifier(dev, port_num);
+}
+
+/*
+ * ROCE stage init for representor devices.  Runs the common RoCE setup only
+ * when the device's port type capability maps to an Ethernet link layer;
+ * for any other link layer it does nothing.
+ *
+ * Returns 0, or the error from mlx5_ib_stage_common_roce_init().
+ */
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_core_dev *mdev = dev->mdev;
+       enum rdma_link_layer ll;
+       int port_type_cap;
+       int err = 0;
+       u8 port_num;
+
+       /* Zero-based index of the native port. */
+       port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+       port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+       ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+       if (ll == IB_LINK_LAYER_ETHERNET)
+               err = mlx5_ib_stage_common_roce_init(dev, port_num);
+
+       return err;
+}
+
+/*
+ * ROCE stage cleanup for representor devices.
+ * NOTE(review): the notifier is removed unconditionally, whereas
+ * mlx5_ib_stage_rep_roce_init() registers it only for an Ethernet link
+ * layer - confirm mlx5_remove_netdev_notifier() tolerates that case.
+ */
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+       mlx5_ib_stage_common_roce_cleanup(dev);
+}
+
 static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
 {
        struct mlx5_core_dev *mdev = dev->mdev;
@@ -4727,37 +4871,26 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
        int port_type_cap;
        u8 port_num;
        int err;
-       int i;
 
        port_num = mlx5_core_native_port_num(dev->mdev) - 1;
        port_type_cap = MLX5_CAP_GEN(mdev, port_type);
        ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
        if (ll == IB_LINK_LAYER_ETHERNET) {
-               for (i = 0; i < dev->num_ports; i++) {
-                       dev->roce[i].dev = dev;
-                       dev->roce[i].native_port_num = i + 1;
-                       dev->roce[i].last_port_state = IB_PORT_DOWN;
-               }
+               err = mlx5_ib_stage_common_roce_init(dev, port_num);
+               if (err)
+                       return err;
 
-               dev->ib_dev.get_netdev  = mlx5_ib_get_netdev;
-               dev->ib_dev.create_wq    = mlx5_ib_create_wq;
-               dev->ib_dev.modify_wq    = mlx5_ib_modify_wq;
-               dev->ib_dev.destroy_wq   = mlx5_ib_destroy_wq;
-               dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
-               dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
-               dev->ib_dev.uverbs_ex_cmd_mask |=
-                       (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
-                       (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
-                       (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
-                       (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
-                       (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
                err = mlx5_enable_eth(dev, port_num);
                if (err)
-                       return err;
+                       goto cleanup;
        }
 
        return 0;
+cleanup:
+       mlx5_ib_stage_common_roce_cleanup(dev);
+
+       return err;
 }
 
 static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
@@ -4773,16 +4906,16 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
 
        if (ll == IB_LINK_LAYER_ETHERNET) {
                mlx5_disable_eth(dev);
-               mlx5_remove_netdev_notifier(dev, port_num);
+               mlx5_ib_stage_common_roce_cleanup(dev);
        }
 }
 
-static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
 {
        return create_dev_resources(&dev->devr);
 }
 
-static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
 {
        destroy_dev_resources(&dev->devr);
 }
@@ -4794,7 +4927,7 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
        return mlx5_ib_odp_init_one(dev);
 }
 
-static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
 {
        if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
                dev->ib_dev.get_hw_stats        = mlx5_ib_get_hw_stats;
@@ -4806,7 +4939,7 @@ static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
        return 0;
 }
 
-static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
 {
        if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
                mlx5_ib_dealloc_counters(dev);
@@ -4837,7 +4970,7 @@ static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
        mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
 }
 
-static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
 {
        int err;
 
@@ -4852,30 +4985,30 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
        return err;
 }
 
-static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
 {
        mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
        mlx5_free_bfreg(dev->mdev, &dev->bfreg);
 }
 
-static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
 {
        return ib_register_device(&dev->ib_dev, NULL);
 }
 
-static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
 {
-       ib_unregister_device(&dev->ib_dev);
+       destroy_umrc_res(dev);
 }
 
-static int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
 {
-       return create_umr_res(dev);
+       ib_unregister_device(&dev->ib_dev);
 }
 
-static void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
 {
-       destroy_umrc_res(dev);
+       return create_umr_res(dev);
 }
 
 static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev)
@@ -4890,7 +5023,7 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
        cancel_delay_drop(dev);
 }
 
-static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
 {
        int err;
        int i;
@@ -4905,9 +5038,21 @@ static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
        return 0;
 }
 
-static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
-                            const struct mlx5_ib_profile *profile,
-                            int stage)
+static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
+{
+       mlx5_ib_register_vport_reps(dev);
+
+       return 0;
+}
+
+static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
+{
+       mlx5_ib_unregister_vport_reps(dev);
+}
+
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+                     const struct mlx5_ib_profile *profile,
+                     int stage)
 {
        /* Number of stages to cleanup */
        while (stage) {
@@ -4921,23 +5066,14 @@ static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
 
 static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num);
 
-static void *__mlx5_ib_add(struct mlx5_core_dev *mdev,
-                          const struct mlx5_ib_profile *profile)
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+                   const struct mlx5_ib_profile *profile)
 {
-       struct mlx5_ib_dev *dev;
        int err;
        int i;
 
        printk_once(KERN_INFO "%s", mlx5_version);
 
-       dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
-       if (!dev)
-               return NULL;
-
-       dev->mdev = mdev;
-       dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
-                            MLX5_CAP_GEN(mdev, num_vhca_ports));
-
        for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
                if (profile->stage[i].init) {
                        err = profile->stage[i].init(dev);
@@ -4961,9 +5097,15 @@ static const struct mlx5_ib_profile pf_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_INIT,
                     mlx5_ib_stage_init_init,
                     mlx5_ib_stage_init_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+                    mlx5_ib_stage_flow_db_init,
+                    mlx5_ib_stage_flow_db_cleanup),
        STAGE_CREATE(MLX5_IB_STAGE_CAPS,
                     mlx5_ib_stage_caps_init,
                     NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+                    mlx5_ib_stage_non_default_cb,
+                    NULL),
        STAGE_CREATE(MLX5_IB_STAGE_ROCE,
                     mlx5_ib_stage_roce_init,
                     mlx5_ib_stage_roce_cleanup),
@@ -4985,12 +5127,15 @@ static const struct mlx5_ib_profile pf_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_BFREG,
                     mlx5_ib_stage_bfrag_init,
                     mlx5_ib_stage_bfrag_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
+                    NULL,
+                    mlx5_ib_stage_pre_ib_reg_umr_cleanup),
        STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
                     mlx5_ib_stage_ib_reg_init,
                     mlx5_ib_stage_ib_reg_cleanup),
-       STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
-                    mlx5_ib_stage_umr_res_init,
-                    mlx5_ib_stage_umr_res_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
+                    mlx5_ib_stage_post_ib_reg_umr_init,
+                    NULL),
        STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
                     mlx5_ib_stage_delay_drop_init,
                     mlx5_ib_stage_delay_drop_cleanup),
@@ -4999,6 +5144,51 @@ static const struct mlx5_ib_profile pf_profile = {
                     NULL),
 };
 
+static const struct mlx5_ib_profile nic_rep_profile = {
+       STAGE_CREATE(MLX5_IB_STAGE_INIT,
+                    mlx5_ib_stage_init_init,
+                    mlx5_ib_stage_init_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+                    mlx5_ib_stage_flow_db_init,
+                    mlx5_ib_stage_flow_db_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+                    mlx5_ib_stage_caps_init,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+                    mlx5_ib_stage_rep_non_default_cb,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+                    mlx5_ib_stage_rep_roce_init,
+                    mlx5_ib_stage_rep_roce_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+                    mlx5_ib_stage_dev_res_init,
+                    mlx5_ib_stage_dev_res_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+                    mlx5_ib_stage_counters_init,
+                    mlx5_ib_stage_counters_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_UAR,
+                    mlx5_ib_stage_uar_init,
+                    mlx5_ib_stage_uar_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+                    mlx5_ib_stage_bfrag_init,
+                    mlx5_ib_stage_bfrag_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
+                    NULL,
+                    mlx5_ib_stage_pre_ib_reg_umr_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+                    mlx5_ib_stage_ib_reg_init,
+                    mlx5_ib_stage_ib_reg_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
+                    mlx5_ib_stage_post_ib_reg_umr_init,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+                    mlx5_ib_stage_class_attr_init,
+                    NULL),
+       STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
+                    mlx5_ib_stage_rep_reg_init,
+                    mlx5_ib_stage_rep_reg_cleanup),
+};
+
 static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
 {
        struct mlx5_ib_multiport_info *mpi;
@@ -5044,8 +5234,11 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 {
        enum rdma_link_layer ll;
+       struct mlx5_ib_dev *dev;
        int port_type_cap;
 
+       printk_once(KERN_INFO "%s", mlx5_version);
+
        port_type_cap = MLX5_CAP_GEN(mdev, port_type);
        ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
@@ -5055,7 +5248,22 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
                return mlx5_ib_add_slave_port(mdev, port_num);
        }
 
-       return __mlx5_ib_add(mdev, &pf_profile);
+       dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+       if (!dev)
+               return NULL;
+
+       dev->mdev = mdev;
+       dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
+                            MLX5_CAP_GEN(mdev, num_vhca_ports));
+
+       if (MLX5_VPORT_MANAGER(mdev) &&
+           mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+               dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
+
+               return __mlx5_ib_add(dev, &nic_rep_profile);
+       }
+
+       return __mlx5_ib_add(dev, &pf_profile);
 }
 
 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
index eafb9751daf61c13f537b8aa999a100767b85858..c33bf1523d677c2faadd983b91a5baa15c1b9cc0 100644 (file)
@@ -343,6 +343,7 @@ struct mlx5_ib_sq {
        struct mlx5_ib_wq       *sq;
        struct mlx5_ib_ubuffer  ubuffer;
        struct mlx5_db          *doorbell;
+       struct mlx5_flow_handle *flow_rule;
        u32                     tisn;
        u8                      state;
 };
@@ -731,7 +732,9 @@ struct mlx5_ib_delay_drop {
 
 enum mlx5_ib_stages {
        MLX5_IB_STAGE_INIT,
+       MLX5_IB_STAGE_FLOW_DB,
        MLX5_IB_STAGE_CAPS,
+       MLX5_IB_STAGE_NON_DEFAULT_CB,
        MLX5_IB_STAGE_ROCE,
        MLX5_IB_STAGE_DEVICE_RESOURCES,
        MLX5_IB_STAGE_ODP,
@@ -739,10 +742,12 @@ enum mlx5_ib_stages {
        MLX5_IB_STAGE_CONG_DEBUGFS,
        MLX5_IB_STAGE_UAR,
        MLX5_IB_STAGE_BFREG,
+       MLX5_IB_STAGE_PRE_IB_REG_UMR,
        MLX5_IB_STAGE_IB_REG,
-       MLX5_IB_STAGE_UMR_RESOURCES,
+       MLX5_IB_STAGE_POST_IB_REG_UMR,
        MLX5_IB_STAGE_DELAY_DROP,
        MLX5_IB_STAGE_CLASS_ATTR,
+       MLX5_IB_STAGE_REP_REG,
        MLX5_IB_STAGE_MAX,
 };
 
@@ -797,7 +802,7 @@ struct mlx5_ib_dev {
        struct srcu_struct      mr_srcu;
        u32                     null_mkey;
 #endif
-       struct mlx5_ib_flow_db  flow_db;
+       struct mlx5_ib_flow_db  *flow_db;
        /* protect resources needed as part of reset flow */
        spinlock_t              reset_flow_resource_lock;
        struct list_head        qp_list;
@@ -807,6 +812,7 @@ struct mlx5_ib_dev {
        struct mlx5_sq_bfreg    fp_bfreg;
        struct mlx5_ib_delay_drop       delay_drop;
        const struct mlx5_ib_profile    *profile;
+       struct mlx5_eswitch_rep         *rep;
 
        /* protect the user_td */
        struct mutex            lb_mutex;
@@ -1049,6 +1055,31 @@ static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
+/* Needed for rep profile */
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev);
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+                     const struct mlx5_ib_profile *profile,
+                     int stage);
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+                   const struct mlx5_ib_profile *profile);
+
 int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
                          u8 port, struct ifla_vf_info *info);
 int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
index 556e015678de26809f45d74ac6357ecf9dcbf501..95a36e9ea552c209a0b57df83aa990663329cad4 100644 (file)
@@ -587,7 +587,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 
 static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
 {
-       if (!mlx5_debugfs_root)
+       if (!mlx5_debugfs_root || dev->rep)
                return;
 
        debugfs_remove_recursive(dev->cache.root);
@@ -600,7 +600,7 @@ static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
        struct mlx5_cache_ent *ent;
        int i;
 
-       if (!mlx5_debugfs_root)
+       if (!mlx5_debugfs_root || dev->rep)
                return 0;
 
        cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
@@ -690,6 +690,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
                           MLX5_IB_UMR_OCTOWORD;
                ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
                if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+                   !dev->rep &&
                    mlx5_core_is_pf(dev->mdev))
                        ent->limit = dev->mdev->profile->mr_cache[i].limit;
                else
@@ -838,7 +839,8 @@ static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
        *umem = ib_umem_get(pd->uobject->context, start, length,
                            access_flags, 0);
        err = PTR_ERR_OR_ZERO(*umem);
-       if (err < 0) {
+       if (err) {
+               *umem = NULL;
                mlx5_ib_err(dev, "umem get failed (%d)\n", err);
                return err;
        }
@@ -1415,6 +1417,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
                if (err) {
                        mlx5_ib_warn(dev, "Failed to rereg UMR\n");
                        ib_umem_release(mr->umem);
+                       mr->umem = NULL;
                        clean_mr(dev, mr);
                        return err;
                }
@@ -1498,14 +1501,11 @@ static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
                u32 key = mr->mmkey.key;
 
                err = destroy_mkey(dev, mr);
-               kfree(mr);
                if (err) {
                        mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
                                     key, err);
                        return err;
                }
-       } else {
-               mlx5_mr_cache_free(dev, mr);
        }
 
        return 0;
@@ -1548,6 +1548,11 @@ static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
                atomic_sub(npages, &dev->mdev->priv.reg_pages);
        }
 
+       if (!mr->allocated_from_cache)
+               kfree(mr);
+       else
+               mlx5_mr_cache_free(dev, mr);
+
        return 0;
 }
 
@@ -1816,7 +1821,6 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
 
        mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
        mr->ibmr.length = 0;
-       mr->ndescs = sg_nents;
 
        for_each_sg(sgl, sg, sg_nents, i) {
                if (unlikely(i >= mr->max_descs))
@@ -1828,6 +1832,7 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
 
                sg_offset = 0;
        }
+       mr->ndescs = i;
 
        if (sg_offset_p)
                *sg_offset_p = sg_offset;
index 39d24bf694a864788f2d3dc806c3019bb4b12b8b..85c612ac547a4ca4a221892483c3cb474743b62b 100644 (file)
@@ -36,6 +36,7 @@
 #include <rdma/ib_user_verbs.h>
 #include <linux/mlx5/fs.h>
 #include "mlx5_ib.h"
+#include "ib_rep.h"
 
 /* not supported currently */
 static int wq_signature;
@@ -1082,6 +1083,13 @@ static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
        mlx5_core_destroy_tis(dev->mdev, sq->tisn);
 }
 
+static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+                                      struct mlx5_ib_sq *sq)
+{
+       if (sq->flow_rule)
+               mlx5_del_flow_rules(sq->flow_rule);
+}
+
 static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
                                   struct mlx5_ib_sq *sq, void *qpin,
                                   struct ib_pd *pd)
@@ -1145,8 +1153,15 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
        if (err)
                goto err_umem;
 
+       err = create_flow_rule_vport_sq(dev, sq);
+       if (err)
+               goto err_flow;
+
        return 0;
 
+err_flow:
+       mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+
 err_umem:
        ib_umem_release(sq->ubuffer.umem);
        sq->ubuffer.umem = NULL;
@@ -1157,11 +1172,12 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
 static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
                                     struct mlx5_ib_sq *sq)
 {
+       destroy_flow_rule_vport_sq(dev, sq);
        mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
        ib_umem_release(sq->ubuffer.umem);
 }
 
-static int get_rq_pas_size(void *qpc)
+static size_t get_rq_pas_size(void *qpc)
 {
        u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12;
        u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride);
@@ -1177,7 +1193,8 @@ static int get_rq_pas_size(void *qpc)
 }
 
 static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
-                                  struct mlx5_ib_rq *rq, void *qpin)
+                                  struct mlx5_ib_rq *rq, void *qpin,
+                                  size_t qpinlen)
 {
        struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
        __be64 *pas;
@@ -1186,9 +1203,12 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
        void *rqc;
        void *wq;
        void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
-       int inlen;
+       size_t rq_pas_size = get_rq_pas_size(qpc);
+       size_t inlen;
        int err;
-       u32 rq_pas_size = get_rq_pas_size(qpc);
+
+       if (qpinlen < rq_pas_size + MLX5_BYTE_OFF(create_qp_in, pas))
+               return -EINVAL;
 
        inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
        in = kvzalloc(inlen, GFP_KERNEL);
@@ -1263,6 +1283,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
        if (tunnel_offload_en)
                MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
 
+       if (dev->rep)
+               MLX5_SET(tirc, tirc, self_lb_block,
+                        MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
        err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
 
        kvfree(in);
@@ -1277,7 +1301,7 @@ static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
 }
 
 static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
-                               u32 *in,
+                               u32 *in, size_t inlen,
                                struct ib_pd *pd)
 {
        struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
@@ -1309,7 +1333,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
                        rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
                if (qp->flags & MLX5_IB_QP_PCI_WRITE_END_PADDING)
                        rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING;
-               err = create_raw_packet_qp_rq(dev, rq, in);
+               err = create_raw_packet_qp_rq(dev, rq, in, inlen);
                if (err)
                        goto err_destroy_sq;
 
@@ -1554,6 +1578,10 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
        MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
 
 create_tir:
+       if (dev->rep)
+               MLX5_SET(tirc, tirc, self_lb_block,
+                        MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
        err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
 
        if (err)
@@ -1584,6 +1612,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        u32 uidx = MLX5_IB_DEFAULT_UIDX;
        struct mlx5_ib_create_qp ucmd;
        struct mlx5_ib_qp_base *base;
+       int mlx5_st;
        void *qpc;
        u32 *in;
        int err;
@@ -1592,6 +1621,10 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        spin_lock_init(&qp->sq.lock);
        spin_lock_init(&qp->rq.lock);
 
+       mlx5_st = to_mlx5_st(init_attr->qp_type);
+       if (mlx5_st < 0)
+               return -EINVAL;
+
        if (init_attr->rwq_ind_tbl) {
                if (!udata)
                        return -ENOSYS;
@@ -1753,7 +1786,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 
        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
 
-       MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type));
+       MLX5_SET(qpc, qpc, st, mlx5_st);
        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
 
        if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
@@ -1867,11 +1900,16 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                }
        }
 
+       if (inlen < 0) {
+               err = -EINVAL;
+               goto err;
+       }
+
        if (init_attr->qp_type == IB_QPT_RAW_PACKET ||
            qp->flags & MLX5_IB_QP_UNDERLAY) {
                qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
                raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
-               err = create_raw_packet_qp(dev, qp, in, pd);
+               err = create_raw_packet_qp(dev, qp, in, inlen, pd);
        } else {
                err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
        }
@@ -2129,7 +2167,6 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
                                        struct ib_qp_init_attr *attr,
                                        struct mlx5_ib_create_qp *ucmd)
 {
-       struct mlx5_ib_dev *dev;
        struct mlx5_ib_qp *qp;
        int err = 0;
        u32 uidx = MLX5_IB_DEFAULT_UIDX;
@@ -2138,8 +2175,6 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
        if (!attr->srq || !attr->recv_cq)
                return ERR_PTR(-EINVAL);
 
-       dev = to_mdev(pd->device);
-
        err = get_qp_user_index(to_mucontext(pd->uobject->context),
                                ucmd, sizeof(*ucmd), &uidx);
        if (err)
@@ -3095,8 +3130,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
                goto out;
 
        if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
-           !optab[mlx5_cur][mlx5_new])
+           !optab[mlx5_cur][mlx5_new]) {
+               err = -EINVAL;
                goto out;
+       }
 
        op = optab[mlx5_cur][mlx5_new];
        optpar = ib_mask_to_mlx5_opt(attr_mask);
index 6d5fadad909081d17b80c967cbe69b8c381e6bd8..3c7522d025f2b559f638fd2bde05200702488dde 100644 (file)
@@ -241,8 +241,8 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_ib_srq *srq;
-       int desc_size;
-       int buf_size;
+       size_t desc_size;
+       size_t buf_size;
        int err;
        struct mlx5_srq_attr in = {0};
        __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
@@ -266,15 +266,18 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
 
        desc_size = sizeof(struct mlx5_wqe_srq_next_seg) +
                    srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg);
+       if (desc_size == 0 || srq->msrq.max_gs > desc_size)
+               return ERR_PTR(-EINVAL);
        desc_size = roundup_pow_of_two(desc_size);
-       desc_size = max_t(int, 32, desc_size);
+       desc_size = max_t(size_t, 32, desc_size);
+       if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg))
+               return ERR_PTR(-EINVAL);
        srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) /
                sizeof(struct mlx5_wqe_data_seg);
        srq->msrq.wqe_shift = ilog2(desc_size);
        buf_size = srq->msrq.max * desc_size;
-       mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n",
-                   desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
-                   srq->msrq.max_avail_gather);
+       if (buf_size < desc_size)
+               return ERR_PTR(-EINVAL);
        in.type = init_attr->srq_type;
 
        if (pd->uobject)
index 478b7317b80ab49fa281cb80621efb29ccde5e9a..26dc374787f74843fd0326ca87b37a312e422c3d 100644 (file)
@@ -458,8 +458,7 @@ qedr_addr6_resolve(struct qedr_dev *dev,
                }
                return -EINVAL;
        }
-       neigh = dst_neigh_lookup(dst, &dst_in);
-
+       neigh = dst_neigh_lookup(dst, &fl6.daddr);
        if (neigh) {
                rcu_read_lock();
                if (neigh->nud_state & NUD_VALID) {
@@ -494,10 +493,14 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
        qp = idr_find(&dev->qpidr, conn_param->qpn);
 
-       laddr = (struct sockaddr_in *)&cm_id->local_addr;
-       raddr = (struct sockaddr_in *)&cm_id->remote_addr;
-       laddr6 = (struct sockaddr_in6 *)&cm_id->local_addr;
-       raddr6 = (struct sockaddr_in6 *)&cm_id->remote_addr;
+       laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+       raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
+       laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+       raddr6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
+
+       DP_DEBUG(dev, QEDR_MSG_IWARP, "MAPPED %d %d\n",
+                ntohs(((struct sockaddr_in *)&cm_id->remote_addr)->sin_port),
+                ntohs(raddr->sin_port));
 
        DP_DEBUG(dev, QEDR_MSG_IWARP,
                 "Connect source address: %pISpc, remote address: %pISpc\n",
@@ -599,8 +602,8 @@ int qedr_iw_create_listen(struct iw_cm_id *cm_id, int backlog)
        int rc;
        int i;
 
-       laddr = (struct sockaddr_in *)&cm_id->local_addr;
-       laddr6 = (struct sockaddr_in6 *)&cm_id->local_addr;
+       laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+       laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
 
        DP_DEBUG(dev, QEDR_MSG_IWARP,
                 "Create Listener address: %pISpc\n", &cm_id->local_addr);
index 53f00dbf313f757941d32451ae23e62305f9cf53..875b17272d65289d2cfa826f3635683831e19886 100644 (file)
@@ -3034,6 +3034,11 @@ static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
        switch (wr->opcode) {
        case IB_WR_SEND_WITH_IMM:
+               if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
+                       rc = -EINVAL;
+                       *bad_wr = wr;
+                       break;
+               }
                wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
                swqe = (struct rdma_sq_send_wqe_1st *)wqe;
                swqe->wqe_size = 2;
@@ -3075,6 +3080,11 @@ static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                break;
 
        case IB_WR_RDMA_WRITE_WITH_IMM:
+               if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
+                       rc = -EINVAL;
+                       *bad_wr = wr;
+                       break;
+               }
                wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
                rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
 
@@ -3724,7 +3734,7 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 {
        struct qedr_dev *dev = get_qedr_dev(ibcq->device);
        struct qedr_cq *cq = get_qedr_cq(ibcq);
-       union rdma_cqe *cqe = cq->latest_cqe;
+       union rdma_cqe *cqe;
        u32 old_cons, new_cons;
        unsigned long flags;
        int update = 0;
@@ -3741,6 +3751,7 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
                return qedr_gsi_poll_cq(ibcq, num_entries, wc);
 
        spin_lock_irqsave(&cq->cq_lock, flags);
+       cqe = cq->latest_cqe;
        old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
        while (num_entries && is_valid_cqe(cq, cqe)) {
                struct qedr_qp *qp;
index 1b2e5362a3ffeaeb3813707ed37d2ddb386c2f0d..cc429b567d0a4d1a10ee97a61f71b9f59c0d4561 100644 (file)
@@ -489,11 +489,13 @@ static int rvt_check_refs(struct rvt_mregion *mr, const char *t)
        unsigned long timeout;
        struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
 
-       if (percpu_ref_is_zero(&mr->refcount))
-               return 0;
-       /* avoid dma mr */
-       if (mr->lkey)
+       if (mr->lkey) {
+               /* avoid dma mr */
                rvt_dereg_clean_qps(mr);
+               /* @mr was indexed on rcu protected @lkey_table */
+               synchronize_rcu();
+       }
+
        timeout = wait_for_completion_timeout(&mr->comp, 5 * HZ);
        if (!timeout) {
                rvt_pr_err(rdi,
index 1f316d66e6f71eaa232b295c9ba161be4c6a5a9d..41614c185918259d54546a50125601270f3852d7 100644 (file)
@@ -218,8 +218,10 @@ static void matrix_keypad_stop(struct input_dev *dev)
 {
        struct matrix_keypad *keypad = input_get_drvdata(dev);
 
+       spin_lock_irq(&keypad->lock);
        keypad->stopped = true;
-       mb();
+       spin_unlock_irq(&keypad->lock);
+
        flush_work(&keypad->work.work);
        /*
         * matrix_keypad_scan() will leave IRQs enabled;
index 3d2e23a0ae39dd6b9475db31ce2b30cc930ddc93..a246fc686bb728dbe48b2fc84b90a1734af60c66 100644 (file)
@@ -173,7 +173,6 @@ static const char * const smbus_pnp_ids[] = {
        "LEN0046", /* X250 */
        "LEN004a", /* W541 */
        "LEN200f", /* T450s */
-       "LEN2018", /* T460p */
        NULL
 };
 
index db4f6bb502e3fecd59607795e98ec68498641359..a5ab774da4cccceb584f94f41c50a68d990b72c7 100644 (file)
@@ -1,11 +1,8 @@
-/*
- * Copyright (C) 2012 Samsung Electronics Co.Ltd
- * Author: Joonyoung Shim <jy0922.shim@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0
+// Melfas MMS114/MMS152 touchscreen device driver
+//
+// Copyright (c) 2012 Samsung Electronics Co., Ltd.
+// Author: Joonyoung Shim <jy0922.shim@samsung.com>
 
 #include <linux/module.h>
 #include <linux/delay.h>
@@ -624,4 +621,4 @@ module_i2c_driver(mms114_driver);
 /* Module information */
 MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
 MODULE_DESCRIPTION("MELFAS mms114 Touchscreen driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
index 35a408d0ae4fae9e4ea33e184dfc60a6d0ddc0b4..99bc9bd64b9ecc1de640cba1432a750db6e9491f 100644 (file)
@@ -205,7 +205,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
                         * for example, an "address" value of 0x12345f000 will
                         * flush from 0x123440000 to 0x12347ffff (256KiB). */
                        unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
-                       unsigned long mask = __rounddown_pow_of_two(address ^ last);;
+                       unsigned long mask = __rounddown_pow_of_two(address ^ last);
 
                        desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) | (mask - 1)) | QI_DEV_EIOTLB_SIZE;
                } else {
index 1d3056f5374721f794b5794baa678cb381cccc04..2cbb19cddbf8e06c1d08aef5fb1926131238fe31 100644 (file)
@@ -1412,7 +1412,7 @@ static struct irq_chip its_irq_chip = {
  * This gives us (((1UL << id_bits) - 8192) >> 5) possible allocations.
  */
 #define IRQS_PER_CHUNK_SHIFT   5
-#define IRQS_PER_CHUNK         (1 << IRQS_PER_CHUNK_SHIFT)
+#define IRQS_PER_CHUNK         (1UL << IRQS_PER_CHUNK_SHIFT)
 #define ITS_MAX_LPI_NRBITS     16 /* 64K LPIs */
 
 static unsigned long *lpi_bitmap;
@@ -2119,11 +2119,10 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
 
        dev = kzalloc(sizeof(*dev), GFP_KERNEL);
        /*
-        * At least one bit of EventID is being used, hence a minimum
-        * of two entries. No, the architecture doesn't let you
-        * express an ITT with a single entry.
+        * We allocate at least one chunk worth of LPIs per device,
+        * and thus that many ITEs. The device may require less though.
         */
-       nr_ites = max(2UL, roundup_pow_of_two(nvecs));
+       nr_ites = max(IRQS_PER_CHUNK, roundup_pow_of_two(nvecs));
        sz = nr_ites * its->ite_size;
        sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
        itt = kzalloc(sz, GFP_KERNEL);
@@ -2495,7 +2494,7 @@ static int its_vpe_set_affinity(struct irq_data *d,
 
 static void its_vpe_schedule(struct its_vpe *vpe)
 {
-       void * __iomem vlpi_base = gic_data_rdist_vlpi_base();
+       void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
        u64 val;
 
        /* Schedule the VPE */
@@ -2527,7 +2526,7 @@ static void its_vpe_schedule(struct its_vpe *vpe)
 
 static void its_vpe_deschedule(struct its_vpe *vpe)
 {
-       void * __iomem vlpi_base = gic_data_rdist_vlpi_base();
+       void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
        u32 count = 1000000;    /* 1s! */
        bool clean;
        u64 val;
index 675eda5ff2b85f6037eaea58d49a3d48a9e2c33d..4760307ab43fc33404b6b2ec07b2c3b49a6f6405 100644 (file)
@@ -28,20 +28,6 @@ struct gpcv2_irqchip_data {
 
 static struct gpcv2_irqchip_data *imx_gpcv2_instance;
 
-/*
- * Interface for the low level wakeup code.
- */
-u32 imx_gpcv2_get_wakeup_source(u32 **sources)
-{
-       if (!imx_gpcv2_instance)
-               return 0;
-
-       if (sources)
-               *sources = imx_gpcv2_instance->wakeup_sources;
-
-       return IMR_NUM;
-}
-
 static int gpcv2_wakeup_source_save(void)
 {
        struct gpcv2_irqchip_data *cd;
index 1a46b41dac7018bbf43e1dc610abeacab21cb7a1..6422846b546ed27122dc277bd65568421e6986fc 100644 (file)
@@ -659,11 +659,11 @@ static void do_bio_hook(struct search *s, struct bio *orig_bio)
 static void search_free(struct closure *cl)
 {
        struct search *s = container_of(cl, struct search, cl);
-       bio_complete(s);
 
        if (s->iop.bio)
                bio_put(s->iop.bio);
 
+       bio_complete(s);
        closure_debug_destroy(cl);
        mempool_free(s, s->d->c->search);
 }
index 312895788036705cb0426d1af5b5662a4797faf3..f2273143b3cb2384109ba47bf3518adf31337ad8 100644 (file)
@@ -963,6 +963,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
        uint32_t rtime = cpu_to_le32(get_seconds());
        struct uuid_entry *u;
        char buf[BDEVNAME_SIZE];
+       struct cached_dev *exist_dc, *t;
 
        bdevname(dc->bdev, buf);
 
@@ -987,6 +988,16 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
                return -EINVAL;
        }
 
+       /* Check whether already attached */
+       list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) {
+               if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) {
+                       pr_err("Tried to attach %s but duplicate UUID already attached",
+                               buf);
+
+                       return -EINVAL;
+               }
+       }
+
        u = uuid_find(c, dc->sb.uuid);
 
        if (u &&
@@ -1204,7 +1215,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
 
        return;
 err:
-       pr_notice("error opening %s: %s", bdevname(bdev, name), err);
+       pr_notice("error %s: %s", bdevname(bdev, name), err);
        bcache_device_stop(&dc->disk);
 }
 
@@ -1274,7 +1285,7 @@ static int flash_devs_run(struct cache_set *c)
        struct uuid_entry *u;
 
        for (u = c->uuids;
-            u < c->uuids + c->devices_max_used && !ret;
+            u < c->uuids + c->nr_uuids && !ret;
             u++)
                if (UUID_FLASH_ONLY(u))
                        ret = flash_dev_run(c, u);
@@ -1883,6 +1894,8 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
        const char *err = NULL; /* must be set for any error case */
        int ret = 0;
 
+       bdevname(bdev, name);
+
        memcpy(&ca->sb, sb, sizeof(struct cache_sb));
        ca->bdev = bdev;
        ca->bdev->bd_holder = ca;
@@ -1891,11 +1904,12 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
        bio_first_bvec_all(&ca->sb_bio)->bv_page = sb_page;
        get_page(sb_page);
 
-       if (blk_queue_discard(bdev_get_queue(ca->bdev)))
+       if (blk_queue_discard(bdev_get_queue(bdev)))
                ca->discard = CACHE_DISCARD(&ca->sb);
 
        ret = cache_alloc(ca);
        if (ret != 0) {
+               blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
                if (ret == -ENOMEM)
                        err = "cache_alloc(): -ENOMEM";
                else
@@ -1918,14 +1932,14 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
                goto out;
        }
 
-       pr_info("registered cache device %s", bdevname(bdev, name));
+       pr_info("registered cache device %s", name);
 
 out:
        kobject_put(&ca->kobj);
 
 err:
        if (err)
-               pr_notice("error opening %s: %s", bdevname(bdev, name), err);
+               pr_notice("error %s: %s", name, err);
 
        return ret;
 }
@@ -2014,6 +2028,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
        if (err)
                goto err_close;
 
+       err = "failed to register device";
        if (SB_IS_BDEV(sb)) {
                struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
                if (!dc)
@@ -2028,7 +2043,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
                        goto err_close;
 
                if (register_cache(sb, sb_page, bdev, ca) != 0)
-                       goto err_close;
+                       goto err;
        }
 out:
        if (sb_page)
@@ -2041,7 +2056,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 err_close:
        blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 err:
-       pr_info("error opening %s: %s", path, err);
+       pr_info("error %s: %s", path, err);
        ret = -EINVAL;
        goto out;
 }
index 414c9af54ded2fde89531cedf3da430c53fcac9d..aa2032fa80d49eebaccf937a4a9a5182d91dadc9 100644 (file)
@@ -386,9 +386,6 @@ static void __cache_size_refresh(void)
 static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
                               enum data_mode *data_mode)
 {
-       unsigned noio_flag;
-       void *ptr;
-
        if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
                *data_mode = DATA_MODE_SLAB;
                return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
@@ -412,16 +409,15 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
         * all allocations done by this process (including pagetables) are done
         * as if GFP_NOIO was specified.
         */
+       if (gfp_mask & __GFP_NORETRY) {
+               unsigned noio_flag = memalloc_noio_save();
+               void *ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
 
-       if (gfp_mask & __GFP_NORETRY)
-               noio_flag = memalloc_noio_save();
-
-       ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
-
-       if (gfp_mask & __GFP_NORETRY)
                memalloc_noio_restore(noio_flag);
+               return ptr;
+       }
 
-       return ptr;
+       return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
 }
 
 /*
index 7d3e572072f51f865d07a79ae5a0ac974bd98e4a..a05a560d3cba628eb8a5ed23e07a753c1efb0acd 100644 (file)
@@ -211,29 +211,27 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
                else
                        m->queue_mode = DM_TYPE_REQUEST_BASED;
 
-       } else if (m->queue_mode == DM_TYPE_BIO_BASED ||
-                  m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
+       } else if (m->queue_mode == DM_TYPE_BIO_BASED) {
                INIT_WORK(&m->process_queued_bios, process_queued_bios);
-
-               if (m->queue_mode == DM_TYPE_BIO_BASED) {
-                       /*
-                        * bio-based doesn't support any direct scsi_dh management;
-                        * it just discovers if a scsi_dh is attached.
-                        */
-                       set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
-               }
-       }
-
-       if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
-               set_bit(MPATHF_QUEUE_IO, &m->flags);
-               atomic_set(&m->pg_init_in_progress, 0);
-               atomic_set(&m->pg_init_count, 0);
-               m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
-               init_waitqueue_head(&m->pg_init_wait);
+               /*
+                * bio-based doesn't support any direct scsi_dh management;
+                * it just discovers if a scsi_dh is attached.
+                */
+               set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
        }
 
        dm_table_set_type(ti->table, m->queue_mode);
 
+       /*
+        * Init fields that are only used when a scsi_dh is attached
+        * - must do this unconditionally (really doesn't hurt non-SCSI uses)
+        */
+       set_bit(MPATHF_QUEUE_IO, &m->flags);
+       atomic_set(&m->pg_init_in_progress, 0);
+       atomic_set(&m->pg_init_count, 0);
+       m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
+       init_waitqueue_head(&m->pg_init_wait);
+
        return 0;
 }
 
@@ -337,9 +335,6 @@ static void __switch_pg(struct multipath *m, struct priority_group *pg)
 {
        m->current_pg = pg;
 
-       if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
-               return;
-
        /* Must we initialise the PG first, and queue I/O till it's ready? */
        if (m->hw_handler_name) {
                set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
@@ -385,8 +380,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
        unsigned bypassed = 1;
 
        if (!atomic_read(&m->nr_valid_paths)) {
-               if (m->queue_mode != DM_TYPE_NVME_BIO_BASED)
-                       clear_bit(MPATHF_QUEUE_IO, &m->flags);
+               clear_bit(MPATHF_QUEUE_IO, &m->flags);
                goto failed;
        }
 
@@ -599,7 +593,7 @@ static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
        return pgpath;
 }
 
-static struct pgpath *__map_bio_nvme(struct multipath *m, struct bio *bio)
+static struct pgpath *__map_bio_fast(struct multipath *m, struct bio *bio)
 {
        struct pgpath *pgpath;
        unsigned long flags;
@@ -634,8 +628,8 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio,
 {
        struct pgpath *pgpath;
 
-       if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
-               pgpath = __map_bio_nvme(m, bio);
+       if (!m->hw_handler_name)
+               pgpath = __map_bio_fast(m, bio);
        else
                pgpath = __map_bio(m, bio);
 
@@ -675,8 +669,7 @@ static void process_queued_io_list(struct multipath *m)
 {
        if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
                dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
-       else if (m->queue_mode == DM_TYPE_BIO_BASED ||
-                m->queue_mode == DM_TYPE_NVME_BIO_BASED)
+       else if (m->queue_mode == DM_TYPE_BIO_BASED)
                queue_work(kmultipathd, &m->process_queued_bios);
 }
 
@@ -811,15 +804,14 @@ static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
        return 0;
 }
 
-static int setup_scsi_dh(struct block_device *bdev, struct multipath *m, char **error)
+static int setup_scsi_dh(struct block_device *bdev, struct multipath *m,
+                        const char *attached_handler_name, char **error)
 {
        struct request_queue *q = bdev_get_queue(bdev);
-       const char *attached_handler_name;
        int r;
 
        if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) {
 retain:
-               attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
                if (attached_handler_name) {
                        /*
                         * Clear any hw_handler_params associated with a
@@ -873,6 +865,8 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
        int r;
        struct pgpath *p;
        struct multipath *m = ti->private;
+       struct request_queue *q;
+       const char *attached_handler_name;
 
        /* we need at least a path arg */
        if (as->argc < 1) {
@@ -891,9 +885,11 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
                goto bad;
        }
 
-       if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
+       q = bdev_get_queue(p->path.dev->bdev);
+       attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
+       if (attached_handler_name) {
                INIT_DELAYED_WORK(&p->activate_path, activate_path_work);
-               r = setup_scsi_dh(p->path.dev->bdev, m, &ti->error);
+               r = setup_scsi_dh(p->path.dev->bdev, m, attached_handler_name, &ti->error);
                if (r) {
                        dm_put_device(ti, p->path.dev);
                        goto bad;
@@ -1001,8 +997,7 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
        if (!hw_argc)
                return 0;
 
-       if (m->queue_mode == DM_TYPE_BIO_BASED ||
-           m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
+       if (m->queue_mode == DM_TYPE_BIO_BASED) {
                dm_consume_args(as, hw_argc);
                DMERR("bio-based multipath doesn't allow hardware handler args");
                return 0;
@@ -1091,8 +1086,6 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
 
                        if (!strcasecmp(queue_mode_name, "bio"))
                                m->queue_mode = DM_TYPE_BIO_BASED;
-                       else if (!strcasecmp(queue_mode_name, "nvme"))
-                               m->queue_mode = DM_TYPE_NVME_BIO_BASED;
                        else if (!strcasecmp(queue_mode_name, "rq"))
                                m->queue_mode = DM_TYPE_REQUEST_BASED;
                        else if (!strcasecmp(queue_mode_name, "mq"))
@@ -1193,7 +1186,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
        ti->num_discard_bios = 1;
        ti->num_write_same_bios = 1;
        ti->num_write_zeroes_bios = 1;
-       if (m->queue_mode == DM_TYPE_BIO_BASED || m->queue_mode == DM_TYPE_NVME_BIO_BASED)
+       if (m->queue_mode == DM_TYPE_BIO_BASED)
                ti->per_io_data_size = multipath_per_bio_data_size();
        else
                ti->per_io_data_size = sizeof(struct dm_mpath_io);
@@ -1730,9 +1723,6 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
                        case DM_TYPE_BIO_BASED:
                                DMEMIT("queue_mode bio ");
                                break;
-                       case DM_TYPE_NVME_BIO_BASED:
-                               DMEMIT("queue_mode nvme ");
-                               break;
                        case DM_TYPE_MQ_REQUEST_BASED:
                                DMEMIT("queue_mode mq ");
                                break;
@@ -2030,8 +2020,9 @@ static int multipath_busy(struct dm_target *ti)
  *---------------------------------------------------------------*/
 static struct target_type multipath_target = {
        .name = "multipath",
-       .version = {1, 12, 0},
-       .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE,
+       .version = {1, 13, 0},
+       .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE |
+                   DM_TARGET_PASSES_INTEGRITY,
        .module = THIS_MODULE,
        .ctr = multipath_ctr,
        .dtr = multipath_dtr,
index 7ef469e902c620126b95f69d899e528ae114b3bc..c1d1034ff7b75eb740d40920615c0ae10a308c43 100644 (file)
@@ -3408,9 +3408,10 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
                set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
 
        } else {
-               if (test_bit(MD_RECOVERY_NEEDED, &recovery) ||
-                   test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
-                   test_bit(MD_RECOVERY_RUNNING, &recovery))
+               if (!test_bit(MD_RECOVERY_INTR, &recovery) &&
+                   (test_bit(MD_RECOVERY_NEEDED, &recovery) ||
+                    test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
+                    test_bit(MD_RECOVERY_RUNNING, &recovery)))
                        r = mddev->curr_resync_completed;
                else
                        r = mddev->recovery_cp;
index 5fe7ec356c333c940edb41e61f0e5844de0aea72..7eb3e2a3c07d5a607669d36aa0a462bfd175d9b3 100644 (file)
@@ -942,17 +942,12 @@ static int dm_table_determine_type(struct dm_table *t)
 
        if (t->type != DM_TYPE_NONE) {
                /* target already set the table's type */
-               if (t->type == DM_TYPE_BIO_BASED)
-                       return 0;
-               else if (t->type == DM_TYPE_NVME_BIO_BASED) {
-                       if (!dm_table_does_not_support_partial_completion(t)) {
-                               DMERR("nvme bio-based is only possible with devices"
-                                     " that don't support partial completion");
-                               return -EINVAL;
-                       }
-                       /* Fallthru, also verify all devices are blk-mq */
+               if (t->type == DM_TYPE_BIO_BASED) {
+                       /* possibly upgrade to a variant of bio-based */
+                       goto verify_bio_based;
                }
                BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED);
+               BUG_ON(t->type == DM_TYPE_NVME_BIO_BASED);
                goto verify_rq_based;
        }
 
@@ -985,6 +980,7 @@ static int dm_table_determine_type(struct dm_table *t)
        }
 
        if (bio_based) {
+verify_bio_based:
                /* We must use this table as bio-based */
                t->type = DM_TYPE_BIO_BASED;
                if (dm_table_supports_dax(t) ||
@@ -1755,7 +1751,7 @@ static int device_no_partial_completion(struct dm_target *ti, struct dm_dev *dev
        char b[BDEVNAME_SIZE];
 
        /* For now, NVMe devices are the only devices of this class */
-       return (strncmp(bdevname(dev->bdev, b), "nvme", 3) == 0);
+       return (strncmp(bdevname(dev->bdev, b), "nvme", 4) == 0);
 }
 
 static bool dm_table_does_not_support_partial_completion(struct dm_table *t)
index 68136806d365821f63ace7675ce21ba2bf10ed8d..45328d8b2859640d04a01b6b7b14cd9990fa2b7e 100644 (file)
@@ -458,9 +458,11 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
        return dm_get_geometry(md, geo);
 }
 
-static int dm_grab_bdev_for_ioctl(struct mapped_device *md,
-                                 struct block_device **bdev,
-                                 fmode_t *mode)
+static char *_dm_claim_ptr = "I belong to device-mapper";
+
+static int dm_get_bdev_for_ioctl(struct mapped_device *md,
+                                struct block_device **bdev,
+                                fmode_t *mode)
 {
        struct dm_target *tgt;
        struct dm_table *map;
@@ -490,6 +492,10 @@ static int dm_grab_bdev_for_ioctl(struct mapped_device *md,
                goto out;
 
        bdgrab(*bdev);
+       r = blkdev_get(*bdev, *mode, _dm_claim_ptr);
+       if (r < 0)
+               goto out;
+
        dm_put_live_table(md, srcu_idx);
        return r;
 
@@ -508,7 +514,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
        struct mapped_device *md = bdev->bd_disk->private_data;
        int r;
 
-       r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+       r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
        if (r < 0)
                return r;
 
@@ -528,7 +534,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
 
        r =  __blkdev_driver_ioctl(bdev, mode, cmd, arg);
 out:
-       bdput(bdev);
+       blkdev_put(bdev, mode);
        return r;
 }
 
@@ -708,14 +714,13 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
 static int open_table_device(struct table_device *td, dev_t dev,
                             struct mapped_device *md)
 {
-       static char *_claim_ptr = "I belong to device-mapper";
        struct block_device *bdev;
 
        int r;
 
        BUG_ON(td->dm_dev.bdev);
 
-       bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr);
+       bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr);
        if (IS_ERR(bdev))
                return PTR_ERR(bdev);
 
@@ -3011,7 +3016,7 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
        fmode_t mode;
        int r;
 
-       r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+       r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
        if (r < 0)
                return r;
 
@@ -3021,7 +3026,7 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
        else
                r = -EOPNOTSUPP;
 
-       bdput(bdev);
+       blkdev_put(bdev, mode);
        return r;
 }
 
@@ -3032,7 +3037,7 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
        fmode_t mode;
        int r;
 
-       r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+       r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
        if (r < 0)
                return r;
 
@@ -3042,7 +3047,7 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
        else
                r = -EOPNOTSUPP;
 
-       bdput(bdev);
+       blkdev_put(bdev, mode);
        return r;
 }
 
@@ -3054,7 +3059,7 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
        fmode_t mode;
        int r;
 
-       r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+       r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
        if (r < 0)
                return r;
 
@@ -3064,7 +3069,7 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
        else
                r = -EOPNOTSUPP;
 
-       bdput(bdev);
+       blkdev_put(bdev, mode);
        return r;
 }
 
@@ -3075,7 +3080,7 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
        fmode_t mode;
        int r;
 
-       r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+       r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
        if (r < 0)
                return r;
 
@@ -3085,7 +3090,7 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
        else
                r = -EOPNOTSUPP;
 
-       bdput(bdev);
+       blkdev_put(bdev, mode);
        return r;
 }
 
index e40065bdbfc84ef8455dbcc88d6a3208a6780571..0a7e99d62c69048d7000d3dc6bbcaa3320c742b5 100644 (file)
@@ -157,7 +157,7 @@ static void multipath_status(struct seq_file *seq, struct mddev *mddev)
                seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
        }
        rcu_read_unlock();
-       seq_printf (seq, "]");
+       seq_putc(seq, ']');
 }
 
 static int multipath_congested(struct mddev *mddev, int bits)
index bc67ab6844f02d540cf4ae3725ebdad13ee568b8..254e44e44668f5fff8cc2d95bdb3b682450a204f 100644 (file)
@@ -801,6 +801,9 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
        struct bio *bio;
        int ff = 0;
 
+       if (!page)
+               return;
+
        if (test_bit(Faulty, &rdev->flags))
                return;
 
@@ -5452,6 +5455,7 @@ int md_run(struct mddev *mddev)
         * the only valid external interface is through the md
         * device.
         */
+       mddev->has_superblocks = false;
        rdev_for_each(rdev, mddev) {
                if (test_bit(Faulty, &rdev->flags))
                        continue;
@@ -5465,6 +5469,9 @@ int md_run(struct mddev *mddev)
                                set_disk_ro(mddev->gendisk, 1);
                }
 
+               if (rdev->sb_page)
+                       mddev->has_superblocks = true;
+
                /* perform some consistency tests on the device.
                 * We don't want the data to overlap the metadata,
                 * Internal Bitmap issues have been handled elsewhere.
@@ -5497,8 +5504,10 @@ int md_run(struct mddev *mddev)
        }
        if (mddev->sync_set == NULL) {
                mddev->sync_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
-               if (!mddev->sync_set)
-                       return -ENOMEM;
+               if (!mddev->sync_set) {
+                       err = -ENOMEM;
+                       goto abort;
+               }
        }
 
        spin_lock(&pers_lock);
@@ -5511,7 +5520,8 @@ int md_run(struct mddev *mddev)
                else
                        pr_warn("md: personality for level %s is not loaded!\n",
                                mddev->clevel);
-               return -EINVAL;
+               err = -EINVAL;
+               goto abort;
        }
        spin_unlock(&pers_lock);
        if (mddev->level != pers->level) {
@@ -5524,7 +5534,8 @@ int md_run(struct mddev *mddev)
            pers->start_reshape == NULL) {
                /* This personality cannot handle reshaping... */
                module_put(pers->owner);
-               return -EINVAL;
+               err = -EINVAL;
+               goto abort;
        }
 
        if (pers->sync_request) {
@@ -5593,7 +5604,7 @@ int md_run(struct mddev *mddev)
                mddev->private = NULL;
                module_put(pers->owner);
                bitmap_destroy(mddev);
-               return err;
+               goto abort;
        }
        if (mddev->queue) {
                bool nonrot = true;
@@ -5655,6 +5666,18 @@ int md_run(struct mddev *mddev)
        sysfs_notify_dirent_safe(mddev->sysfs_action);
        sysfs_notify(&mddev->kobj, NULL, "degraded");
        return 0;
+
+abort:
+       if (mddev->bio_set) {
+               bioset_free(mddev->bio_set);
+               mddev->bio_set = NULL;
+       }
+       if (mddev->sync_set) {
+               bioset_free(mddev->sync_set);
+               mddev->sync_set = NULL;
+       }
+
+       return err;
 }
 EXPORT_SYMBOL_GPL(md_run);
 
@@ -8049,6 +8072,7 @@ EXPORT_SYMBOL(md_done_sync);
 bool md_write_start(struct mddev *mddev, struct bio *bi)
 {
        int did_change = 0;
+
        if (bio_data_dir(bi) != WRITE)
                return true;
 
@@ -8081,6 +8105,8 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
        rcu_read_unlock();
        if (did_change)
                sysfs_notify_dirent_safe(mddev->sysfs_state);
+       if (!mddev->has_superblocks)
+               return true;
        wait_event(mddev->sb_wait,
                   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
                   mddev->suspended);
@@ -8543,6 +8569,19 @@ void md_do_sync(struct md_thread *thread)
        set_mask_bits(&mddev->sb_flags, 0,
                      BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
 
+       if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+                       !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
+                       mddev->delta_disks > 0 &&
+                       mddev->pers->finish_reshape &&
+                       mddev->pers->size &&
+                       mddev->queue) {
+               mddev_lock_nointr(mddev);
+               md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
+               mddev_unlock(mddev);
+               set_capacity(mddev->gendisk, mddev->array_sectors);
+               revalidate_disk(mddev->gendisk);
+       }
+
        spin_lock(&mddev->lock);
        if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
                /* We completed so min/max setting can be forgotten if used. */
@@ -8569,6 +8608,10 @@ static int remove_and_add_spares(struct mddev *mddev,
        int removed = 0;
        bool remove_some = false;
 
+       if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+               /* Mustn't remove devices when resync thread is running */
+               return 0;
+
        rdev_for_each(rdev, mddev) {
                if ((this == NULL || rdev == this) &&
                    rdev->raid_disk >= 0 &&
index 58cd20a5e85edb1db853661dcd749d99682a796c..fbc925cce8107019dcc4b2aa5310c7d100431a46 100644 (file)
@@ -468,6 +468,8 @@ struct mddev {
        void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
        struct md_cluster_info          *cluster_info;
        unsigned int                    good_device_nr; /* good device num within cluster raid */
+
+       bool    has_superblocks:1;
 };
 
 enum recovery_flags {
index b2eae332e1a29ee585c04ba6d22c2a23b8b99ed3..fe872dc6712ed0c5c00caa60e5f152876f0b1025 100644 (file)
@@ -1108,7 +1108,7 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio,
 
        bio_copy_data(behind_bio, bio);
 skip_copy:
-       r1_bio->behind_master_bio = behind_bio;;
+       r1_bio->behind_master_bio = behind_bio;
        set_bit(R1BIO_BehindIO, &r1_bio->state);
 
        return;
@@ -1809,6 +1809,17 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                        struct md_rdev *repl =
                                conf->mirrors[conf->raid_disks + number].rdev;
                        freeze_array(conf, 0);
+                       if (atomic_read(&repl->nr_pending)) {
+                               /* Some queued IO on the retry_list still
+                                * holds repl. Thus, we cannot set the
+                                * replacement to NULL; doing so would cause
+                                * an rdev NULL pointer dereference in
+                                * sync_request_write and handle_write_finished.
+                                */
+                               err = -EBUSY;
+                               unfreeze_array(conf);
+                               goto abort;
+                       }
                        clear_bit(Replacement, &repl->flags);
                        p->rdev = repl;
                        conf->mirrors[conf->raid_disks + number].rdev = NULL;
index c7294e7557e038bf5248e8c8bf9ce84cb6afad58..eb84bc68e2fd4c31cc996bc4d399183cd852b8f2 100644 (file)
 #define BARRIER_BUCKETS_NR_BITS                (PAGE_SHIFT - ilog2(sizeof(atomic_t)))
 #define BARRIER_BUCKETS_NR             (1<<BARRIER_BUCKETS_NR_BITS)
 
+/* Note: raid1_info.rdev can be set to NULL asynchronously by raid1_remove_disk.
+ * There are three safe ways to access raid1_info.rdev.
+ * 1/ when holding mddev->reconfig_mutex
+ * 2/ when resync/recovery is known to be happening - i.e. in code that is
+ *    called as part of performing resync/recovery.
+ * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
+ *    and if it is non-NULL, increment rdev->nr_pending before dropping the
+ *    RCU lock.
+ * When .rdev is set to NULL, the nr_pending count is checked again and if it
+ * has been incremented, the pointer is put back in .rdev.
+ */
+
 struct raid1_info {
        struct md_rdev  *rdev;
        sector_t        head_position;
index 99c9207899a777c615880f286a6f617ffc4a70bf..c5e6c60fc0d41b53a578874087ae87259e3ebcce 100644 (file)
@@ -141,7 +141,7 @@ static void r10bio_pool_free(void *r10_bio, void *data)
 #define RESYNC_WINDOW (1024*1024)
 /* maximum number of concurrent requests, memory permitting */
 #define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE)
-#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
+#define CLUSTER_RESYNC_WINDOW (32 * RESYNC_WINDOW)
 #define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)
 
 /*
@@ -2655,7 +2655,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
                for (m = 0; m < conf->copies; m++) {
                        int dev = r10_bio->devs[m].devnum;
                        rdev = conf->mirrors[dev].rdev;
-                       if (r10_bio->devs[m].bio == NULL)
+                       if (r10_bio->devs[m].bio == NULL ||
+                               r10_bio->devs[m].bio->bi_end_io == NULL)
                                continue;
                        if (!r10_bio->devs[m].bio->bi_status) {
                                rdev_clear_badblocks(
@@ -2670,7 +2671,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
                                        md_error(conf->mddev, rdev);
                        }
                        rdev = conf->mirrors[dev].replacement;
-                       if (r10_bio->devs[m].repl_bio == NULL)
+                       if (r10_bio->devs[m].repl_bio == NULL ||
+                               r10_bio->devs[m].repl_bio->bi_end_io == NULL)
                                continue;
 
                        if (!r10_bio->devs[m].repl_bio->bi_status) {
@@ -3782,7 +3784,7 @@ static int raid10_run(struct mddev *mddev)
                if (fc > 1 || fo > 0) {
                        pr_err("only near layout is supported by clustered"
                                " raid10\n");
-                       goto out;
+                       goto out_free_conf;
                }
        }
 
@@ -4830,17 +4832,11 @@ static void raid10_finish_reshape(struct mddev *mddev)
                return;
 
        if (mddev->delta_disks > 0) {
-               sector_t size = raid10_size(mddev, 0, 0);
-               md_set_array_sectors(mddev, size);
                if (mddev->recovery_cp > mddev->resync_max_sectors) {
                        mddev->recovery_cp = mddev->resync_max_sectors;
                        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
                }
-               mddev->resync_max_sectors = size;
-               if (mddev->queue) {
-                       set_capacity(mddev->gendisk, mddev->array_sectors);
-                       revalidate_disk(mddev->gendisk);
-               }
+               mddev->resync_max_sectors = mddev->array_sectors;
        } else {
                int d;
                rcu_read_lock();
index db2ac22ac1b42801a14e5eab8641109a3aa61505..e2e8840de9bfab734e9238bc59ca8e9058c7de0b 100644 (file)
@@ -2,6 +2,19 @@
 #ifndef _RAID10_H
 #define _RAID10_H
 
+/* Note: raid10_info.rdev can be set to NULL asynchronously by
+ * raid10_remove_disk.
+ * There are three safe ways to access raid10_info.rdev.
+ * 1/ when holding mddev->reconfig_mutex
+ * 2/ when resync/recovery/reshape is known to be happening - i.e. in code
+ *    that is called as part of performing resync/recovery/reshape.
+ * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
+ *    and if it is non-NULL, increment rdev->nr_pending before dropping the
+ *    RCU lock.
+ * When .rdev is set to NULL, the nr_pending count is checked again and if it
+ * has been incremented, the pointer is put back in .rdev.
+ */
+
 struct raid10_info {
        struct md_rdev  *rdev, *replacement;
        sector_t        head_position;
index 0c76bcedfc1cbd8b0af33008652095e3862f22c9..a001808a2b77da16bc2ae3c1d4aa32b5f944a939 100644 (file)
@@ -44,6 +44,7 @@ extern void ppl_write_stripe_run(struct r5conf *conf);
 extern void ppl_stripe_write_finished(struct stripe_head *sh);
 extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add);
 extern void ppl_quiesce(struct r5conf *conf, int quiesce);
+extern int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio);
 
 static inline bool raid5_has_ppl(struct r5conf *conf)
 {
@@ -104,7 +105,7 @@ static inline int log_handle_flush_request(struct r5conf *conf, struct bio *bio)
        if (conf->log)
                ret = r5l_handle_flush_request(conf->log, bio);
        else if (raid5_has_ppl(conf))
-               ret = 0;
+               ret = ppl_handle_flush_request(conf->log, bio);
 
        return ret;
 }
index 2764c2290062862a607dca9145ef8f0e24cb5d89..42890a08375bc73b6bcdba7961e9c88062dded5d 100644 (file)
@@ -693,6 +693,16 @@ void ppl_quiesce(struct r5conf *conf, int quiesce)
        }
 }
 
+int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio)
+{
+       if (bio->bi_iter.bi_size == 0) {
+               bio_endio(bio);
+               return 0;
+       }
+       bio->bi_opf &= ~REQ_PREFLUSH;
+       return -EAGAIN;
+}
+
 void ppl_stripe_write_finished(struct stripe_head *sh)
 {
        struct ppl_io_unit *io;
index 50d01144b80535e2e937c16021cdeeba632b201e..b5d2601483e34fa97fc8cfb7faeff6f014c0e035 100644 (file)
@@ -2196,15 +2196,16 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
 static int grow_stripes(struct r5conf *conf, int num)
 {
        struct kmem_cache *sc;
+       size_t namelen = sizeof(conf->cache_name[0]);
        int devs = max(conf->raid_disks, conf->previous_raid_disks);
 
        if (conf->mddev->gendisk)
-               sprintf(conf->cache_name[0],
+               snprintf(conf->cache_name[0], namelen,
                        "raid%d-%s", conf->level, mdname(conf->mddev));
        else
-               sprintf(conf->cache_name[0],
+               snprintf(conf->cache_name[0], namelen,
                        "raid%d-%p", conf->level, conf->mddev);
-       sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
+       snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]);
 
        conf->active_name = 0;
        sc = kmem_cache_create(conf->cache_name[conf->active_name],
@@ -6764,9 +6765,7 @@ static void free_conf(struct r5conf *conf)
 
        log_exit(conf);
 
-       if (conf->shrinker.nr_deferred)
-               unregister_shrinker(&conf->shrinker);
-
+       unregister_shrinker(&conf->shrinker);
        free_thread_groups(conf);
        shrink_stripes(conf);
        raid5_free_percpu(conf);
@@ -8001,13 +8000,7 @@ static void raid5_finish_reshape(struct mddev *mddev)
 
        if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
 
-               if (mddev->delta_disks > 0) {
-                       md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
-                       if (mddev->queue) {
-                               set_capacity(mddev->gendisk, mddev->array_sectors);
-                               revalidate_disk(mddev->gendisk);
-                       }
-               } else {
+               if (mddev->delta_disks <= 0) {
                        int d;
                        spin_lock_irq(&conf->device_lock);
                        mddev->degraded = raid5_calc_degraded(conf);
index 2e6123825095296555e8a73a52d63872f2fa2bff..3f8da26032accce9fdf492b58fc120a94ac31582 100644 (file)
@@ -450,6 +450,18 @@ enum {
  * HANDLE gets cleared if stripe_handle leaves nothing locked.
  */
 
+/* Note: disk_info.rdev can be set to NULL asynchronously by raid5_remove_disk.
+ * There are three safe ways to access disk_info.rdev.
+ * 1/ when holding mddev->reconfig_mutex
+ * 2/ when resync/recovery/reshape is known to be happening - i.e. in code that
+ *    is called as part of performing resync/recovery/reshape.
+ * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
+ *    and if it is non-NULL, increment rdev->nr_pending before dropping the RCU
+ *    lock.
+ * When .rdev is set to NULL, the nr_pending count is checked again and if
+ * it has been incremented, the pointer is put back in .rdev.
+ */
+
 struct disk_info {
        struct md_rdev  *rdev, *replacement;
        struct page     *extra_page; /* extra page to use in prexor */
index 145e12bfb8190ab8274645c43493b9978fff6e1a..372c074bb1b90fa0d1010d697faa18a7005231d7 100644 (file)
@@ -147,6 +147,8 @@ config DVB_CORE
 config DVB_MMAP
        bool "Enable DVB memory-mapped API (EXPERIMENTAL)"
        depends on DVB_CORE
+       depends on VIDEO_V4L2=y || VIDEO_V4L2=DVB_CORE
+       select VIDEOBUF2_VMALLOC
        default n
        help
          This option enables DVB experimental memory-mapped API, with
index 5df05250de947d7cf2bc396dc5104851f52fcb36..17c32ea58395d78f9fafe5525bbfbf1bdfe4b690 100644 (file)
@@ -3,6 +3,9 @@ config VIDEOBUF2_CORE
        select DMA_SHARED_BUFFER
        tristate
 
+config VIDEOBUF2_V4L2
+       tristate
+
 config VIDEOBUF2_MEMOPS
        tristate
        select FRAME_VECTOR
index 19de5ccda20b3c9de62a616077cfd11c6d743c58..77bebe8b202f46695ef239caaec98a567e088a03 100644 (file)
@@ -1,5 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+videobuf2-common-objs := videobuf2-core.o
 
-obj-$(CONFIG_VIDEOBUF2_CORE) += videobuf2-core.o videobuf2-v4l2.o
+ifeq ($(CONFIG_TRACEPOINTS),y)
+  videobuf2-common-objs += vb2-trace.o
+endif
+
+obj-$(CONFIG_VIDEOBUF2_CORE) += videobuf2-common.o
+obj-$(CONFIG_VIDEOBUF2_V4L2) += videobuf2-v4l2.o
 obj-$(CONFIG_VIDEOBUF2_MEMOPS) += videobuf2-memops.o
 obj-$(CONFIG_VIDEOBUF2_VMALLOC) += videobuf2-vmalloc.o
 obj-$(CONFIG_VIDEOBUF2_DMA_CONTIG) += videobuf2-dma-contig.o
index 3a105d82019a0288b37f7a7143b1e8c79adffb42..62b028ded9f784438175969610608e60f35a9353 100644 (file)
@@ -4,7 +4,7 @@
 #
 
 dvb-net-$(CONFIG_DVB_NET) := dvb_net.o
-dvb-vb2-$(CONFIG_DVB_MMSP) := dvb_vb2.o
+dvb-vb2-$(CONFIG_DVB_MMAP) := dvb_vb2.o
 
 dvb-core-objs := dvbdev.o dmxdev.o dvb_demux.o                 \
                 dvb_ca_en50221.o dvb_frontend.o                \
index 6d53af00190e396fda03bab661b56d4e4a1fc4a2..61a750fae4653be8da2ee47e60ae94e36e2143e6 100644 (file)
@@ -128,11 +128,7 @@ static int dvb_dvr_open(struct inode *inode, struct file *file)
        struct dvb_device *dvbdev = file->private_data;
        struct dmxdev *dmxdev = dvbdev->priv;
        struct dmx_frontend *front;
-#ifndef DVB_MMAP
        bool need_ringbuffer = false;
-#else
-       const bool need_ringbuffer = true;
-#endif
 
        dprintk("%s\n", __func__);
 
@@ -144,17 +140,31 @@ static int dvb_dvr_open(struct inode *inode, struct file *file)
                return -ENODEV;
        }
 
-#ifndef DVB_MMAP
+       dmxdev->may_do_mmap = 0;
+
+       /*
+        * The logic here is a little tricky due to the ifdef.
+        *
+        * The ringbuffer is used for both read and mmap.
+        *
+        * It is not needed, however, in two situations:
+        *      - Write devices (access with O_WRONLY);
+        *      - For duplex device nodes, opened with O_RDWR.
+        */
+
        if ((file->f_flags & O_ACCMODE) == O_RDONLY)
                need_ringbuffer = true;
-#else
-       if ((file->f_flags & O_ACCMODE) == O_RDWR) {
+       else if ((file->f_flags & O_ACCMODE) == O_RDWR) {
                if (!(dmxdev->capabilities & DMXDEV_CAP_DUPLEX)) {
+#ifdef CONFIG_DVB_MMAP
+                       dmxdev->may_do_mmap = 1;
+                       need_ringbuffer = true;
+#else
                        mutex_unlock(&dmxdev->mutex);
                        return -EOPNOTSUPP;
+#endif
                }
        }
-#endif
 
        if (need_ringbuffer) {
                void *mem;
@@ -169,8 +179,9 @@ static int dvb_dvr_open(struct inode *inode, struct file *file)
                        return -ENOMEM;
                }
                dvb_ringbuffer_init(&dmxdev->dvr_buffer, mem, DVR_BUFFER_SIZE);
-               dvb_vb2_init(&dmxdev->dvr_vb2_ctx, "dvr",
-                            file->f_flags & O_NONBLOCK);
+               if (dmxdev->may_do_mmap)
+                       dvb_vb2_init(&dmxdev->dvr_vb2_ctx, "dvr",
+                                    file->f_flags & O_NONBLOCK);
                dvbdev->readers--;
        }
 
@@ -200,11 +211,6 @@ static int dvb_dvr_release(struct inode *inode, struct file *file)
 {
        struct dvb_device *dvbdev = file->private_data;
        struct dmxdev *dmxdev = dvbdev->priv;
-#ifndef DVB_MMAP
-       bool need_ringbuffer = false;
-#else
-       const bool need_ringbuffer = true;
-#endif
 
        mutex_lock(&dmxdev->mutex);
 
@@ -213,15 +219,14 @@ static int dvb_dvr_release(struct inode *inode, struct file *file)
                dmxdev->demux->connect_frontend(dmxdev->demux,
                                                dmxdev->dvr_orig_fe);
        }
-#ifndef DVB_MMAP
-       if ((file->f_flags & O_ACCMODE) == O_RDONLY)
-               need_ringbuffer = true;
-#endif
 
-       if (need_ringbuffer) {
-               if (dvb_vb2_is_streaming(&dmxdev->dvr_vb2_ctx))
-                       dvb_vb2_stream_off(&dmxdev->dvr_vb2_ctx);
-               dvb_vb2_release(&dmxdev->dvr_vb2_ctx);
+       if (((file->f_flags & O_ACCMODE) == O_RDONLY) ||
+           dmxdev->may_do_mmap) {
+               if (dmxdev->may_do_mmap) {
+                       if (dvb_vb2_is_streaming(&dmxdev->dvr_vb2_ctx))
+                               dvb_vb2_stream_off(&dmxdev->dvr_vb2_ctx);
+                       dvb_vb2_release(&dmxdev->dvr_vb2_ctx);
+               }
                dvbdev->readers++;
                if (dmxdev->dvr_buffer.data) {
                        void *mem = dmxdev->dvr_buffer.data;
@@ -380,7 +385,8 @@ static void dvb_dmxdev_filter_timer(struct dmxdev_filter *dmxdevfilter)
 
 static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len,
                                       const u8 *buffer2, size_t buffer2_len,
-                                      struct dmx_section_filter *filter)
+                                      struct dmx_section_filter *filter,
+                                      u32 *buffer_flags)
 {
        struct dmxdev_filter *dmxdevfilter = filter->priv;
        int ret;
@@ -399,10 +405,12 @@ static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len,
        dprintk("section callback %*ph\n", 6, buffer1);
        if (dvb_vb2_is_streaming(&dmxdevfilter->vb2_ctx)) {
                ret = dvb_vb2_fill_buffer(&dmxdevfilter->vb2_ctx,
-                                         buffer1, buffer1_len);
+                                         buffer1, buffer1_len,
+                                         buffer_flags);
                if (ret == buffer1_len)
                        ret = dvb_vb2_fill_buffer(&dmxdevfilter->vb2_ctx,
-                                                 buffer2, buffer2_len);
+                                                 buffer2, buffer2_len,
+                                                 buffer_flags);
        } else {
                ret = dvb_dmxdev_buffer_write(&dmxdevfilter->buffer,
                                              buffer1, buffer1_len);
@@ -422,11 +430,12 @@ static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len,
 
 static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len,
                                  const u8 *buffer2, size_t buffer2_len,
-                                 struct dmx_ts_feed *feed)
+                                 struct dmx_ts_feed *feed,
+                                 u32 *buffer_flags)
 {
        struct dmxdev_filter *dmxdevfilter = feed->priv;
        struct dvb_ringbuffer *buffer;
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
        struct dvb_vb2_ctx *ctx;
 #endif
        int ret;
@@ -440,20 +449,22 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len,
        if (dmxdevfilter->params.pes.output == DMX_OUT_TAP ||
            dmxdevfilter->params.pes.output == DMX_OUT_TSDEMUX_TAP) {
                buffer = &dmxdevfilter->buffer;
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
                ctx = &dmxdevfilter->vb2_ctx;
 #endif
        } else {
                buffer = &dmxdevfilter->dev->dvr_buffer;
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
                ctx = &dmxdevfilter->dev->dvr_vb2_ctx;
 #endif
        }
 
        if (dvb_vb2_is_streaming(ctx)) {
-               ret = dvb_vb2_fill_buffer(ctx, buffer1, buffer1_len);
+               ret = dvb_vb2_fill_buffer(ctx, buffer1, buffer1_len,
+                                         buffer_flags);
                if (ret == buffer1_len)
-                       ret = dvb_vb2_fill_buffer(ctx, buffer2, buffer2_len);
+                       ret = dvb_vb2_fill_buffer(ctx, buffer2, buffer2_len,
+                                                 buffer_flags);
        } else {
                if (buffer->error) {
                        spin_unlock(&dmxdevfilter->dev->lock);
@@ -802,6 +813,12 @@ static int dvb_demux_open(struct inode *inode, struct file *file)
        mutex_init(&dmxdevfilter->mutex);
        file->private_data = dmxdevfilter;
 
+#ifdef CONFIG_DVB_MMAP
+       dmxdev->may_do_mmap = 1;
+#else
+       dmxdev->may_do_mmap = 0;
+#endif
+
        dvb_ringbuffer_init(&dmxdevfilter->buffer, NULL, 8192);
        dvb_vb2_init(&dmxdevfilter->vb2_ctx, "demux_filter",
                     file->f_flags & O_NONBLOCK);
@@ -1111,7 +1128,7 @@ static int dvb_demux_do_ioctl(struct file *file,
                mutex_unlock(&dmxdevfilter->mutex);
                break;
 
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
        case DMX_REQBUFS:
                if (mutex_lock_interruptible(&dmxdevfilter->mutex)) {
                        mutex_unlock(&dmxdev->mutex);
@@ -1160,7 +1177,7 @@ static int dvb_demux_do_ioctl(struct file *file,
                break;
 #endif
        default:
-               ret = -EINVAL;
+               ret = -ENOTTY;
                break;
        }
        mutex_unlock(&dmxdev->mutex);
@@ -1199,13 +1216,16 @@ static __poll_t dvb_demux_poll(struct file *file, poll_table *wait)
        return mask;
 }
 
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
 static int dvb_demux_mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct dmxdev_filter *dmxdevfilter = file->private_data;
        struct dmxdev *dmxdev = dmxdevfilter->dev;
        int ret;
 
+       if (!dmxdev->may_do_mmap)
+               return -ENOTTY;
+
        if (mutex_lock_interruptible(&dmxdev->mutex))
                return -ERESTARTSYS;
 
@@ -1249,7 +1269,7 @@ static const struct file_operations dvb_demux_fops = {
        .release = dvb_demux_release,
        .poll = dvb_demux_poll,
        .llseek = default_llseek,
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
        .mmap = dvb_demux_mmap,
 #endif
 };
@@ -1280,7 +1300,7 @@ static int dvb_dvr_do_ioctl(struct file *file,
                ret = dvb_dvr_set_buffer_size(dmxdev, arg);
                break;
 
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
        case DMX_REQBUFS:
                ret = dvb_vb2_reqbufs(&dmxdev->dvr_vb2_ctx, parg);
                break;
@@ -1304,7 +1324,7 @@ static int dvb_dvr_do_ioctl(struct file *file,
                break;
 #endif
        default:
-               ret = -EINVAL;
+               ret = -ENOTTY;
                break;
        }
        mutex_unlock(&dmxdev->mutex);
@@ -1322,11 +1342,6 @@ static __poll_t dvb_dvr_poll(struct file *file, poll_table *wait)
        struct dvb_device *dvbdev = file->private_data;
        struct dmxdev *dmxdev = dvbdev->priv;
        __poll_t mask = 0;
-#ifndef DVB_MMAP
-       bool need_ringbuffer = false;
-#else
-       const bool need_ringbuffer = true;
-#endif
 
        dprintk("%s\n", __func__);
 
@@ -1337,11 +1352,8 @@ static __poll_t dvb_dvr_poll(struct file *file, poll_table *wait)
 
        poll_wait(file, &dmxdev->dvr_buffer.queue, wait);
 
-#ifndef DVB_MMAP
-       if ((file->f_flags & O_ACCMODE) == O_RDONLY)
-               need_ringbuffer = true;
-#endif
-       if (need_ringbuffer) {
+       if (((file->f_flags & O_ACCMODE) == O_RDONLY) ||
+           dmxdev->may_do_mmap) {
                if (dmxdev->dvr_buffer.error)
                        mask |= (EPOLLIN | EPOLLRDNORM | EPOLLPRI | EPOLLERR);
 
@@ -1353,13 +1365,16 @@ static __poll_t dvb_dvr_poll(struct file *file, poll_table *wait)
        return mask;
 }
 
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
 static int dvb_dvr_mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct dvb_device *dvbdev = file->private_data;
        struct dmxdev *dmxdev = dvbdev->priv;
        int ret;
 
+       if (!dmxdev->may_do_mmap)
+               return -ENOTTY;
+
        if (dmxdev->exit)
                return -ENODEV;
 
@@ -1381,7 +1396,7 @@ static const struct file_operations dvb_dvr_fops = {
        .release = dvb_dvr_release,
        .poll = dvb_dvr_poll,
        .llseek = default_llseek,
-#ifdef DVB_MMAP
+#ifdef CONFIG_DVB_MMAP
        .mmap = dvb_dvr_mmap,
 #endif
 };
index 210eed0269b085d5b8c99d5f8941928cf0613a30..f45091246bdca90d870dc0a10bd35e8fd4d73eea 100644 (file)
@@ -55,6 +55,17 @@ MODULE_PARM_DESC(dvb_demux_feed_err_pkts,
                dprintk(x);                             \
 } while (0)
 
+#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG
+#  define dprintk_sect_loss(x...) dprintk(x)
+#else
+#  define dprintk_sect_loss(x...)
+#endif
+
+#define set_buf_flags(__feed, __flag)                  \
+       do {                                            \
+               (__feed)->buffer_flags |= (__flag);     \
+       } while (0)
+
 /******************************************************************************
  * static inlined helper functions
  ******************************************************************************/
@@ -104,31 +115,30 @@ static inline int dvb_dmx_swfilter_payload(struct dvb_demux_feed *feed,
 {
        int count = payload(buf);
        int p;
-#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG
        int ccok;
        u8 cc;
-#endif
 
        if (count == 0)
                return -1;
 
        p = 188 - count;
 
-#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG
        cc = buf[3] & 0x0f;
        ccok = ((feed->cc + 1) & 0x0f) == cc;
        feed->cc = cc;
-       if (!ccok)
-               dprintk("missed packet: %d instead of %d!\n",
-                       cc, (feed->cc + 1) & 0x0f);
-#endif
+       if (!ccok) {
+               set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED);
+               dprintk_sect_loss("missed packet: %d instead of %d!\n",
+                                 cc, (feed->cc + 1) & 0x0f);
+       }
 
        if (buf[1] & 0x40)      // PUSI ?
                feed->peslen = 0xfffa;
 
        feed->peslen += count;
 
-       return feed->cb.ts(&buf[p], count, NULL, 0, &feed->feed.ts);
+       return feed->cb.ts(&buf[p], count, NULL, 0, &feed->feed.ts,
+                          &feed->buffer_flags);
 }
 
 static int dvb_dmx_swfilter_sectionfilter(struct dvb_demux_feed *feed,
@@ -150,7 +160,7 @@ static int dvb_dmx_swfilter_sectionfilter(struct dvb_demux_feed *feed,
                return 0;
 
        return feed->cb.sec(feed->feed.sec.secbuf, feed->feed.sec.seclen,
-                           NULL, 0, &f->filter);
+                           NULL, 0, &f->filter, &feed->buffer_flags);
 }
 
 static inline int dvb_dmx_swfilter_section_feed(struct dvb_demux_feed *feed)
@@ -169,8 +179,10 @@ static inline int dvb_dmx_swfilter_section_feed(struct dvb_demux_feed *feed)
        if (sec->check_crc) {
                section_syntax_indicator = ((sec->secbuf[1] & 0x80) != 0);
                if (section_syntax_indicator &&
-                   demux->check_crc32(feed, sec->secbuf, sec->seclen))
+                   demux->check_crc32(feed, sec->secbuf, sec->seclen)) {
+                       set_buf_flags(feed, DMX_BUFFER_FLAG_HAD_CRC32_DISCARD);
                        return -1;
+               }
        }
 
        do {
@@ -187,7 +199,6 @@ static void dvb_dmx_swfilter_section_new(struct dvb_demux_feed *feed)
 {
        struct dmx_section_feed *sec = &feed->feed.sec;
 
-#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG
        if (sec->secbufp < sec->tsfeedp) {
                int n = sec->tsfeedp - sec->secbufp;
 
@@ -197,12 +208,13 @@ static void dvb_dmx_swfilter_section_new(struct dvb_demux_feed *feed)
                 * but just first and last.
                 */
                if (sec->secbuf[0] != 0xff || sec->secbuf[n - 1] != 0xff) {
-                       dprintk("section ts padding loss: %d/%d\n",
-                              n, sec->tsfeedp);
-                       dprintk("pad data: %*ph\n", n, sec->secbuf);
+                       set_buf_flags(feed,
+                                     DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED);
+                       dprintk_sect_loss("section ts padding loss: %d/%d\n",
+                                         n, sec->tsfeedp);
+                       dprintk_sect_loss("pad data: %*ph\n", n, sec->secbuf);
                }
        }
-#endif
 
        sec->tsfeedp = sec->secbufp = sec->seclen = 0;
        sec->secbuf = sec->secbuf_base;
@@ -237,11 +249,10 @@ static int dvb_dmx_swfilter_section_copy_dump(struct dvb_demux_feed *feed,
                return 0;
 
        if (sec->tsfeedp + len > DMX_MAX_SECFEED_SIZE) {
-#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG
-               dprintk("section buffer full loss: %d/%d\n",
-                       sec->tsfeedp + len - DMX_MAX_SECFEED_SIZE,
-                       DMX_MAX_SECFEED_SIZE);
-#endif
+               set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED);
+               dprintk_sect_loss("section buffer full loss: %d/%d\n",
+                                 sec->tsfeedp + len - DMX_MAX_SECFEED_SIZE,
+                                 DMX_MAX_SECFEED_SIZE);
                len = DMX_MAX_SECFEED_SIZE - sec->tsfeedp;
        }
 
@@ -269,12 +280,13 @@ static int dvb_dmx_swfilter_section_copy_dump(struct dvb_demux_feed *feed,
                sec->seclen = seclen;
                sec->crc_val = ~0;
                /* dump [secbuf .. secbuf+seclen) */
-               if (feed->pusi_seen)
+               if (feed->pusi_seen) {
                        dvb_dmx_swfilter_section_feed(feed);
-#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG
-               else
-                       dprintk("pusi not seen, discarding section data\n");
-#endif
+               } else {
+                       set_buf_flags(feed,
+                                     DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED);
+                       dprintk_sect_loss("pusi not seen, discarding section data\n");
+               }
                sec->secbufp += seclen; /* secbufp and secbuf moving together is */
                sec->secbuf += seclen;  /* redundant but saves pointer arithmetic */
        }
@@ -307,18 +319,22 @@ static int dvb_dmx_swfilter_section_packet(struct dvb_demux_feed *feed,
        }
 
        if (!ccok || dc_i) {
-#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG
-               if (dc_i)
-                       dprintk("%d frame with disconnect indicator\n",
+               if (dc_i) {
+                       set_buf_flags(feed,
+                                     DMX_BUFFER_FLAG_DISCONTINUITY_INDICATOR);
+                       dprintk_sect_loss("%d frame with disconnect indicator\n",
                                cc);
-               else
-                       dprintk("discontinuity: %d instead of %d. %d bytes lost\n",
+               } else {
+                       set_buf_flags(feed,
+                                     DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED);
+                       dprintk_sect_loss("discontinuity: %d instead of %d. %d bytes lost\n",
                                cc, (feed->cc + 1) & 0x0f, count + 4);
+               }
                /*
-                * those bytes under sume circumstances will again be reported
+                * those bytes under some circumstances will again be reported
                 * in the following dvb_dmx_swfilter_section_new
                 */
-#endif
+
                /*
                 * Discontinuity detected. Reset pusi_seen to
                 * stop feeding of suspicious data until next PUSI=1 arrives
@@ -326,6 +342,7 @@ static int dvb_dmx_swfilter_section_packet(struct dvb_demux_feed *feed,
                 * FIXME: does it make sense if the MPEG-TS is the one
                 *      reporting discontinuity?
                 */
+
                feed->pusi_seen = false;
                dvb_dmx_swfilter_section_new(feed);
        }
@@ -345,11 +362,11 @@ static int dvb_dmx_swfilter_section_packet(struct dvb_demux_feed *feed,
                        dvb_dmx_swfilter_section_new(feed);
                        dvb_dmx_swfilter_section_copy_dump(feed, after,
                                                           after_len);
+               } else if (count > 0) {
+                       set_buf_flags(feed,
+                                     DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED);
+                       dprintk_sect_loss("PUSI=1 but %d bytes lost\n", count);
                }
-#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG
-               else if (count > 0)
-                       dprintk("PUSI=1 but %d bytes lost\n", count);
-#endif
        } else {
                /* PUSI=0 (is not set), no section boundary */
                dvb_dmx_swfilter_section_copy_dump(feed, &buf[p], count);
@@ -369,7 +386,8 @@ static inline void dvb_dmx_swfilter_packet_type(struct dvb_demux_feed *feed,
                        if (feed->ts_type & TS_PAYLOAD_ONLY)
                                dvb_dmx_swfilter_payload(feed, buf);
                        else
-                               feed->cb.ts(buf, 188, NULL, 0, &feed->feed.ts);
+                               feed->cb.ts(buf, 188, NULL, 0, &feed->feed.ts,
+                                           &feed->buffer_flags);
                }
                /* Used only on full-featured devices */
                if (feed->ts_type & TS_DECODER)
@@ -430,6 +448,11 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf)
        }
 
        if (buf[1] & 0x80) {
+               list_for_each_entry(feed, &demux->feed_list, list_head) {
+                       if ((feed->pid != pid) && (feed->pid != 0x2000))
+                               continue;
+                       set_buf_flags(feed, DMX_BUFFER_FLAG_TEI);
+               }
                dprintk_tscheck("TEI detected. PID=0x%x data1=0x%x\n",
                                pid, buf[1]);
                /* data in this packet can't be trusted - drop it unless
@@ -445,6 +468,13 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf)
                                                (demux->cnt_storage[pid] + 1) & 0xf;
 
                                if ((buf[3] & 0xf) != demux->cnt_storage[pid]) {
+                                       list_for_each_entry(feed, &demux->feed_list, list_head) {
+                                               if ((feed->pid != pid) && (feed->pid != 0x2000))
+                                                       continue;
+                                               set_buf_flags(feed,
+                                                             DMX_BUFFER_PKT_COUNTER_MISMATCH);
+                                       }
+
                                        dprintk_tscheck("TS packet counter mismatch. PID=0x%x expected 0x%x got 0x%x\n",
                                                        pid, demux->cnt_storage[pid],
                                                        buf[3] & 0xf);
@@ -466,7 +496,8 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf)
                if (feed->pid == pid)
                        dvb_dmx_swfilter_packet_type(feed, buf);
                else if (feed->pid == 0x2000)
-                       feed->cb.ts(buf, 188, NULL, 0, &feed->feed.ts);
+                       feed->cb.ts(buf, 188, NULL, 0, &feed->feed.ts,
+                                   &feed->buffer_flags);
        }
 }
 
@@ -585,7 +616,8 @@ void dvb_dmx_swfilter_raw(struct dvb_demux *demux, const u8 *buf, size_t count)
 
        spin_lock_irqsave(&demux->lock, flags);
 
-       demux->feed->cb.ts(buf, count, NULL, 0, &demux->feed->feed.ts);
+       demux->feed->cb.ts(buf, count, NULL, 0, &demux->feed->feed.ts,
+                          &demux->feed->buffer_flags);
 
        spin_unlock_irqrestore(&demux->lock, flags);
 }
@@ -785,6 +817,7 @@ static int dvbdmx_allocate_ts_feed(struct dmx_demux *dmx,
        feed->demux = demux;
        feed->pid = 0xffff;
        feed->peslen = 0xfffa;
+       feed->buffer_flags = 0;
 
        (*ts_feed) = &feed->feed.ts;
        (*ts_feed)->parent = dmx;
@@ -1042,6 +1075,7 @@ static int dvbdmx_allocate_section_feed(struct dmx_demux *demux,
        dvbdmxfeed->cb.sec = callback;
        dvbdmxfeed->demux = dvbdmx;
        dvbdmxfeed->pid = 0xffff;
+       dvbdmxfeed->buffer_flags = 0;
        dvbdmxfeed->feed.sec.secbuf = dvbdmxfeed->feed.sec.secbuf_base;
        dvbdmxfeed->feed.sec.secbufp = dvbdmxfeed->feed.sec.seclen = 0;
        dvbdmxfeed->feed.sec.tsfeedp = 0;
index b6c7eec863b9232b5fb30d07b809a01298d85c22..ba39f9942e1db06f0eeadc6473287f641ff4af85 100644 (file)
@@ -883,7 +883,8 @@ static void dvb_net_ule(struct net_device *dev, const u8 *buf, size_t buf_len)
 
 static int dvb_net_ts_callback(const u8 *buffer1, size_t buffer1_len,
                               const u8 *buffer2, size_t buffer2_len,
-                              struct dmx_ts_feed *feed)
+                              struct dmx_ts_feed *feed,
+                              u32 *buffer_flags)
 {
        struct net_device *dev = feed->priv;
 
@@ -992,7 +993,7 @@ static void dvb_net_sec(struct net_device *dev,
 
 static int dvb_net_sec_callback(const u8 *buffer1, size_t buffer1_len,
                 const u8 *buffer2, size_t buffer2_len,
-                struct dmx_section_filter *filter)
+                struct dmx_section_filter *filter, u32 *buffer_flags)
 {
        struct net_device *dev = filter->priv;
 
index 763145d74e836aba5ae400e20522cce0c84a308d..b811adf88afa3424d14b6584969928d2b5cdae54 100644 (file)
@@ -256,7 +256,8 @@ int dvb_vb2_is_streaming(struct dvb_vb2_ctx *ctx)
 }
 
 int dvb_vb2_fill_buffer(struct dvb_vb2_ctx *ctx,
-                       const unsigned char *src, int len)
+                       const unsigned char *src, int len,
+                       enum dmx_buffer_flags *buffer_flags)
 {
        unsigned long flags = 0;
        void *vbuf = NULL;
@@ -264,15 +265,17 @@ int dvb_vb2_fill_buffer(struct dvb_vb2_ctx *ctx,
        unsigned char *psrc = (unsigned char *)src;
        int ll = 0;
 
-       dprintk(3, "[%s] %d bytes are rcvd\n", ctx->name, len);
-       if (!src) {
-               dprintk(3, "[%s]:NULL pointer src\n", ctx->name);
-               /**normal case: This func is called twice from demux driver
-                * once with valid src pointer, second time with NULL pointer
-                */
+       /*
+        * normal case: This func is called twice from demux driver
+        * one with valid src pointer, second time with NULL pointer
+        */
+       if (!src || !len)
                return 0;
-       }
        spin_lock_irqsave(&ctx->slock, flags);
+       if (buffer_flags && *buffer_flags) {
+               ctx->flags |= *buffer_flags;
+               *buffer_flags = 0;
+       }
        while (todo) {
                if (!ctx->buf) {
                        if (list_empty(&ctx->dvb_q)) {
@@ -395,6 +398,7 @@ int dvb_vb2_qbuf(struct dvb_vb2_ctx *ctx, struct dmx_buffer *b)
 
 int dvb_vb2_dqbuf(struct dvb_vb2_ctx *ctx, struct dmx_buffer *b)
 {
+       unsigned long flags;
        int ret;
 
        ret = vb2_core_dqbuf(&ctx->vb_q, &b->index, b, ctx->nonblocking);
@@ -402,7 +406,16 @@ int dvb_vb2_dqbuf(struct dvb_vb2_ctx *ctx, struct dmx_buffer *b)
                dprintk(1, "[%s] errno=%d\n", ctx->name, ret);
                return ret;
        }
-       dprintk(5, "[%s] index=%d\n", ctx->name, b->index);
+
+       spin_lock_irqsave(&ctx->slock, flags);
+       b->count = ctx->count++;
+       b->flags = ctx->flags;
+       ctx->flags = 0;
+       spin_unlock_irqrestore(&ctx->slock, flags);
+
+       dprintk(5, "[%s] index=%d, count=%d, flags=%d\n",
+               ctx->name, b->index, ctx->count, b->flags);
+
 
        return 0;
 }
index 50bce68ffd6646526fa86e1eaa5c65692b02a7b6..65d157fe76d19dad45ac94fa9eb21cbdad5dd575 100644 (file)
@@ -1262,11 +1262,12 @@ static int m88ds3103_select(struct i2c_mux_core *muxc, u32 chan)
  * New users must use I2C client binding directly!
  */
 struct dvb_frontend *m88ds3103_attach(const struct m88ds3103_config *cfg,
-               struct i2c_adapter *i2c, struct i2c_adapter **tuner_i2c_adapter)
+                                     struct i2c_adapter *i2c,
+                                     struct i2c_adapter **tuner_i2c_adapter)
 {
        struct i2c_client *client;
        struct i2c_board_info board_info;
-       struct m88ds3103_platform_data pdata;
+       struct m88ds3103_platform_data pdata = {};
 
        pdata.clk = cfg->clock;
        pdata.i2c_wr_max = cfg->i2c_wr_max;
@@ -1409,6 +1410,8 @@ static int m88ds3103_probe(struct i2c_client *client,
        case M88DS3103_CHIP_ID:
                break;
        default:
+               ret = -ENODEV;
+               dev_err(&client->dev, "Unknown device. Chip_id=%02x\n", dev->chip_id);
                goto err_kfree;
        }
 
index 3c1851984b907a54b22920dec24e865731006f96..2476d812f669476a286672de807009df8f4980fb 100644 (file)
@@ -505,80 +505,77 @@ static struct i2c_vbi_ram_value vbi_ram_default[] =
        /* FIXME: Current api doesn't handle all VBI types, those not
           yet supported are placed under #if 0 */
 #if 0
-       {0x010, /* Teletext, SECAM, WST System A */
+       [0] = {0x010, /* Teletext, SECAM, WST System A */
                {V4L2_SLICED_TELETEXT_SECAM,6,23,1},
                { 0xaa, 0xaa, 0xff, 0xff, 0xe7, 0x2e, 0x20, 0x26,
                  0xe6, 0xb4, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00 }
        },
 #endif
-       {0x030, /* Teletext, PAL, WST System B */
+       [1] = {0x030, /* Teletext, PAL, WST System B */
                {V4L2_SLICED_TELETEXT_B,6,22,1},
                { 0xaa, 0xaa, 0xff, 0xff, 0x27, 0x2e, 0x20, 0x2b,
                  0xa6, 0x72, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00 }
        },
 #if 0
-       {0x050, /* Teletext, PAL, WST System C */
+       [2] = {0x050, /* Teletext, PAL, WST System C */
                {V4L2_SLICED_TELETEXT_PAL_C,6,22,1},
                { 0xaa, 0xaa, 0xff, 0xff, 0xe7, 0x2e, 0x20, 0x22,
                  0xa6, 0x98, 0x0d, 0x00, 0x00, 0x00, 0x10, 0x00 }
        },
-       {0x070, /* Teletext, NTSC, WST System B */
+       [3] = {0x070, /* Teletext, NTSC, WST System B */
                {V4L2_SLICED_TELETEXT_NTSC_B,10,21,1},
                { 0xaa, 0xaa, 0xff, 0xff, 0x27, 0x2e, 0x20, 0x23,
                  0x69, 0x93, 0x0d, 0x00, 0x00, 0x00, 0x10, 0x00 }
        },
-       {0x090, /* Tetetext, NTSC NABTS System C */
+       [4] = {0x090, /* Tetetext, NTSC NABTS System C */
                {V4L2_SLICED_TELETEXT_NTSC_C,10,21,1},
                { 0xaa, 0xaa, 0xff, 0xff, 0xe7, 0x2e, 0x20, 0x22,
                  0x69, 0x93, 0x0d, 0x00, 0x00, 0x00, 0x15, 0x00 }
        },
-       {0x0b0, /* Teletext, NTSC-J, NABTS System D */
+       [5] = {0x0b0, /* Teletext, NTSC-J, NABTS System D */
                {V4L2_SLICED_TELETEXT_NTSC_D,10,21,1},
                { 0xaa, 0xaa, 0xff, 0xff, 0xa7, 0x2e, 0x20, 0x23,
                  0x69, 0x93, 0x0d, 0x00, 0x00, 0x00, 0x10, 0x00 }
        },
-       {0x0d0, /* Closed Caption, PAL/SECAM */
+       [6] = {0x0d0, /* Closed Caption, PAL/SECAM */
                {V4L2_SLICED_CAPTION_625,22,22,1},
                { 0xaa, 0x2a, 0xff, 0x3f, 0x04, 0x51, 0x6e, 0x02,
                  0xa6, 0x7b, 0x09, 0x00, 0x00, 0x00, 0x27, 0x00 }
        },
 #endif
-       {0x0f0, /* Closed Caption, NTSC */
+       [7] = {0x0f0, /* Closed Caption, NTSC */
                {V4L2_SLICED_CAPTION_525,21,21,1},
                { 0xaa, 0x2a, 0xff, 0x3f, 0x04, 0x51, 0x6e, 0x02,
                  0x69, 0x8c, 0x09, 0x00, 0x00, 0x00, 0x27, 0x00 }
        },
-       {0x110, /* Wide Screen Signal, PAL/SECAM */
+       [8] = {0x110, /* Wide Screen Signal, PAL/SECAM */
                {V4L2_SLICED_WSS_625,23,23,1},
                { 0x5b, 0x55, 0xc5, 0xff, 0x00, 0x71, 0x6e, 0x42,
                  0xa6, 0xcd, 0x0f, 0x00, 0x00, 0x00, 0x3a, 0x00 }
        },
 #if 0
-       {0x130, /* Wide Screen Signal, NTSC C */
+       [9] = {0x130, /* Wide Screen Signal, NTSC C */
                {V4L2_SLICED_WSS_525,20,20,1},
                { 0x38, 0x00, 0x3f, 0x00, 0x00, 0x71, 0x6e, 0x43,
                  0x69, 0x7c, 0x08, 0x00, 0x00, 0x00, 0x39, 0x00 }
        },
-       {0x150, /* Vertical Interval Timecode (VITC), PAL/SECAM */
+       [10] = {0x150, /* Vertical Interval Timecode (VITC), PAL/SECAM */
                {V4l2_SLICED_VITC_625,6,22,0},
                { 0x00, 0x00, 0x00, 0x00, 0x00, 0x8f, 0x6d, 0x49,
                  0xa6, 0x85, 0x08, 0x00, 0x00, 0x00, 0x4c, 0x00 }
        },
-       {0x170, /* Vertical Interval Timecode (VITC), NTSC */
+       [11] = {0x170, /* Vertical Interval Timecode (VITC), NTSC */
                {V4l2_SLICED_VITC_525,10,20,0},
                { 0x00, 0x00, 0x00, 0x00, 0x00, 0x8f, 0x6d, 0x49,
                  0x69, 0x94, 0x08, 0x00, 0x00, 0x00, 0x4c, 0x00 }
        },
 #endif
-       {0x190, /* Video Program System (VPS), PAL */
+       [12] = {0x190, /* Video Program System (VPS), PAL */
                {V4L2_SLICED_VPS,16,16,0},
                { 0xaa, 0xaa, 0xff, 0xff, 0xba, 0xce, 0x2b, 0x0d,
                  0xa6, 0xda, 0x0b, 0x00, 0x00, 0x00, 0x60, 0x00 }
        },
        /* 0x1d0 User programmable */
-
-       /* End of struct */
-       { (u16)-1 }
 };
 
 static int tvp5150_write_inittab(struct v4l2_subdev *sd,
@@ -591,10 +588,10 @@ static int tvp5150_write_inittab(struct v4l2_subdev *sd,
        return 0;
 }
 
-static int tvp5150_vdp_init(struct v4l2_subdev *sd,
-                               const struct i2c_vbi_ram_value *regs)
+static int tvp5150_vdp_init(struct v4l2_subdev *sd)
 {
        unsigned int i;
+       int j;
 
        /* Disable Full Field */
        tvp5150_write(sd, TVP5150_FULL_FIELD_ENA, 0);
@@ -604,14 +601,17 @@ static int tvp5150_vdp_init(struct v4l2_subdev *sd,
                tvp5150_write(sd, i, 0xff);
 
        /* Load Ram Table */
-       while (regs->reg != (u16)-1) {
+       for (j = 0; j < ARRAY_SIZE(vbi_ram_default); j++) {
+               const struct i2c_vbi_ram_value *regs = &vbi_ram_default[j];
+
+               if (!regs->type.vbi_type)
+                       continue;
+
                tvp5150_write(sd, TVP5150_CONF_RAM_ADDR_HIGH, regs->reg >> 8);
                tvp5150_write(sd, TVP5150_CONF_RAM_ADDR_LOW, regs->reg);
 
                for (i = 0; i < 16; i++)
                        tvp5150_write(sd, TVP5150_VDP_CONF_RAM_DATA, regs->values[i]);
-
-               regs++;
        }
        return 0;
 }
@@ -620,19 +620,23 @@ static int tvp5150_vdp_init(struct v4l2_subdev *sd,
 static int tvp5150_g_sliced_vbi_cap(struct v4l2_subdev *sd,
                                struct v4l2_sliced_vbi_cap *cap)
 {
-       const struct i2c_vbi_ram_value *regs = vbi_ram_default;
-       int line;
+       int line, i;
 
        dev_dbg_lvl(sd->dev, 1, debug, "g_sliced_vbi_cap\n");
        memset(cap, 0, sizeof *cap);
 
-       while (regs->reg != (u16)-1 ) {
-               for (line=regs->type.ini_line;line<=regs->type.end_line;line++) {
+       for (i = 0; i < ARRAY_SIZE(vbi_ram_default); i++) {
+               const struct i2c_vbi_ram_value *regs = &vbi_ram_default[i];
+
+               if (!regs->type.vbi_type)
+                       continue;
+
+               for (line = regs->type.ini_line;
+                    line <= regs->type.end_line;
+                    line++) {
                        cap->service_lines[0][line] |= regs->type.vbi_type;
                }
                cap->service_set |= regs->type.vbi_type;
-
-               regs++;
        }
        return 0;
 }
@@ -651,14 +655,13 @@ static int tvp5150_g_sliced_vbi_cap(struct v4l2_subdev *sd,
  *     MSB = field2
  */
 static int tvp5150_set_vbi(struct v4l2_subdev *sd,
-                       const struct i2c_vbi_ram_value *regs,
                        unsigned int type,u8 flags, int line,
                        const int fields)
 {
        struct tvp5150 *decoder = to_tvp5150(sd);
        v4l2_std_id std = decoder->norm;
        u8 reg;
-       int pos = 0;
+       int i, pos = 0;
 
        if (std == V4L2_STD_ALL) {
                dev_err(sd->dev, "VBI can't be configured without knowing number of lines\n");
@@ -671,19 +674,19 @@ static int tvp5150_set_vbi(struct v4l2_subdev *sd,
        if (line < 6 || line > 27)
                return 0;
 
-       while (regs->reg != (u16)-1) {
+       for (i = 0; i < ARRAY_SIZE(vbi_ram_default); i++) {
+               const struct i2c_vbi_ram_value *regs =  &vbi_ram_default[i];
+
+               if (!regs->type.vbi_type)
+                       continue;
+
                if ((type & regs->type.vbi_type) &&
                    (line >= regs->type.ini_line) &&
                    (line <= regs->type.end_line))
                        break;
-
-               regs++;
                pos++;
        }
 
-       if (regs->reg == (u16)-1)
-               return 0;
-
        type = pos | (flags & 0xf0);
        reg = ((line - 6) << 1) + TVP5150_LINE_MODE_INI;
 
@@ -696,8 +699,7 @@ static int tvp5150_set_vbi(struct v4l2_subdev *sd,
        return type;
 }
 
-static int tvp5150_get_vbi(struct v4l2_subdev *sd,
-                       const struct i2c_vbi_ram_value *regs, int line)
+static int tvp5150_get_vbi(struct v4l2_subdev *sd, int line)
 {
        struct tvp5150 *decoder = to_tvp5150(sd);
        v4l2_std_id std = decoder->norm;
@@ -726,8 +728,8 @@ static int tvp5150_get_vbi(struct v4l2_subdev *sd,
                        return 0;
                }
                pos = ret & 0x0f;
-               if (pos < 0x0f)
-                       type |= regs[pos].type.vbi_type;
+               if (pos < ARRAY_SIZE(vbi_ram_default))
+                       type |= vbi_ram_default[pos].type.vbi_type;
        }
 
        return type;
@@ -788,7 +790,7 @@ static int tvp5150_reset(struct v4l2_subdev *sd, u32 val)
        tvp5150_write_inittab(sd, tvp5150_init_default);
 
        /* Initializes VDP registers */
-       tvp5150_vdp_init(sd, vbi_ram_default);
+       tvp5150_vdp_init(sd);
 
        /* Selects decoder input */
        tvp5150_selmux(sd);
@@ -1121,8 +1123,8 @@ static int tvp5150_s_sliced_fmt(struct v4l2_subdev *sd, struct v4l2_sliced_vbi_f
                for (i = 0; i <= 23; i++) {
                        svbi->service_lines[1][i] = 0;
                        svbi->service_lines[0][i] =
-                               tvp5150_set_vbi(sd, vbi_ram_default,
-                                      svbi->service_lines[0][i], 0xf0, i, 3);
+                               tvp5150_set_vbi(sd, svbi->service_lines[0][i],
+                                               0xf0, i, 3);
                }
                /* Enables FIFO */
                tvp5150_write(sd, TVP5150_FIFO_OUT_CTRL, 1);
@@ -1148,7 +1150,7 @@ static int tvp5150_g_sliced_fmt(struct v4l2_subdev *sd, struct v4l2_sliced_vbi_f
 
        for (i = 0; i <= 23; i++) {
                svbi->service_lines[0][i] =
-                       tvp5150_get_vbi(sd, vbi_ram_default, i);
+                       tvp5150_get_vbi(sd, i);
                mask |= svbi->service_lines[0][i];
        }
        svbi->service_set = mask;
index dc8e577b2f748a4d287dacaf817beed1096e903e..d6816effb87866b80eae57d7c45004524d8cc031 100644 (file)
@@ -324,14 +324,15 @@ static int DvbDmxFilterCallback(u8 *buffer1, size_t buffer1_len,
                }
                return dvbdmxfilter->feed->cb.sec(buffer1, buffer1_len,
                                                  buffer2, buffer2_len,
-                                                 &dvbdmxfilter->filter);
+                                                 &dvbdmxfilter->filter, NULL);
        case DMX_TYPE_TS:
                if (!(dvbdmxfilter->feed->ts_type & TS_PACKET))
                        return 0;
                if (dvbdmxfilter->feed->ts_type & TS_PAYLOAD_ONLY)
                        return dvbdmxfilter->feed->cb.ts(buffer1, buffer1_len,
                                                         buffer2, buffer2_len,
-                                                        &dvbdmxfilter->feed->feed.ts);
+                                                        &dvbdmxfilter->feed->feed.ts,
+                                                        NULL);
                else
                        av7110_p2t_write(buffer1, buffer1_len,
                                         dvbdmxfilter->feed->pid,
index 4daba76ec240bc382e3b48e17f3d1cbafaaa6858..ef1bc17cdc4d37a8549b86d7085a8fdfd76abca3 100644 (file)
@@ -99,7 +99,7 @@ int av7110_record_cb(struct dvb_filter_pes2ts *p2t, u8 *buf, size_t len)
                buf[4] = buf[5] = 0;
        if (dvbdmxfeed->ts_type & TS_PAYLOAD_ONLY)
                return dvbdmxfeed->cb.ts(buf, len, NULL, 0,
-                                        &dvbdmxfeed->feed.ts);
+                                        &dvbdmxfeed->feed.ts, NULL);
        else
                return dvb_filter_pes2ts(p2t, buf, len, 1);
 }
@@ -109,7 +109,7 @@ static int dvb_filter_pes2ts_cb(void *priv, unsigned char *data)
        struct dvb_demux_feed *dvbdmxfeed = (struct dvb_demux_feed *) priv;
 
        dvbdmxfeed->cb.ts(data, 188, NULL, 0,
-                         &dvbdmxfeed->feed.ts);
+                         &dvbdmxfeed->feed.ts, NULL);
        return 0;
 }
 
@@ -814,7 +814,7 @@ static void p_to_t(u8 const *buf, long int length, u16 pid, u8 *counter,
                        memcpy(obuf + l, buf + c, TS_SIZE - l);
                        c = length;
                }
-               feed->cb.ts(obuf, 188, NULL, 0, &feed->feed.ts);
+               feed->cb.ts(obuf, 188, NULL, 0, &feed->feed.ts, NULL);
                pes_start = 0;
        }
 }
index 70521e0b4c5348b6a1ab563e122b557fb58c111b..bfaa806633df740ee72047d5ea81158eedfcb4b0 100644 (file)
@@ -1,7 +1,7 @@
 
 config VIDEO_AU0828
        tristate "Auvitek AU0828 support"
-       depends on I2C && INPUT && DVB_CORE && USB
+       depends on I2C && INPUT && DVB_CORE && USB && VIDEO_V4L2
        select I2C_ALGOBIT
        select VIDEO_TVEEPROM
        select VIDEOBUF2_VMALLOC
index a8900f5571f784014d273eb29058fa04ca08a55c..44ca66cb9b8f141e15d7ca0ccc0b7ae5c2abee60 100644 (file)
@@ -428,7 +428,7 @@ static int ttusb_dec_audio_pes2ts_cb(void *priv, unsigned char *data)
        struct ttusb_dec *dec = priv;
 
        dec->audio_filter->feed->cb.ts(data, 188, NULL, 0,
-                                      &dec->audio_filter->feed->feed.ts);
+                                      &dec->audio_filter->feed->feed.ts, NULL);
 
        return 0;
 }
@@ -438,7 +438,7 @@ static int ttusb_dec_video_pes2ts_cb(void *priv, unsigned char *data)
        struct ttusb_dec *dec = priv;
 
        dec->video_filter->feed->cb.ts(data, 188, NULL, 0,
-                                      &dec->video_filter->feed->feed.ts);
+                                      &dec->video_filter->feed->feed.ts, NULL);
 
        return 0;
 }
@@ -490,7 +490,7 @@ static void ttusb_dec_process_pva(struct ttusb_dec *dec, u8 *pva, int length)
 
                if (output_pva) {
                        dec->video_filter->feed->cb.ts(pva, length, NULL, 0,
-                               &dec->video_filter->feed->feed.ts);
+                               &dec->video_filter->feed->feed.ts, NULL);
                        return;
                }
 
@@ -551,7 +551,7 @@ static void ttusb_dec_process_pva(struct ttusb_dec *dec, u8 *pva, int length)
        case 0x02:              /* MainAudioStream */
                if (output_pva) {
                        dec->audio_filter->feed->cb.ts(pva, length, NULL, 0,
-                               &dec->audio_filter->feed->feed.ts);
+                               &dec->audio_filter->feed->feed.ts, NULL);
                        return;
                }
 
@@ -589,7 +589,7 @@ static void ttusb_dec_process_filter(struct ttusb_dec *dec, u8 *packet,
 
        if (filter)
                filter->feed->cb.sec(&packet[2], length - 2, NULL, 0,
-                                    &filter->filter);
+                                    &filter->filter, NULL);
 }
 
 static void ttusb_dec_process_packet(struct ttusb_dec *dec)
index bf52fbd07aeddc3f228cef870d05e9622699bd03..8e37e7c5e0f7e25aca250b8b052f274f2b8f3c09 100644 (file)
@@ -7,6 +7,7 @@ config VIDEO_V4L2
        tristate
        depends on (I2C || I2C=n) && VIDEO_DEV
        select RATIONAL
+       select VIDEOBUF2_V4L2 if VIDEOBUF2_CORE
        default (I2C || I2C=n) && VIDEO_DEV
 
 config VIDEO_ADV_DEBUG
index 80de2cb9c476a256634815155a4d033a428228af..7df54582e95689ef2c189652dd5c33ea735a8023 100644 (file)
@@ -13,7 +13,7 @@ ifeq ($(CONFIG_COMPAT),y)
 endif
 obj-$(CONFIG_V4L2_FWNODE) += v4l2-fwnode.o
 ifeq ($(CONFIG_TRACEPOINTS),y)
-  videodev-objs += vb2-trace.o v4l2-trace.o
+  videodev-objs += v4l2-trace.o
 endif
 videodev-$(CONFIG_MEDIA_CONTROLLER) += v4l2-mc.o
 
@@ -35,4 +35,3 @@ obj-$(CONFIG_VIDEOBUF_DVB) += videobuf-dvb.o
 
 ccflags-y += -I$(srctree)/drivers/media/dvb-frontends
 ccflags-y += -I$(srctree)/drivers/media/tuners
-
index 0a7bdbed3a6f0185ce35ec49d2e6ccce80592a9c..e9c1485c32b95c66c670e698557632964a946072 100644 (file)
 #define REG_TO_DCPU_MBOX       0x10
 #define REG_TO_HOST_MBOX       0x14
 
+/* Macros to process offsets returned by the DCPU */
+#define DRAM_MSG_ADDR_OFFSET   0x0
+#define DRAM_MSG_TYPE_OFFSET   0x1c
+#define DRAM_MSG_ADDR_MASK     ((1UL << DRAM_MSG_TYPE_OFFSET) - 1)
+#define DRAM_MSG_TYPE_MASK     ((1UL << \
+                                (BITS_PER_LONG - DRAM_MSG_TYPE_OFFSET)) - 1)
+
 /* Message RAM */
-#define DCPU_MSG_RAM(x)                (0x100 + (x) * sizeof(u32))
+#define DCPU_MSG_RAM_START     0x100
+#define DCPU_MSG_RAM(x)                (DCPU_MSG_RAM_START + (x) * sizeof(u32))
 
 /* DRAM Info Offsets & Masks */
 #define DRAM_INFO_INTERVAL     0x0
@@ -255,6 +263,40 @@ static unsigned int get_msg_chksum(const u32 msg[])
        return sum;
 }
 
+static void __iomem *get_msg_ptr(struct private_data *priv, u32 response,
+                                char *buf, ssize_t *size)
+{
+       unsigned int msg_type;
+       unsigned int offset;
+       void __iomem *ptr = NULL;
+
+       msg_type = (response >> DRAM_MSG_TYPE_OFFSET) & DRAM_MSG_TYPE_MASK;
+       offset = (response >> DRAM_MSG_ADDR_OFFSET) & DRAM_MSG_ADDR_MASK;
+
+       /*
+        * msg_type == 1: the offset is relative to the message RAM
+        * msg_type == 0: the offset is relative to the data RAM (this is the
+        *                previous way of passing data)
+        * msg_type is anything else: there's critical hardware problem
+        */
+       switch (msg_type) {
+       case 1:
+               ptr = priv->regs + DCPU_MSG_RAM_START + offset;
+               break;
+       case 0:
+               ptr = priv->dmem + offset;
+               break;
+       default:
+               dev_emerg(priv->dev, "invalid message reply from DCPU: %#x\n",
+                       response);
+               if (buf && size)
+                       *size = sprintf(buf,
+                               "FATAL: communication error with DCPU\n");
+       }
+
+       return ptr;
+}
+
 static int __send_command(struct private_data *priv, unsigned int cmd,
                          u32 result[])
 {
@@ -507,7 +549,7 @@ static ssize_t show_info(struct device *dev, struct device_attribute *devattr,
 {
        u32 response[MSG_FIELD_MAX];
        unsigned int info;
-       int ret;
+       ssize_t ret;
 
        ret = generic_show(DPFE_CMD_GET_INFO, response, dev, buf);
        if (ret)
@@ -528,18 +570,19 @@ static ssize_t show_refresh(struct device *dev,
        u32 response[MSG_FIELD_MAX];
        void __iomem *info;
        struct private_data *priv;
-       unsigned int offset;
        u8 refresh, sr_abort, ppre, thermal_offs, tuf;
        u32 mr4;
-       int ret;
+       ssize_t ret;
 
        ret = generic_show(DPFE_CMD_GET_REFRESH, response, dev, buf);
        if (ret)
                return ret;
 
        priv = dev_get_drvdata(dev);
-       offset = response[MSG_ARG0];
-       info = priv->dmem + offset;
+
+       info = get_msg_ptr(priv, response[MSG_ARG0], buf, &ret);
+       if (!info)
+               return ret;
 
        mr4 = readl_relaxed(info + DRAM_INFO_MR4) & DRAM_INFO_MR4_MASK;
 
@@ -561,7 +604,6 @@ static ssize_t store_refresh(struct device *dev, struct device_attribute *attr,
        u32 response[MSG_FIELD_MAX];
        struct private_data *priv;
        void __iomem *info;
-       unsigned int offset;
        unsigned long val;
        int ret;
 
@@ -574,8 +616,10 @@ static ssize_t store_refresh(struct device *dev, struct device_attribute *attr,
        if (ret)
                return ret;
 
-       offset = response[MSG_ARG0];
-       info = priv->dmem + offset;
+       info = get_msg_ptr(priv, response[MSG_ARG0], NULL, NULL);
+       if (!info)
+               return -EIO;
+
        writel_relaxed(val, info + DRAM_INFO_INTERVAL);
 
        return count;
@@ -587,23 +631,25 @@ static ssize_t show_vendor(struct device *dev, struct device_attribute *devattr,
        u32 response[MSG_FIELD_MAX];
        struct private_data *priv;
        void __iomem *info;
-       unsigned int offset;
-       int ret;
+       ssize_t ret;
 
        ret = generic_show(DPFE_CMD_GET_VENDOR, response, dev, buf);
        if (ret)
                return ret;
 
-       offset = response[MSG_ARG0];
        priv = dev_get_drvdata(dev);
-       info = priv->dmem + offset;
+
+       info = get_msg_ptr(priv, response[MSG_ARG0], buf, &ret);
+       if (!info)
+               return ret;
 
        return sprintf(buf, "%#x %#x %#x %#x %#x\n",
                       readl_relaxed(info + DRAM_VENDOR_MR5) & DRAM_VENDOR_MASK,
                       readl_relaxed(info + DRAM_VENDOR_MR6) & DRAM_VENDOR_MASK,
                       readl_relaxed(info + DRAM_VENDOR_MR7) & DRAM_VENDOR_MASK,
                       readl_relaxed(info + DRAM_VENDOR_MR8) & DRAM_VENDOR_MASK,
-                      readl_relaxed(info + DRAM_VENDOR_ERROR));
+                      readl_relaxed(info + DRAM_VENDOR_ERROR) &
+                                    DRAM_VENDOR_MASK);
 }
 
 static int brcmstb_dpfe_resume(struct platform_device *pdev)
index 2dd2db9bc1c90f771ab19a4feabb167a6c367071..038509e5d031f44e99fee275c59b7b08421c9f29 100644 (file)
@@ -102,10 +102,32 @@ static long afu_ioctl_attach(struct ocxl_context *ctx,
        return rc;
 }
 
+static long afu_ioctl_get_metadata(struct ocxl_context *ctx,
+               struct ocxl_ioctl_metadata __user *uarg)
+{
+       struct ocxl_ioctl_metadata arg;
+
+       memset(&arg, 0, sizeof(arg));
+
+       arg.version = 0;
+
+       arg.afu_version_major = ctx->afu->config.version_major;
+       arg.afu_version_minor = ctx->afu->config.version_minor;
+       arg.pasid = ctx->pasid;
+       arg.pp_mmio_size = ctx->afu->config.pp_mmio_stride;
+       arg.global_mmio_size = ctx->afu->config.global_mmio_size;
+
+       if (copy_to_user(uarg, &arg, sizeof(arg)))
+               return -EFAULT;
+
+       return 0;
+}
+
 #define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" :                        \
                        x == OCXL_IOCTL_IRQ_ALLOC ? "IRQ_ALLOC" :       \
                        x == OCXL_IOCTL_IRQ_FREE ? "IRQ_FREE" :         \
                        x == OCXL_IOCTL_IRQ_SET_FD ? "IRQ_SET_FD" :     \
+                       x == OCXL_IOCTL_GET_METADATA ? "GET_METADATA" : \
                        "UNKNOWN")
 
 static long afu_ioctl(struct file *file, unsigned int cmd,
@@ -133,8 +155,10 @@ static long afu_ioctl(struct file *file, unsigned int cmd,
                if (!rc) {
                        rc = copy_to_user((u64 __user *) args, &irq_offset,
                                        sizeof(irq_offset));
-                       if (rc)
+                       if (rc) {
                                ocxl_afu_irq_free(ctx, irq_offset);
+                               return -EFAULT;
+                       }
                }
                break;
 
@@ -157,6 +181,11 @@ static long afu_ioctl(struct file *file, unsigned int cmd,
                                        irq_fd.eventfd);
                break;
 
+       case OCXL_IOCTL_GET_METADATA:
+               rc = afu_ioctl_get_metadata(ctx,
+                               (struct ocxl_ioctl_metadata __user *) args);
+               break;
+
        default:
                rc = -EINVAL;
        }
@@ -329,7 +358,7 @@ static ssize_t afu_read(struct file *file, char __user *buf, size_t count,
 
        used += sizeof(header);
 
-       rc = (ssize_t) used;
+       rc = used;
        return rc;
 }
 
index 20135a5de748846ec9101b5b88860d6da3c3850e..2cfb963d9f379ed5a29c6d357430f7dd3a1a4a79 100644 (file)
@@ -72,6 +72,7 @@ MODULE_ALIAS("mmc:block");
 #define MMC_BLK_TIMEOUT_MS  (10 * 1000)
 #define MMC_SANITIZE_REQ_TIMEOUT 240000
 #define MMC_EXTRACT_INDEX_FROM_ARG(x) ((x & 0x00FF0000) >> 16)
+#define MMC_EXTRACT_VALUE_FROM_ARG(x) ((x & 0x0000FF00) >> 8)
 
 #define mmc_req_rel_wr(req)    ((req->cmd_flags & REQ_FUA) && \
                                  (rq_data_dir(req) == WRITE))
@@ -586,6 +587,24 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md,
                return data.error;
        }
 
+       /*
+        * Make sure the cache of the PARTITION_CONFIG register and
+        * PARTITION_ACCESS bits is updated in case the ioctl ext_csd write
+        * changed it successfully.
+        */
+       if ((MMC_EXTRACT_INDEX_FROM_ARG(cmd.arg) == EXT_CSD_PART_CONFIG) &&
+           (cmd.opcode == MMC_SWITCH)) {
+               struct mmc_blk_data *main_md = dev_get_drvdata(&card->dev);
+               u8 value = MMC_EXTRACT_VALUE_FROM_ARG(cmd.arg);
+
+               /*
+                * Update cache so the next mmc_blk_part_switch call operates
+                * on up-to-date data.
+                */
+               card->ext_csd.part_config = value;
+               main_md->part_curr = value & EXT_CSD_PART_CONFIG_ACC_MASK;
+       }
+
        /*
         * According to the SD specs, some commands require a delay after
         * issuing the command.
index 79a5b985ccf5ee8fe5ba06b5aec717f36799794e..9c821eedd1566750ce044507bd986b5cad9a2ec0 100644 (file)
@@ -82,6 +82,7 @@ struct mmc_fixup {
 #define CID_MANFID_APACER       0x27
 #define CID_MANFID_KINGSTON     0x70
 #define CID_MANFID_HYNIX       0x90
+#define CID_MANFID_NUMONYX     0xFE
 
 #define END_FIXUP { NULL }
 
index 908e4db03535b038d548563188c0a11ac1ce4761..42d6aa89a48a95ad82bc60445f73971ac48730d2 100644 (file)
@@ -848,7 +848,6 @@ int mmc_interrupt_hpi(struct mmc_card *card)
                return 1;
        }
 
-       mmc_claim_host(card->host);
        err = mmc_send_status(card, &status);
        if (err) {
                pr_err("%s: Get card status fail\n", mmc_hostname(card->host));
@@ -890,7 +889,6 @@ int mmc_interrupt_hpi(struct mmc_card *card)
        } while (!err);
 
 out:
-       mmc_release_host(card->host);
        return err;
 }
 
@@ -932,9 +930,7 @@ static int mmc_read_bkops_status(struct mmc_card *card)
        int err;
        u8 *ext_csd;
 
-       mmc_claim_host(card->host);
        err = mmc_get_ext_csd(card, &ext_csd);
-       mmc_release_host(card->host);
        if (err)
                return err;
 
index 75d317623852dc9f55586e41a176311a48144e1d..5153577754f02861ceab4689813441f9ac4ea443 100644 (file)
@@ -109,6 +109,12 @@ static const struct mmc_fixup mmc_ext_csd_fixups[] = {
         */
        MMC_FIXUP_EXT_CSD_REV(CID_NAME_ANY, CID_MANFID_HYNIX,
                              0x014a, add_quirk, MMC_QUIRK_BROKEN_HPI, 5),
+       /*
+        * Certain Micron (Numonyx) eMMC 4.5 cards might get broken when HPI
+        * feature is used so disable the HPI feature for such buggy cards.
+        */
+       MMC_FIXUP_EXT_CSD_REV(CID_NAME_ANY, CID_MANFID_NUMONYX,
+                             0x014e, add_quirk, MMC_QUIRK_BROKEN_HPI, 6),
 
        END_FIXUP
 };
index 35026795be2803c7387203c5719f4442c15b7c50..a84aa3f1ae8547c4cdbf24cb05ef7e32dca8d94a 100644 (file)
@@ -165,9 +165,15 @@ static void dw_mci_exynos_set_clksel_timing(struct dw_mci *host, u32 timing)
 static int dw_mci_exynos_runtime_resume(struct device *dev)
 {
        struct dw_mci *host = dev_get_drvdata(dev);
+       int ret;
+
+       ret = dw_mci_runtime_resume(dev);
+       if (ret)
+               return ret;
 
        dw_mci_exynos_config_smu(host);
-       return dw_mci_runtime_resume(dev);
+
+       return ret;
 }
 
 /**
@@ -487,6 +493,7 @@ static unsigned long exynos_dwmmc_caps[4] = {
 
 static const struct dw_mci_drv_data exynos_drv_data = {
        .caps                   = exynos_dwmmc_caps,
+       .num_caps               = ARRAY_SIZE(exynos_dwmmc_caps),
        .init                   = dw_mci_exynos_priv_init,
        .set_ios                = dw_mci_exynos_set_ios,
        .parse_dt               = dw_mci_exynos_parse_dt,
index 73fd75c3c824904d7171a51f16943192f6c1bc03..89cdb3d533bb519f57e3a4c7d6a7570f54f3077c 100644 (file)
@@ -135,6 +135,9 @@ static int dw_mci_hi6220_parse_dt(struct dw_mci *host)
        if (priv->ctrl_id < 0)
                priv->ctrl_id = 0;
 
+       if (priv->ctrl_id >= TIMING_MODE)
+               return -EINVAL;
+
        host->priv = priv;
        return 0;
 }
@@ -207,6 +210,7 @@ static int dw_mci_hi6220_execute_tuning(struct dw_mci_slot *slot, u32 opcode)
 
 static const struct dw_mci_drv_data hi6220_data = {
        .caps                   = dw_mci_hi6220_caps,
+       .num_caps               = ARRAY_SIZE(dw_mci_hi6220_caps),
        .switch_voltage         = dw_mci_hi6220_switch_voltage,
        .set_ios                = dw_mci_hi6220_set_ios,
        .parse_dt               = dw_mci_hi6220_parse_dt,
index a3f1c2b3014534515db2246b0921b1be3a8b9c7d..3392952129355b2752f3131c0ff38a96b4e9d088 100644 (file)
@@ -319,6 +319,7 @@ static const struct dw_mci_drv_data rk2928_drv_data = {
 
 static const struct dw_mci_drv_data rk3288_drv_data = {
        .caps                   = dw_mci_rk3288_dwmmc_caps,
+       .num_caps               = ARRAY_SIZE(dw_mci_rk3288_dwmmc_caps),
        .set_ios                = dw_mci_rk3288_set_ios,
        .execute_tuning         = dw_mci_rk3288_execute_tuning,
        .parse_dt               = dw_mci_rk3288_parse_dt,
index d38e94ae2b855ccdbcd8c74db19950b9b9ea310e..c06b5393312ff185ffa9487931578665c74efd62 100644 (file)
@@ -195,6 +195,7 @@ static unsigned long zx_dwmmc_caps[3] = {
 
 static const struct dw_mci_drv_data zx_drv_data = {
        .caps                   = zx_dwmmc_caps,
+       .num_caps               = ARRAY_SIZE(zx_dwmmc_caps),
        .execute_tuning         = dw_mci_zx_execute_tuning,
        .prepare_hs400_tuning   = dw_mci_zx_prepare_hs400_tuning,
        .parse_dt               = dw_mci_zx_parse_dt,
index 0aa39975f33b8fbf36f0995cb56ffe283c7ca1b2..06d47414d0c19796beb70f130d192b3a4c3e592a 100644 (file)
@@ -165,6 +165,8 @@ static int dw_mci_regs_show(struct seq_file *s, void *v)
 {
        struct dw_mci *host = s->private;
 
+       pm_runtime_get_sync(host->dev);
+
        seq_printf(s, "STATUS:\t0x%08x\n", mci_readl(host, STATUS));
        seq_printf(s, "RINTSTS:\t0x%08x\n", mci_readl(host, RINTSTS));
        seq_printf(s, "CMD:\t0x%08x\n", mci_readl(host, CMD));
@@ -172,6 +174,8 @@ static int dw_mci_regs_show(struct seq_file *s, void *v)
        seq_printf(s, "INTMASK:\t0x%08x\n", mci_readl(host, INTMASK));
        seq_printf(s, "CLKENA:\t0x%08x\n", mci_readl(host, CLKENA));
 
+       pm_runtime_put_autosuspend(host->dev);
+
        return 0;
 }
 
@@ -409,7 +413,9 @@ static inline void dw_mci_set_cto(struct dw_mci *host)
        cto_div = (mci_readl(host, CLKDIV) & 0xff) * 2;
        if (cto_div == 0)
                cto_div = 1;
-       cto_ms = DIV_ROUND_UP(MSEC_PER_SEC * cto_clks * cto_div, host->bus_hz);
+
+       cto_ms = DIV_ROUND_UP_ULL((u64)MSEC_PER_SEC * cto_clks * cto_div,
+                                 host->bus_hz);
 
        /* add a bit spare time */
        cto_ms += 10;
@@ -558,6 +564,7 @@ static int dw_mci_idmac_init(struct dw_mci *host)
                                        (sizeof(struct idmac_desc_64addr) *
                                                        (i + 1))) >> 32;
                        /* Initialize reserved and buffer size fields to "0" */
+                       p->des0 = 0;
                        p->des1 = 0;
                        p->des2 = 0;
                        p->des3 = 0;
@@ -580,6 +587,7 @@ static int dw_mci_idmac_init(struct dw_mci *host)
                     i++, p++) {
                        p->des3 = cpu_to_le32(host->sg_dma +
                                        (sizeof(struct idmac_desc) * (i + 1)));
+                       p->des0 = 0;
                        p->des1 = 0;
                }
 
@@ -1795,8 +1803,8 @@ static bool dw_mci_reset(struct dw_mci *host)
        }
 
        if (host->use_dma == TRANS_MODE_IDMAC)
-               /* It is also recommended that we reset and reprogram idmac */
-               dw_mci_idmac_reset(host);
+               /* It is also required that we reinit idmac */
+               dw_mci_idmac_init(host);
 
        ret = true;
 
@@ -1944,8 +1952,9 @@ static void dw_mci_set_drto(struct dw_mci *host)
        drto_div = (mci_readl(host, CLKDIV) & 0xff) * 2;
        if (drto_div == 0)
                drto_div = 1;
-       drto_ms = DIV_ROUND_UP(MSEC_PER_SEC * drto_clks * drto_div,
-                              host->bus_hz);
+
+       drto_ms = DIV_ROUND_UP_ULL((u64)MSEC_PER_SEC * drto_clks * drto_div,
+                                  host->bus_hz);
 
        /* add a bit spare time */
        drto_ms += 10;
@@ -2778,12 +2787,57 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+static int dw_mci_init_slot_caps(struct dw_mci_slot *slot)
+{
+       struct dw_mci *host = slot->host;
+       const struct dw_mci_drv_data *drv_data = host->drv_data;
+       struct mmc_host *mmc = slot->mmc;
+       int ctrl_id;
+
+       if (host->pdata->caps)
+               mmc->caps = host->pdata->caps;
+
+       /*
+        * Support MMC_CAP_ERASE by default.
+        * It needs to use trim/discard/erase commands.
+        */
+       mmc->caps |= MMC_CAP_ERASE;
+
+       if (host->pdata->pm_caps)
+               mmc->pm_caps = host->pdata->pm_caps;
+
+       if (host->dev->of_node) {
+               ctrl_id = of_alias_get_id(host->dev->of_node, "mshc");
+               if (ctrl_id < 0)
+                       ctrl_id = 0;
+       } else {
+               ctrl_id = to_platform_device(host->dev)->id;
+       }
+
+       if (drv_data && drv_data->caps) {
+               if (ctrl_id >= drv_data->num_caps) {
+                       dev_err(host->dev, "invalid controller id %d\n",
+                               ctrl_id);
+                       return -EINVAL;
+               }
+               mmc->caps |= drv_data->caps[ctrl_id];
+       }
+
+       if (host->pdata->caps2)
+               mmc->caps2 = host->pdata->caps2;
+
+       /* Process SDIO IRQs through the sdio_irq_work. */
+       if (mmc->caps & MMC_CAP_SDIO_IRQ)
+               mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
+
+       return 0;
+}
+
 static int dw_mci_init_slot(struct dw_mci *host)
 {
        struct mmc_host *mmc;
        struct dw_mci_slot *slot;
-       const struct dw_mci_drv_data *drv_data = host->drv_data;
-       int ctrl_id, ret;
+       int ret;
        u32 freq[2];
 
        mmc = mmc_alloc_host(sizeof(struct dw_mci_slot), host->dev);
@@ -2817,38 +2871,13 @@ static int dw_mci_init_slot(struct dw_mci *host)
        if (!mmc->ocr_avail)
                mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
 
-       if (host->pdata->caps)
-               mmc->caps = host->pdata->caps;
-
-       /*
-        * Support MMC_CAP_ERASE by default.
-        * It needs to use trim/discard/erase commands.
-        */
-       mmc->caps |= MMC_CAP_ERASE;
-
-       if (host->pdata->pm_caps)
-               mmc->pm_caps = host->pdata->pm_caps;
-
-       if (host->dev->of_node) {
-               ctrl_id = of_alias_get_id(host->dev->of_node, "mshc");
-               if (ctrl_id < 0)
-                       ctrl_id = 0;
-       } else {
-               ctrl_id = to_platform_device(host->dev)->id;
-       }
-       if (drv_data && drv_data->caps)
-               mmc->caps |= drv_data->caps[ctrl_id];
-
-       if (host->pdata->caps2)
-               mmc->caps2 = host->pdata->caps2;
-
        ret = mmc_of_parse(mmc);
        if (ret)
                goto err_host_allocated;
 
-       /* Process SDIO IRQs through the sdio_irq_work. */
-       if (mmc->caps & MMC_CAP_SDIO_IRQ)
-               mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
+       ret = dw_mci_init_slot_caps(slot);
+       if (ret)
+               goto err_host_allocated;
 
        /* Useful defaults if platform data is unset. */
        if (host->use_dma == TRANS_MODE_IDMAC) {
index e3124f06a47ef52840dc5abb77a7febd104b083d..1424bd490dd1bc1bcbd71dd4b2d803508db337b0 100644 (file)
@@ -543,6 +543,7 @@ struct dw_mci_slot {
 /**
  * dw_mci driver data - dw-mshc implementation specific driver data.
  * @caps: mmc subsystem specified capabilities of the controller(s).
+ * @num_caps: number of capabilities specified by @caps.
  * @init: early implementation specific initialization.
  * @set_ios: handle bus specific extensions.
  * @parse_dt: parse implementation specific device tree properties.
@@ -554,6 +555,7 @@ struct dw_mci_slot {
  */
 struct dw_mci_drv_data {
        unsigned long   *caps;
+       u32             num_caps;
        int             (*init)(struct dw_mci *host);
        void            (*set_ios)(struct dw_mci *host, struct mmc_ios *ios);
        int             (*parse_dt)(struct dw_mci *host);
index 4065da58789d2d091cd918b8cd30c518aac331d7..32321bd596d880027358db10e9eb5f5b45957c1d 100644 (file)
@@ -680,7 +680,7 @@ static int sdhci_acpi_probe(struct platform_device *pdev)
        host->hw_name   = "ACPI";
        host->ops       = &sdhci_acpi_ops_dflt;
        host->irq       = platform_get_irq(pdev, 0);
-       if (host->irq <= 0) {
+       if (host->irq < 0) {
                err = -EINVAL;
                goto err_free;
        }
index 6d1a983e622722b527d8a3c1b6cdea449c88d180..82c4f05f91d8781528a365d97b3033472df987f4 100644 (file)
@@ -654,9 +654,36 @@ static void byt_read_dsm(struct sdhci_pci_slot *slot)
        slot->chip->rpm_retune = intel_host->d3_retune;
 }
 
-static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
+static int intel_execute_tuning(struct mmc_host *mmc, u32 opcode)
+{
+       int err = sdhci_execute_tuning(mmc, opcode);
+       struct sdhci_host *host = mmc_priv(mmc);
+
+       if (err)
+               return err;
+
+       /*
+        * Tuning can leave the IP in an active state (Buffer Read Enable bit
+        * set) which prevents the entry to low power states (i.e. S0i3). Data
+        * reset will clear it.
+        */
+       sdhci_reset(host, SDHCI_RESET_DATA);
+
+       return 0;
+}
+
+static void byt_probe_slot(struct sdhci_pci_slot *slot)
 {
+       struct mmc_host_ops *ops = &slot->host->mmc_host_ops;
+
        byt_read_dsm(slot);
+
+       ops->execute_tuning = intel_execute_tuning;
+}
+
+static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
+{
+       byt_probe_slot(slot);
        slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE |
                                 MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR |
                                 MMC_CAP_CMD_DURING_TFR |
@@ -779,7 +806,7 @@ static int ni_byt_sdio_probe_slot(struct sdhci_pci_slot *slot)
 {
        int err;
 
-       byt_read_dsm(slot);
+       byt_probe_slot(slot);
 
        err = ni_set_max_freq(slot);
        if (err)
@@ -792,7 +819,7 @@ static int ni_byt_sdio_probe_slot(struct sdhci_pci_slot *slot)
 
 static int byt_sdio_probe_slot(struct sdhci_pci_slot *slot)
 {
-       byt_read_dsm(slot);
+       byt_probe_slot(slot);
        slot->host->mmc->caps |= MMC_CAP_POWER_OFF_CARD | MMC_CAP_NONREMOVABLE |
                                 MMC_CAP_WAIT_WHILE_BUSY;
        return 0;
@@ -800,7 +827,7 @@ static int byt_sdio_probe_slot(struct sdhci_pci_slot *slot)
 
 static int byt_sd_probe_slot(struct sdhci_pci_slot *slot)
 {
-       byt_read_dsm(slot);
+       byt_probe_slot(slot);
        slot->host->mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY |
                                 MMC_CAP_AGGRESSIVE_PM | MMC_CAP_CD_WAKE;
        slot->cd_idx = 0;
index d88b78a17440382355592296a830a7270349c495..08b85215c2be98782f80acadd9302d85e1489dac 100644 (file)
@@ -149,6 +149,7 @@ config MACVTAP
 config IPVLAN
     tristate "IP-VLAN support"
     depends on INET
+    depends on IPV6 || !IPV6
     depends on NETFILTER
     select NET_L3_MASTER_DEV
     ---help---
index 64333ec999ac61c25517738b1a3104e1662ef09a..3afda6561434666fbb14887f653a9e2bda2aeb0f 100644 (file)
@@ -113,9 +113,6 @@ static struct devprobe2 m68k_probes[] __initdata = {
 #endif
 #ifdef CONFIG_MVME147_NET      /* MVME147 internal Ethernet */
        {mvme147lance_probe, 0},
-#endif
-#ifdef CONFIG_MAC89x0
-       {mac89x0_probe, 0},
 #endif
        {NULL, 0},
 };
index c669554d70bb7c7ba2fe3091ed1c58bd3026229f..4c19d23dd28214d1fcb5c57add35d24d4dd9dac4 100644 (file)
@@ -4791,6 +4791,7 @@ static struct pernet_operations bond_net_ops = {
        .exit = bond_net_exit,
        .id   = &bond_net_id,
        .size = sizeof(struct bond_net),
+       .async = true,
 };
 
 static int __init bonding_init(void)
index 1e37313054f3950ee30e6c6fccad874d9262013a..6da69af103e60d9e26ed30815fc946be56aee224 100644 (file)
@@ -390,37 +390,23 @@ static int cc770_get_berr_counter(const struct net_device *dev,
        return 0;
 }
 
-static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static void cc770_tx(struct net_device *dev, int mo)
 {
        struct cc770_priv *priv = netdev_priv(dev);
-       struct net_device_stats *stats = &dev->stats;
-       struct can_frame *cf = (struct can_frame *)skb->data;
-       unsigned int mo = obj2msgobj(CC770_OBJ_TX);
+       struct can_frame *cf = (struct can_frame *)priv->tx_skb->data;
        u8 dlc, rtr;
        u32 id;
        int i;
 
-       if (can_dropped_invalid_skb(dev, skb))
-               return NETDEV_TX_OK;
-
-       if ((cc770_read_reg(priv,
-                           msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) {
-               netdev_err(dev, "TX register is still occupied!\n");
-               return NETDEV_TX_BUSY;
-       }
-
-       netif_stop_queue(dev);
-
        dlc = cf->can_dlc;
        id = cf->can_id;
-       if (cf->can_id & CAN_RTR_FLAG)
-               rtr = 0;
-       else
-               rtr = MSGCFG_DIR;
+       rtr = cf->can_id & CAN_RTR_FLAG ? 0 : MSGCFG_DIR;
+
+       cc770_write_reg(priv, msgobj[mo].ctrl0,
+                       MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES);
        cc770_write_reg(priv, msgobj[mo].ctrl1,
                        RMTPND_RES | TXRQST_RES | CPUUPD_SET | NEWDAT_RES);
-       cc770_write_reg(priv, msgobj[mo].ctrl0,
-                       MSGVAL_SET | TXIE_SET | RXIE_RES | INTPND_RES);
+
        if (id & CAN_EFF_FLAG) {
                id &= CAN_EFF_MASK;
                cc770_write_reg(priv, msgobj[mo].config,
@@ -439,22 +425,30 @@ static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev)
        for (i = 0; i < dlc; i++)
                cc770_write_reg(priv, msgobj[mo].data[i], cf->data[i]);
 
-       /* Store echo skb before starting the transfer */
-       can_put_echo_skb(skb, dev, 0);
-
        cc770_write_reg(priv, msgobj[mo].ctrl1,
-                       RMTPND_RES | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC);
+                       RMTPND_UNC | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC);
+       cc770_write_reg(priv, msgobj[mo].ctrl0,
+                       MSGVAL_SET | TXIE_SET | RXIE_SET | INTPND_UNC);
+}
 
-       stats->tx_bytes += dlc;
+static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct cc770_priv *priv = netdev_priv(dev);
+       unsigned int mo = obj2msgobj(CC770_OBJ_TX);
 
+       if (can_dropped_invalid_skb(dev, skb))
+               return NETDEV_TX_OK;
 
-       /*
-        * HM: We had some cases of repeated IRQs so make sure the
-        * INT is acknowledged I know it's already further up, but
-        * doing again fixed the issue
-        */
-       cc770_write_reg(priv, msgobj[mo].ctrl0,
-                       MSGVAL_UNC | TXIE_UNC | RXIE_UNC | INTPND_RES);
+       netif_stop_queue(dev);
+
+       if ((cc770_read_reg(priv,
+                           msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) {
+               netdev_err(dev, "TX register is still occupied!\n");
+               return NETDEV_TX_BUSY;
+       }
+
+       priv->tx_skb = skb;
+       cc770_tx(dev, mo);
 
        return NETDEV_TX_OK;
 }
@@ -680,19 +674,46 @@ static void cc770_tx_interrupt(struct net_device *dev, unsigned int o)
        struct cc770_priv *priv = netdev_priv(dev);
        struct net_device_stats *stats = &dev->stats;
        unsigned int mo = obj2msgobj(o);
+       struct can_frame *cf;
+       u8 ctrl1;
+
+       ctrl1 = cc770_read_reg(priv, msgobj[mo].ctrl1);
 
-       /* Nothing more to send, switch off interrupts */
        cc770_write_reg(priv, msgobj[mo].ctrl0,
                        MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES);
-       /*
-        * We had some cases of repeated IRQ so make sure the
-        * INT is acknowledged
+       cc770_write_reg(priv, msgobj[mo].ctrl1,
+                       RMTPND_RES | TXRQST_RES | MSGLST_RES | NEWDAT_RES);
+
+       if (unlikely(!priv->tx_skb)) {
+               netdev_err(dev, "missing tx skb in tx interrupt\n");
+               return;
+       }
+
+       if (unlikely(ctrl1 & MSGLST_SET)) {
+               stats->rx_over_errors++;
+               stats->rx_errors++;
+       }
+
+       /* When the CC770 is sending an RTR message and it receives a regular
+        * message that matches the id of the RTR message, it will overwrite the
+        * outgoing message in the TX register. When this happens we must
+        * process the received message and try to transmit the outgoing skb
+        * again.
         */
-       cc770_write_reg(priv, msgobj[mo].ctrl0,
-                       MSGVAL_UNC | TXIE_UNC | RXIE_UNC | INTPND_RES);
+       if (unlikely(ctrl1 & NEWDAT_SET)) {
+               cc770_rx(dev, mo, ctrl1);
+               cc770_tx(dev, mo);
+               return;
+       }
 
+       cf = (struct can_frame *)priv->tx_skb->data;
+       stats->tx_bytes += cf->can_dlc;
        stats->tx_packets++;
+
+       can_put_echo_skb(priv->tx_skb, dev, 0);
        can_get_echo_skb(dev, 0);
+       priv->tx_skb = NULL;
+
        netif_wake_queue(dev);
 }
 
@@ -804,6 +825,7 @@ struct net_device *alloc_cc770dev(int sizeof_priv)
        priv->can.do_set_bittiming = cc770_set_bittiming;
        priv->can.do_set_mode = cc770_set_mode;
        priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES;
+       priv->tx_skb = NULL;
 
        memcpy(priv->obj_flags, cc770_obj_flags, sizeof(cc770_obj_flags));
 
index a1739db98d911f006f82a44682f0be1b8694a01c..95752e1d128397260968ee100b0b3035923547c9 100644 (file)
@@ -193,6 +193,8 @@ struct cc770_priv {
        u8 cpu_interface;       /* CPU interface register */
        u8 clkout;              /* Clock out register */
        u8 bus_config;          /* Bus conffiguration register */
+
+       struct sk_buff *tx_skb;
 };
 
 struct net_device *alloc_cc770dev(int sizeof_priv);
index 2772d05ff11caafbdf074aebccd32b415feae0c4..fedd927ba6ed998fe75260d5baa8c5e1bb274c4f 100644 (file)
@@ -30,6 +30,7 @@
 #define IFI_CANFD_STCMD_ERROR_ACTIVE           BIT(2)
 #define IFI_CANFD_STCMD_ERROR_PASSIVE          BIT(3)
 #define IFI_CANFD_STCMD_BUSOFF                 BIT(4)
+#define IFI_CANFD_STCMD_ERROR_WARNING          BIT(5)
 #define IFI_CANFD_STCMD_BUSMONITOR             BIT(16)
 #define IFI_CANFD_STCMD_LOOPBACK               BIT(18)
 #define IFI_CANFD_STCMD_DISABLE_CANFD          BIT(24)
 #define IFI_CANFD_TXSTCMD_OVERFLOW             BIT(13)
 
 #define IFI_CANFD_INTERRUPT                    0xc
+#define IFI_CANFD_INTERRUPT_ERROR_BUSOFF       BIT(0)
 #define IFI_CANFD_INTERRUPT_ERROR_WARNING      BIT(1)
+#define IFI_CANFD_INTERRUPT_ERROR_STATE_CHG    BIT(2)
+#define IFI_CANFD_INTERRUPT_ERROR_REC_TEC_INC  BIT(3)
 #define IFI_CANFD_INTERRUPT_ERROR_COUNTER      BIT(10)
 #define IFI_CANFD_INTERRUPT_TXFIFO_EMPTY       BIT(16)
 #define IFI_CANFD_INTERRUPT_TXFIFO_REMOVE      BIT(22)
 #define IFI_CANFD_INTERRUPT_SET_IRQ            ((u32)BIT(31))
 
 #define IFI_CANFD_IRQMASK                      0x10
+#define IFI_CANFD_IRQMASK_ERROR_BUSOFF         BIT(0)
+#define IFI_CANFD_IRQMASK_ERROR_WARNING                BIT(1)
+#define IFI_CANFD_IRQMASK_ERROR_STATE_CHG      BIT(2)
+#define IFI_CANFD_IRQMASK_ERROR_REC_TEC_INC    BIT(3)
 #define IFI_CANFD_IRQMASK_SET_ERR              BIT(7)
 #define IFI_CANFD_IRQMASK_SET_TS               BIT(15)
 #define IFI_CANFD_IRQMASK_TXFIFO_EMPTY         BIT(16)
 #define IFI_CANFD_SYSCLOCK                     0x50
 
 #define IFI_CANFD_VER                          0x54
+#define IFI_CANFD_VER_REV_MASK                 0xff
+#define IFI_CANFD_VER_REV_MIN_SUPPORTED                0x15
 
 #define IFI_CANFD_IP_ID                                0x58
 #define IFI_CANFD_IP_ID_VALUE                  0xD073CAFD
@@ -220,7 +230,10 @@ static void ifi_canfd_irq_enable(struct net_device *ndev, bool enable)
 
        if (enable) {
                enirq = IFI_CANFD_IRQMASK_TXFIFO_EMPTY |
-                       IFI_CANFD_IRQMASK_RXFIFO_NEMPTY;
+                       IFI_CANFD_IRQMASK_RXFIFO_NEMPTY |
+                       IFI_CANFD_IRQMASK_ERROR_STATE_CHG |
+                       IFI_CANFD_IRQMASK_ERROR_WARNING |
+                       IFI_CANFD_IRQMASK_ERROR_BUSOFF;
                if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)
                        enirq |= IFI_CANFD_INTERRUPT_ERROR_COUNTER;
        }
@@ -361,12 +374,13 @@ static int ifi_canfd_handle_lost_msg(struct net_device *ndev)
        return 1;
 }
 
-static int ifi_canfd_handle_lec_err(struct net_device *ndev, const u32 errctr)
+static int ifi_canfd_handle_lec_err(struct net_device *ndev)
 {
        struct ifi_canfd_priv *priv = netdev_priv(ndev);
        struct net_device_stats *stats = &ndev->stats;
        struct can_frame *cf;
        struct sk_buff *skb;
+       u32 errctr = readl(priv->base + IFI_CANFD_ERROR_CTR);
        const u32 errmask = IFI_CANFD_ERROR_CTR_OVERLOAD_FIRST |
                            IFI_CANFD_ERROR_CTR_ACK_ERROR_FIRST |
                            IFI_CANFD_ERROR_CTR_BIT0_ERROR_FIRST |
@@ -449,6 +463,11 @@ static int ifi_canfd_handle_state_change(struct net_device *ndev,
 
        switch (new_state) {
        case CAN_STATE_ERROR_ACTIVE:
+               /* error active state */
+               priv->can.can_stats.error_warning++;
+               priv->can.state = CAN_STATE_ERROR_ACTIVE;
+               break;
+       case CAN_STATE_ERROR_WARNING:
                /* error warning state */
                priv->can.can_stats.error_warning++;
                priv->can.state = CAN_STATE_ERROR_WARNING;
@@ -477,7 +496,7 @@ static int ifi_canfd_handle_state_change(struct net_device *ndev,
        ifi_canfd_get_berr_counter(ndev, &bec);
 
        switch (new_state) {
-       case CAN_STATE_ERROR_ACTIVE:
+       case CAN_STATE_ERROR_WARNING:
                /* error warning state */
                cf->can_id |= CAN_ERR_CRTL;
                cf->data[1] = (bec.txerr > bec.rxerr) ?
@@ -510,22 +529,21 @@ static int ifi_canfd_handle_state_change(struct net_device *ndev,
        return 1;
 }
 
-static int ifi_canfd_handle_state_errors(struct net_device *ndev, u32 stcmd)
+static int ifi_canfd_handle_state_errors(struct net_device *ndev)
 {
        struct ifi_canfd_priv *priv = netdev_priv(ndev);
+       u32 stcmd = readl(priv->base + IFI_CANFD_STCMD);
        int work_done = 0;
-       u32 isr;
 
-       /*
-        * The ErrWarn condition is a little special, since the bit is
-        * located in the INTERRUPT register instead of STCMD register.
-        */
-       isr = readl(priv->base + IFI_CANFD_INTERRUPT);
-       if ((isr & IFI_CANFD_INTERRUPT_ERROR_WARNING) &&
+       if ((stcmd & IFI_CANFD_STCMD_ERROR_ACTIVE) &&
+           (priv->can.state != CAN_STATE_ERROR_ACTIVE)) {
+               netdev_dbg(ndev, "Error, entered active state\n");
+               work_done += ifi_canfd_handle_state_change(ndev,
+                                               CAN_STATE_ERROR_ACTIVE);
+       }
+
+       if ((stcmd & IFI_CANFD_STCMD_ERROR_WARNING) &&
            (priv->can.state != CAN_STATE_ERROR_WARNING)) {
-               /* Clear the interrupt */
-               writel(IFI_CANFD_INTERRUPT_ERROR_WARNING,
-                      priv->base + IFI_CANFD_INTERRUPT);
                netdev_dbg(ndev, "Error, entered warning state\n");
                work_done += ifi_canfd_handle_state_change(ndev,
                                                CAN_STATE_ERROR_WARNING);
@@ -552,18 +570,11 @@ static int ifi_canfd_poll(struct napi_struct *napi, int quota)
 {
        struct net_device *ndev = napi->dev;
        struct ifi_canfd_priv *priv = netdev_priv(ndev);
-       const u32 stcmd_state_mask = IFI_CANFD_STCMD_ERROR_PASSIVE |
-                                    IFI_CANFD_STCMD_BUSOFF;
-       int work_done = 0;
-
-       u32 stcmd = readl(priv->base + IFI_CANFD_STCMD);
        u32 rxstcmd = readl(priv->base + IFI_CANFD_RXSTCMD);
-       u32 errctr = readl(priv->base + IFI_CANFD_ERROR_CTR);
+       int work_done = 0;
 
        /* Handle bus state changes */
-       if ((stcmd & stcmd_state_mask) ||
-           ((stcmd & IFI_CANFD_STCMD_ERROR_ACTIVE) == 0))
-               work_done += ifi_canfd_handle_state_errors(ndev, stcmd);
+       work_done += ifi_canfd_handle_state_errors(ndev);
 
        /* Handle lost messages on RX */
        if (rxstcmd & IFI_CANFD_RXSTCMD_OVERFLOW)
@@ -571,7 +582,7 @@ static int ifi_canfd_poll(struct napi_struct *napi, int quota)
 
        /* Handle lec errors on the bus */
        if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)
-               work_done += ifi_canfd_handle_lec_err(ndev, errctr);
+               work_done += ifi_canfd_handle_lec_err(ndev);
 
        /* Handle normal messages on RX */
        if (!(rxstcmd & IFI_CANFD_RXSTCMD_EMPTY))
@@ -592,12 +603,13 @@ static irqreturn_t ifi_canfd_isr(int irq, void *dev_id)
        struct net_device_stats *stats = &ndev->stats;
        const u32 rx_irq_mask = IFI_CANFD_INTERRUPT_RXFIFO_NEMPTY |
                                IFI_CANFD_INTERRUPT_RXFIFO_NEMPTY_PER |
+                               IFI_CANFD_INTERRUPT_ERROR_COUNTER |
+                               IFI_CANFD_INTERRUPT_ERROR_STATE_CHG |
                                IFI_CANFD_INTERRUPT_ERROR_WARNING |
-                               IFI_CANFD_INTERRUPT_ERROR_COUNTER;
+                               IFI_CANFD_INTERRUPT_ERROR_BUSOFF;
        const u32 tx_irq_mask = IFI_CANFD_INTERRUPT_TXFIFO_EMPTY |
                                IFI_CANFD_INTERRUPT_TXFIFO_REMOVE;
-       const u32 clr_irq_mask = ~((u32)(IFI_CANFD_INTERRUPT_SET_IRQ |
-                                        IFI_CANFD_INTERRUPT_ERROR_WARNING));
+       const u32 clr_irq_mask = ~((u32)IFI_CANFD_INTERRUPT_SET_IRQ);
        u32 isr;
 
        isr = readl(priv->base + IFI_CANFD_INTERRUPT);
@@ -933,7 +945,7 @@ static int ifi_canfd_plat_probe(struct platform_device *pdev)
        struct resource *res;
        void __iomem *addr;
        int irq, ret;
-       u32 id;
+       u32 id, rev;
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        addr = devm_ioremap_resource(dev, res);
@@ -947,6 +959,13 @@ static int ifi_canfd_plat_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
+       rev = readl(addr + IFI_CANFD_VER) & IFI_CANFD_VER_REV_MASK;
+       if (rev < IFI_CANFD_VER_REV_MIN_SUPPORTED) {
+               dev_err(dev, "This block is too old (rev %i), minimum supported is rev %i\n",
+                       rev, IFI_CANFD_VER_REV_MIN_SUPPORTED);
+               return -EINVAL;
+       }
+
        ndev = alloc_candev(sizeof(*priv), 1);
        if (!ndev)
                return -ENOMEM;
index 2594f7779c6f147d71fdda29f2eab22b0c9e1955..b397a33f3d32b5e3c28398a660c736d45a74179d 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/iopoll.h>
 #include <linux/can/dev.h>
+#include <linux/pinctrl/consumer.h>
 
 /* napi related */
 #define M_CAN_NAPI_WEIGHT      64
@@ -253,7 +254,7 @@ enum m_can_mram_cfg {
 
 /* Rx FIFO 0/1 Configuration (RXF0C/RXF1C) */
 #define RXFC_FWM_SHIFT 24
-#define RXFC_FWM_MASK  (0x7f < RXFC_FWM_SHIFT)
+#define RXFC_FWM_MASK  (0x7f << RXFC_FWM_SHIFT)
 #define RXFC_FS_SHIFT  16
 #define RXFC_FS_MASK   (0x7f << RXFC_FS_SHIFT)
 
@@ -1700,6 +1701,8 @@ static __maybe_unused int m_can_suspend(struct device *dev)
                m_can_clk_stop(priv);
        }
 
+       pinctrl_pm_select_sleep_state(dev);
+
        priv->can.state = CAN_STATE_SLEEPING;
 
        return 0;
@@ -1710,6 +1713,8 @@ static __maybe_unused int m_can_resume(struct device *dev)
        struct net_device *ndev = dev_get_drvdata(dev);
        struct m_can_priv *priv = netdev_priv(ndev);
 
+       pinctrl_pm_select_default_state(dev);
+
        m_can_init_ram(priv);
 
        priv->can.state = CAN_STATE_ERROR_ACTIVE;
index 55513411a82e68e11d6b1ca30e90ea4337a0f2ee..ed8561d4a90f4b5e25683a5483f0d98248d7dca2 100644 (file)
@@ -262,7 +262,6 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv,
 
                spin_lock_irqsave(&priv->echo_lock, flags);
                can_get_echo_skb(priv->ndev, msg->client);
-               spin_unlock_irqrestore(&priv->echo_lock, flags);
 
                /* count bytes of the echo instead of skb */
                stats->tx_bytes += cf_len;
@@ -271,6 +270,7 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv,
                /* restart tx queue (a slot is free) */
                netif_wake_queue(priv->ndev);
 
+               spin_unlock_irqrestore(&priv->echo_lock, flags);
                return 0;
        }
 
@@ -333,7 +333,6 @@ static int pucan_handle_status(struct peak_canfd_priv *priv,
 
        /* this STATUS is the CNF of the RX_BARRIER: Tx path can be setup */
        if (pucan_status_is_rx_barrier(msg)) {
-               unsigned long flags;
 
                if (priv->enable_tx_path) {
                        int err = priv->enable_tx_path(priv);
@@ -342,16 +341,8 @@ static int pucan_handle_status(struct peak_canfd_priv *priv,
                                return err;
                }
 
-               /* restart network queue only if echo skb array is free */
-               spin_lock_irqsave(&priv->echo_lock, flags);
-
-               if (!priv->can.echo_skb[priv->echo_idx]) {
-                       spin_unlock_irqrestore(&priv->echo_lock, flags);
-
-                       netif_wake_queue(ndev);
-               } else {
-                       spin_unlock_irqrestore(&priv->echo_lock, flags);
-               }
+               /* start network queue (echo_skb array is empty) */
+               netif_start_queue(ndev);
 
                return 0;
        }
@@ -726,11 +717,6 @@ static netdev_tx_t peak_canfd_start_xmit(struct sk_buff *skb,
         */
        should_stop_tx_queue = !!(priv->can.echo_skb[priv->echo_idx]);
 
-       spin_unlock_irqrestore(&priv->echo_lock, flags);
-
-       /* write the skb on the interface */
-       priv->write_tx_msg(priv, msg);
-
        /* stop network tx queue if not enough room to save one more msg too */
        if (priv->can.ctrlmode & CAN_CTRLMODE_FD)
                should_stop_tx_queue |= (room_left <
@@ -742,6 +728,11 @@ static netdev_tx_t peak_canfd_start_xmit(struct sk_buff *skb,
        if (should_stop_tx_queue)
                netif_stop_queue(ndev);
 
+       spin_unlock_irqrestore(&priv->echo_lock, flags);
+
+       /* write the skb on the interface */
+       priv->write_tx_msg(priv, msg);
+
        return NETDEV_TX_OK;
 }
 
index 788c3464a3b0e95aaa101591750b9de493a34a18..3c51a884db87bc90e71d5df8d5b0a91eadf69cdb 100644 (file)
@@ -349,8 +349,12 @@ static irqreturn_t pciefd_irq_handler(int irq, void *arg)
                priv->tx_pages_free++;
                spin_unlock_irqrestore(&priv->tx_lock, flags);
 
-               /* wake producer up */
-               netif_wake_queue(priv->ucan.ndev);
+               /* wake producer up (only if enough room in echo_skb array) */
+               spin_lock_irqsave(&priv->ucan.echo_lock, flags);
+               if (!priv->ucan.can.echo_skb[priv->ucan.echo_idx])
+                       netif_wake_queue(priv->ucan.ndev);
+
+               spin_unlock_irqrestore(&priv->ucan.echo_lock, flags);
        }
 
        /* re-enable Rx DMA transfer for this CAN */
index d040aeb45172662320bf63c75b094cde35b9b294..15c2a831edf192b2678901c9a4c6fce7e9df62cd 100644 (file)
@@ -1,7 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_NET_DSA_BCM_SF2)  += bcm-sf2.o
 bcm-sf2-objs                   := bcm_sf2.o bcm_sf2_cfp.o
-obj-$(CONFIG_NET_DSA_LOOP)     += dsa_loop.o dsa_loop_bdinfo.o
+obj-$(CONFIG_NET_DSA_LOOP)     += dsa_loop.o
+ifdef CONFIG_NET_DSA_LOOP
+obj-$(CONFIG_FIXED_PHY)                += dsa_loop_bdinfo.o
+endif
 obj-$(CONFIG_NET_DSA_MT7530)   += mt7530.o
 obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o
 obj-$(CONFIG_NET_DSA_QCA8K)    += qca8k.o
index db830a1141d99774f6e21037a63840518dfdab25..78616787f2a396102a4b9c5910fcd3abb87bcfb5 100644 (file)
@@ -814,8 +814,8 @@ void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
        unsigned int i;
 
        for (i = 0; i < mib_size; i++)
-               memcpy(data + i * ETH_GSTRING_LEN,
-                      mibs[i].name, ETH_GSTRING_LEN);
+               strlcpy(data + i * ETH_GSTRING_LEN,
+                       mibs[i].name, ETH_GSTRING_LEN);
 }
 EXPORT_SYMBOL(b53_get_strings);
 
@@ -852,7 +852,7 @@ void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data)
 }
 EXPORT_SYMBOL(b53_get_ethtool_stats);
 
-int b53_get_sset_count(struct dsa_switch *ds)
+int b53_get_sset_count(struct dsa_switch *ds, int port)
 {
        struct b53_device *dev = ds->priv;
 
index d954cf36ecd805b8c2a83371d3c9e443b04506fe..1187ebd79287bec832f78a04e47805e45fc22aeb 100644 (file)
@@ -288,7 +288,7 @@ void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port);
 int b53_configure_vlan(struct dsa_switch *ds);
 void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data);
 void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data);
-int b53_get_sset_count(struct dsa_switch *ds);
+int b53_get_sset_count(struct dsa_switch *ds, int port);
 int b53_br_join(struct dsa_switch *ds, int port, struct net_device *bridge);
 void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *bridge);
 void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state);
index 7aa84ee4e771d97b031e1e1e66910f81c1958828..f77be9f85cb36f3d229bcf1aa9dac0e86e6187c6 100644 (file)
@@ -86,7 +86,7 @@ static int dsa_loop_setup(struct dsa_switch *ds)
        return 0;
 }
 
-static int dsa_loop_get_sset_count(struct dsa_switch *ds)
+static int dsa_loop_get_sset_count(struct dsa_switch *ds, int port)
 {
        return __DSA_LOOP_CNT_MAX;
 }
index 6171c0853ff112984a21f940891e253eb57ceb3b..fefa454f3e564fcdfef5e56602975f3e31a6a13e 100644 (file)
@@ -1007,7 +1007,7 @@ static void lan9303_get_ethtool_stats(struct dsa_switch *ds, int port,
        }
 }
 
-static int lan9303_get_sset_count(struct dsa_switch *ds)
+static int lan9303_get_sset_count(struct dsa_switch *ds, int port)
 {
        return ARRAY_SIZE(lan9303_mib);
 }
index 663b0d5b982b127f934a8c87e64f5aa8209a6b0b..bcb3e6c734f25e099ac9356738edb87a22a1eb43 100644 (file)
@@ -439,7 +439,7 @@ static void ksz_disable_port(struct dsa_switch *ds, int port,
        ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_MAC_LOOPBACK, true);
 }
 
-static int ksz_sset_count(struct dsa_switch *ds)
+static int ksz_sset_count(struct dsa_switch *ds, int port)
 {
        return TOTAL_SWITCH_COUNTER_NUM;
 }
index 8a0bb000d05699f28e42d3a5547fffabfcc13341..511ca134f13f092ef07e461951cb4097ac5cebd4 100644 (file)
@@ -604,7 +604,7 @@ mt7530_get_ethtool_stats(struct dsa_switch *ds, int port,
 }
 
 static int
-mt7530_get_sset_count(struct dsa_switch *ds)
+mt7530_get_sset_count(struct dsa_switch *ds, int port)
 {
        return ARRAY_SIZE(mt7530_mib);
 }
index e1b5c5c66fce674d6f7698bf010371d83db44350..9a5d786b4885abe81d4cbffbee9c5c04405a60eb 100644 (file)
@@ -253,9 +253,8 @@ static void mv88e6xxx_g1_irq_unmask(struct irq_data *d)
        chip->g1_irq.masked &= ~(1 << n);
 }
 
-static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+static irqreturn_t mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip)
 {
-       struct mv88e6xxx_chip *chip = dev_id;
        unsigned int nhandled = 0;
        unsigned int sub_irq;
        unsigned int n;
@@ -280,6 +279,13 @@ static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
        return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE);
 }
 
+static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+{
+       struct mv88e6xxx_chip *chip = dev_id;
+
+       return mv88e6xxx_g1_irq_thread_work(chip);
+}
+
 static void mv88e6xxx_g1_irq_bus_lock(struct irq_data *d)
 {
        struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d);
@@ -335,7 +341,7 @@ static const struct irq_domain_ops mv88e6xxx_g1_irq_domain_ops = {
        .xlate  = irq_domain_xlate_twocell,
 };
 
-static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free_common(struct mv88e6xxx_chip *chip)
 {
        int irq, virq;
        u16 mask;
@@ -344,8 +350,6 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
        mask &= ~GENMASK(chip->g1_irq.nirqs, 0);
        mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask);
 
-       free_irq(chip->irq, chip);
-
        for (irq = 0; irq < chip->g1_irq.nirqs; irq++) {
                virq = irq_find_mapping(chip->g1_irq.domain, irq);
                irq_dispose_mapping(virq);
@@ -354,7 +358,14 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
        irq_domain_remove(chip->g1_irq.domain);
 }
 
-static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+{
+       mv88e6xxx_g1_irq_free_common(chip);
+
+       free_irq(chip->irq, chip);
+}
+
+static int mv88e6xxx_g1_irq_setup_common(struct mv88e6xxx_chip *chip)
 {
        int err, irq, virq;
        u16 reg, mask;
@@ -387,13 +398,6 @@ static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
        if (err)
                goto out_disable;
 
-       err = request_threaded_irq(chip->irq, NULL,
-                                  mv88e6xxx_g1_irq_thread_fn,
-                                  IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
-                                  dev_name(chip->dev), chip);
-       if (err)
-               goto out_disable;
-
        return 0;
 
 out_disable:
@@ -411,6 +415,64 @@ static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
        return err;
 }
 
+static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+{
+       int err;
+
+       err = mv88e6xxx_g1_irq_setup_common(chip);
+       if (err)
+               return err;
+
+       err = request_threaded_irq(chip->irq, NULL,
+                                  mv88e6xxx_g1_irq_thread_fn,
+                                  IRQF_ONESHOT,
+                                  dev_name(chip->dev), chip);
+       if (err)
+               mv88e6xxx_g1_irq_free_common(chip);
+
+       return err;
+}
+
+static void mv88e6xxx_irq_poll(struct kthread_work *work)
+{
+       struct mv88e6xxx_chip *chip = container_of(work,
+                                                  struct mv88e6xxx_chip,
+                                                  irq_poll_work.work);
+       mv88e6xxx_g1_irq_thread_work(chip);
+
+       kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work,
+                                  msecs_to_jiffies(100));
+}
+
+static int mv88e6xxx_irq_poll_setup(struct mv88e6xxx_chip *chip)
+{
+       int err;
+
+       err = mv88e6xxx_g1_irq_setup_common(chip);
+       if (err)
+               return err;
+
+       kthread_init_delayed_work(&chip->irq_poll_work,
+                                 mv88e6xxx_irq_poll);
+
+       chip->kworker = kthread_create_worker(0, dev_name(chip->dev));
+       if (IS_ERR(chip->kworker))
+               return PTR_ERR(chip->kworker);
+
+       kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work,
+                                  msecs_to_jiffies(100));
+
+       return 0;
+}
+
+static void mv88e6xxx_irq_poll_free(struct mv88e6xxx_chip *chip)
+{
+       mv88e6xxx_g1_irq_free_common(chip);
+
+       kthread_cancel_delayed_work_sync(&chip->irq_poll_work);
+       kthread_destroy_worker(chip->kworker);
+}
+
 int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask)
 {
        int i;
@@ -606,7 +668,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
                        return UINT64_MAX;
 
                low = reg;
-               if (s->sizeof_stat == 4) {
+               if (s->size == 4) {
                        err = mv88e6xxx_port_read(chip, port, s->reg + 1, &reg);
                        if (err)
                                return UINT64_MAX;
@@ -619,7 +681,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
        case STATS_TYPE_BANK0:
                reg |= s->reg | histogram;
                mv88e6xxx_g1_stats_read(chip, reg, &low);
-               if (s->sizeof_stat == 8)
+               if (s->size == 8)
                        mv88e6xxx_g1_stats_read(chip, reg + 1, &high);
                break;
        default:
@@ -629,8 +691,8 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
        return value;
 }
 
-static void mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
-                                       uint8_t *data, int types)
+static int mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
+                                      uint8_t *data, int types)
 {
        struct mv88e6xxx_hw_stat *stat;
        int i, j;
@@ -643,29 +705,41 @@ static void mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
                        j++;
                }
        }
+
+       return j;
 }
 
-static void mv88e6095_stats_get_strings(struct mv88e6xxx_chip *chip,
-                                       uint8_t *data)
+static int mv88e6095_stats_get_strings(struct mv88e6xxx_chip *chip,
+                                      uint8_t *data)
 {
-       mv88e6xxx_stats_get_strings(chip, data,
-                                   STATS_TYPE_BANK0 | STATS_TYPE_PORT);
+       return mv88e6xxx_stats_get_strings(chip, data,
+                                          STATS_TYPE_BANK0 | STATS_TYPE_PORT);
 }
 
-static void mv88e6320_stats_get_strings(struct mv88e6xxx_chip *chip,
-                                       uint8_t *data)
+static int mv88e6320_stats_get_strings(struct mv88e6xxx_chip *chip,
+                                      uint8_t *data)
 {
-       mv88e6xxx_stats_get_strings(chip, data,
-                                   STATS_TYPE_BANK0 | STATS_TYPE_BANK1);
+       return mv88e6xxx_stats_get_strings(chip, data,
+                                          STATS_TYPE_BANK0 | STATS_TYPE_BANK1);
 }
 
 static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port,
                                  uint8_t *data)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
+       int count = 0;
+
+       mutex_lock(&chip->reg_lock);
 
        if (chip->info->ops->stats_get_strings)
-               chip->info->ops->stats_get_strings(chip, data);
+               count = chip->info->ops->stats_get_strings(chip, data);
+
+       if (chip->info->ops->serdes_get_strings) {
+               data += count * ETH_GSTRING_LEN;
+               chip->info->ops->serdes_get_strings(chip, port, data);
+       }
+
+       mutex_unlock(&chip->reg_lock);
 }
 
 static int mv88e6xxx_stats_get_sset_count(struct mv88e6xxx_chip *chip,
@@ -694,19 +768,34 @@ static int mv88e6320_stats_get_sset_count(struct mv88e6xxx_chip *chip)
                                              STATS_TYPE_BANK1);
 }
 
-static int mv88e6xxx_get_sset_count(struct dsa_switch *ds)
+static int mv88e6xxx_get_sset_count(struct dsa_switch *ds, int port)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
+       int serdes_count = 0;
+       int count = 0;
 
+       mutex_lock(&chip->reg_lock);
        if (chip->info->ops->stats_get_sset_count)
-               return chip->info->ops->stats_get_sset_count(chip);
+               count = chip->info->ops->stats_get_sset_count(chip);
+       if (count < 0)
+               goto out;
 
-       return 0;
+       if (chip->info->ops->serdes_get_sset_count)
+               serdes_count = chip->info->ops->serdes_get_sset_count(chip,
+                                                                     port);
+       if (serdes_count < 0)
+               count = serdes_count;
+       else
+               count += serdes_count;
+out:
+       mutex_unlock(&chip->reg_lock);
+
+       return count;
 }
 
-static void mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
-                                     uint64_t *data, int types,
-                                     u16 bank1_select, u16 histogram)
+static int mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+                                    uint64_t *data, int types,
+                                    u16 bank1_select, u16 histogram)
 {
        struct mv88e6xxx_hw_stat *stat;
        int i, j;
@@ -723,18 +812,19 @@ static void mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
                        j++;
                }
        }
+       return j;
 }
 
-static void mv88e6095_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
-                                     uint64_t *data)
+static int mv88e6095_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+                                    uint64_t *data)
 {
        return mv88e6xxx_stats_get_stats(chip, port, data,
                                         STATS_TYPE_BANK0 | STATS_TYPE_PORT,
                                         0, MV88E6XXX_G1_STATS_OP_HIST_RX_TX);
 }
 
-static void mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
-                                     uint64_t *data)
+static int mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+                                    uint64_t *data)
 {
        return mv88e6xxx_stats_get_stats(chip, port, data,
                                         STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
@@ -742,8 +832,8 @@ static void mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
                                         MV88E6XXX_G1_STATS_OP_HIST_RX_TX);
 }
 
-static void mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
-                                     uint64_t *data)
+static int mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+                                    uint64_t *data)
 {
        return mv88e6xxx_stats_get_stats(chip, port, data,
                                         STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
@@ -754,8 +844,17 @@ static void mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
 static void mv88e6xxx_get_stats(struct mv88e6xxx_chip *chip, int port,
                                uint64_t *data)
 {
+       int count = 0;
+
        if (chip->info->ops->stats_get_stats)
-               chip->info->ops->stats_get_stats(chip, port, data);
+               count = chip->info->ops->stats_get_stats(chip, port, data);
+
+       if (chip->info->ops->serdes_get_stats) {
+               data += count;
+               mutex_lock(&chip->reg_lock);
+               chip->info->ops->serdes_get_stats(chip, port, data);
+               mutex_unlock(&chip->reg_lock);
+       }
 }
 
 static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port,
@@ -2196,12 +2295,19 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
        bus->write = mv88e6xxx_mdio_write;
        bus->parent = chip->dev;
 
+       if (!external) {
+               err = mv88e6xxx_g2_irq_mdio_setup(chip, bus);
+               if (err)
+                       return err;
+       }
+
        if (np)
                err = of_mdiobus_register(bus, np);
        else
                err = mdiobus_register(bus);
        if (err) {
                dev_err(chip->dev, "Cannot register MDIO bus (%d)\n", err);
+               mv88e6xxx_g2_irq_mdio_free(chip, bus);
                return err;
        }
 
@@ -2228,6 +2334,9 @@ static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip)
        list_for_each_entry(mdio_bus, &chip->mdios, list) {
                bus = mdio_bus->bus;
 
+               if (!mdio_bus->external)
+                       mv88e6xxx_g2_irq_mdio_free(chip, bus);
+
                mdiobus_unregister(bus);
        }
 }
@@ -3130,6 +3239,9 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
        .serdes_power = mv88e6352_serdes_power,
        .gpio_ops = &mv88e6352_gpio_ops,
        .avb_ops = &mv88e6352_avb_ops,
+       .serdes_get_sset_count = mv88e6352_serdes_get_sset_count,
+       .serdes_get_strings = mv88e6352_serdes_get_strings,
+       .serdes_get_stats = mv88e6352_serdes_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6390_ops = {
@@ -3219,6 +3331,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6085",
                .num_databases = 4096,
                .num_ports = 10,
+               .num_internal_phys = 5,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3239,6 +3352,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6095/88E6095F",
                .num_databases = 256,
                .num_ports = 11,
+               .num_internal_phys = 0,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3257,6 +3371,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6097/88E6097F",
                .num_databases = 4096,
                .num_ports = 11,
+               .num_internal_phys = 8,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3277,6 +3392,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6123",
                .num_databases = 4096,
                .num_ports = 3,
+               .num_internal_phys = 5,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3297,6 +3413,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6131",
                .num_databases = 256,
                .num_ports = 8,
+               .num_internal_phys = 0,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3312,9 +3429,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
        [MV88E6141] = {
                .prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6141,
                .family = MV88E6XXX_FAMILY_6341,
-               .name = "Marvell 88E6341",
+               .name = "Marvell 88E6141",
                .num_databases = 4096,
                .num_ports = 6,
+               .num_internal_phys = 5,
                .num_gpio = 11,
                .max_vid = 4095,
                .port_base_addr = 0x10,
@@ -3322,6 +3440,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global2_addr = 0x1c,
                .age_time_coeff = 3750,
                .atu_move_port_mask = 0x1f,
+               .g1_irqs = 9,
                .g2_irqs = 10,
                .pvt = true,
                .multi_chip = true,
@@ -3335,6 +3454,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6161",
                .num_databases = 4096,
                .num_ports = 6,
+               .num_internal_phys = 5,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3355,6 +3475,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6165",
                .num_databases = 4096,
                .num_ports = 6,
+               .num_internal_phys = 0,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3375,6 +3496,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6171",
                .num_databases = 4096,
                .num_ports = 7,
+               .num_internal_phys = 5,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3395,6 +3517,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6172",
                .num_databases = 4096,
                .num_ports = 7,
+               .num_internal_phys = 5,
                .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
@@ -3416,6 +3539,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6175",
                .num_databases = 4096,
                .num_ports = 7,
+               .num_internal_phys = 5,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3436,6 +3560,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6176",
                .num_databases = 4096,
                .num_ports = 7,
+               .num_internal_phys = 5,
                .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
@@ -3457,6 +3582,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6185",
                .num_databases = 256,
                .num_ports = 10,
+               .num_internal_phys = 0,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3475,6 +3601,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6190",
                .num_databases = 4096,
                .num_ports = 11,        /* 10 + Z80 */
+               .num_internal_phys = 11,
                .num_gpio = 16,
                .max_vid = 8191,
                .port_base_addr = 0x0,
@@ -3496,6 +3623,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6190X",
                .num_databases = 4096,
                .num_ports = 11,        /* 10 + Z80 */
+               .num_internal_phys = 11,
                .num_gpio = 16,
                .max_vid = 8191,
                .port_base_addr = 0x0,
@@ -3517,6 +3645,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6191",
                .num_databases = 4096,
                .num_ports = 11,        /* 10 + Z80 */
+               .num_internal_phys = 11,
                .max_vid = 8191,
                .port_base_addr = 0x0,
                .global1_addr = 0x1b,
@@ -3538,6 +3667,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6240",
                .num_databases = 4096,
                .num_ports = 7,
+               .num_internal_phys = 5,
                .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
@@ -3560,6 +3690,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6290",
                .num_databases = 4096,
                .num_ports = 11,        /* 10 + Z80 */
+               .num_internal_phys = 11,
                .num_gpio = 16,
                .max_vid = 8191,
                .port_base_addr = 0x0,
@@ -3582,6 +3713,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6320",
                .num_databases = 4096,
                .num_ports = 7,
+               .num_internal_phys = 5,
                .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
@@ -3589,6 +3721,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 8,
+               .g2_irqs = 10,
                .atu_move_port_mask = 0xf,
                .pvt = true,
                .multi_chip = true,
@@ -3603,6 +3736,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6321",
                .num_databases = 4096,
                .num_ports = 7,
+               .num_internal_phys = 5,
                .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
@@ -3610,6 +3744,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 8,
+               .g2_irqs = 10,
                .atu_move_port_mask = 0xf,
                .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
@@ -3622,6 +3757,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6341,
                .name = "Marvell 88E6341",
                .num_databases = 4096,
+               .num_internal_phys = 5,
                .num_ports = 6,
                .num_gpio = 11,
                .max_vid = 4095,
@@ -3630,6 +3766,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .global2_addr = 0x1c,
                .age_time_coeff = 3750,
                .atu_move_port_mask = 0x1f,
+               .g1_irqs = 9,
                .g2_irqs = 10,
                .pvt = true,
                .multi_chip = true,
@@ -3644,6 +3781,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6350",
                .num_databases = 4096,
                .num_ports = 7,
+               .num_internal_phys = 5,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3664,6 +3802,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6351",
                .num_databases = 4096,
                .num_ports = 7,
+               .num_internal_phys = 5,
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
@@ -3684,6 +3823,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6352",
                .num_databases = 4096,
                .num_ports = 7,
+               .num_internal_phys = 5,
                .num_gpio = 15,
                .max_vid = 4095,
                .port_base_addr = 0x10,
@@ -3705,6 +3845,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6390",
                .num_databases = 4096,
                .num_ports = 11,        /* 10 + Z80 */
+               .num_internal_phys = 11,
                .num_gpio = 16,
                .max_vid = 8191,
                .port_base_addr = 0x0,
@@ -3726,6 +3867,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .name = "Marvell 88E6390X",
                .num_databases = 4096,
                .num_ports = 11,        /* 10 + Z80 */
+               .num_internal_phys = 11,
                .num_gpio = 16,
                .max_vid = 8191,
                .port_base_addr = 0x0,
@@ -4034,33 +4176,34 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
                goto out;
        }
 
-       if (chip->irq > 0) {
-               /* Has to be performed before the MDIO bus is created,
-                * because the PHYs will link there interrupts to these
-                * interrupt controllers
-                */
-               mutex_lock(&chip->reg_lock);
+       /* Has to be performed before the MDIO bus is created, because
+        * the PHYs will link their interrupts to these interrupt
+        * controllers
+        */
+       mutex_lock(&chip->reg_lock);
+       if (chip->irq > 0)
                err = mv88e6xxx_g1_irq_setup(chip);
-               mutex_unlock(&chip->reg_lock);
-
-               if (err)
-                       goto out;
-
-               if (chip->info->g2_irqs > 0) {
-                       err = mv88e6xxx_g2_irq_setup(chip);
-                       if (err)
-                               goto out_g1_irq;
-               }
+       else
+               err = mv88e6xxx_irq_poll_setup(chip);
+       mutex_unlock(&chip->reg_lock);
 
-               err = mv88e6xxx_g1_atu_prob_irq_setup(chip);
-               if (err)
-                       goto out_g2_irq;
+       if (err)
+               goto out;
 
-               err = mv88e6xxx_g1_vtu_prob_irq_setup(chip);
+       if (chip->info->g2_irqs > 0) {
+               err = mv88e6xxx_g2_irq_setup(chip);
                if (err)
-                       goto out_g1_atu_prob_irq;
+                       goto out_g1_irq;
        }
 
+       err = mv88e6xxx_g1_atu_prob_irq_setup(chip);
+       if (err)
+               goto out_g2_irq;
+
+       err = mv88e6xxx_g1_vtu_prob_irq_setup(chip);
+       if (err)
+               goto out_g1_atu_prob_irq;
+
        err = mv88e6xxx_mdios_register(chip, np);
        if (err)
                goto out_g1_vtu_prob_irq;
@@ -4074,20 +4217,19 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
 out_mdio:
        mv88e6xxx_mdios_unregister(chip);
 out_g1_vtu_prob_irq:
-       if (chip->irq > 0)
-               mv88e6xxx_g1_vtu_prob_irq_free(chip);
+       mv88e6xxx_g1_vtu_prob_irq_free(chip);
 out_g1_atu_prob_irq:
-       if (chip->irq > 0)
-               mv88e6xxx_g1_atu_prob_irq_free(chip);
+       mv88e6xxx_g1_atu_prob_irq_free(chip);
 out_g2_irq:
-       if (chip->info->g2_irqs > 0 && chip->irq > 0)
+       if (chip->info->g2_irqs > 0)
                mv88e6xxx_g2_irq_free(chip);
 out_g1_irq:
-       if (chip->irq > 0) {
-               mutex_lock(&chip->reg_lock);
+       mutex_lock(&chip->reg_lock);
+       if (chip->irq > 0)
                mv88e6xxx_g1_irq_free(chip);
-               mutex_unlock(&chip->reg_lock);
-       }
+       else
+               mv88e6xxx_irq_poll_free(chip);
+       mutex_unlock(&chip->reg_lock);
 out:
        return err;
 }
@@ -4106,15 +4248,18 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev)
        mv88e6xxx_unregister_switch(chip);
        mv88e6xxx_mdios_unregister(chip);
 
-       if (chip->irq > 0) {
-               mv88e6xxx_g1_vtu_prob_irq_free(chip);
-               mv88e6xxx_g1_atu_prob_irq_free(chip);
-               if (chip->info->g2_irqs > 0)
-                       mv88e6xxx_g2_irq_free(chip);
-               mutex_lock(&chip->reg_lock);
+       mv88e6xxx_g1_vtu_prob_irq_free(chip);
+       mv88e6xxx_g1_atu_prob_irq_free(chip);
+
+       if (chip->info->g2_irqs > 0)
+               mv88e6xxx_g2_irq_free(chip);
+
+       mutex_lock(&chip->reg_lock);
+       if (chip->irq > 0)
                mv88e6xxx_g1_irq_free(chip);
-               mutex_unlock(&chip->reg_lock);
-       }
+       else
+               mv88e6xxx_irq_poll_free(chip);
+       mutex_unlock(&chip->reg_lock);
 }
 
 static const struct of_device_id mv88e6xxx_of_match[] = {
index 97d7915f32c720448bec983e0f695b1c75e8585a..bad211014e91a2f0c093a2bb69361294f60261fd 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/if_vlan.h>
 #include <linux/irq.h>
 #include <linux/gpio/consumer.h>
+#include <linux/kthread.h>
 #include <linux/phy.h>
 #include <linux/ptp_clock_kernel.h>
 #include <linux/timecounter.h>
@@ -109,6 +110,7 @@ struct mv88e6xxx_info {
        const char *name;
        unsigned int num_databases;
        unsigned int num_ports;
+       unsigned int num_internal_phys;
        unsigned int num_gpio;
        unsigned int max_vid;
        unsigned int port_base_addr;
@@ -190,6 +192,10 @@ struct mv88e6xxx_port_hwtstamp {
        struct hwtstamp_config tstamp_config;
 };
 
+/* Per-port driver state. */
+struct mv88e6xxx_port {
+       /* Running SERDES counter totals: the 6352 SERDES statistics code
+        * adds every hardware reading to these and reports the sums.
+        */
+       u64 serdes_stats[2];
+};
+
 struct mv88e6xxx_chip {
        const struct mv88e6xxx_info *info;
 
@@ -243,8 +249,11 @@ struct mv88e6xxx_chip {
        int irq;
        int device_irq;
        int watchdog_irq;
+
        int atu_prob_irq;
        int vtu_prob_irq;
+       struct kthread_worker *kworker;
+       struct kthread_delayed_work irq_poll_work;
 
        /* GPIO resources */
        u8 gpio_data[2];
@@ -265,6 +274,9 @@ struct mv88e6xxx_chip {
 
        /* Per-port timestamping resources. */
        struct mv88e6xxx_port_hwtstamp port_hwtstamp[DSA_MAX_PORTS];
+
+       /* Array of port structures. */
+       struct mv88e6xxx_port ports[DSA_MAX_PORTS];
 };
 
 struct mv88e6xxx_bus_ops {
@@ -383,9 +395,9 @@ struct mv88e6xxx_ops {
 
        /* Return the number of strings describing statistics */
        int (*stats_get_sset_count)(struct mv88e6xxx_chip *chip);
-       void (*stats_get_strings)(struct mv88e6xxx_chip *chip,  uint8_t *data);
-       void (*stats_get_stats)(struct mv88e6xxx_chip *chip,  int port,
-                               uint64_t *data);
+       int (*stats_get_strings)(struct mv88e6xxx_chip *chip,  uint8_t *data);
+       int (*stats_get_stats)(struct mv88e6xxx_chip *chip,  int port,
+                              uint64_t *data);
        int (*set_cpu_port)(struct mv88e6xxx_chip *chip, int port);
        int (*set_egress_port)(struct mv88e6xxx_chip *chip, int port);
        const struct mv88e6xxx_irq_ops *watchdog_ops;
@@ -395,6 +407,13 @@ struct mv88e6xxx_ops {
        /* Power on/off a SERDES interface */
        int (*serdes_power)(struct mv88e6xxx_chip *chip, int port, bool on);
 
+       /* Statistics from the SERDES interface */
+       int (*serdes_get_sset_count)(struct mv88e6xxx_chip *chip, int port);
+       void (*serdes_get_strings)(struct mv88e6xxx_chip *chip,  int port,
+                                  uint8_t *data);
+       void (*serdes_get_stats)(struct mv88e6xxx_chip *chip,  int port,
+                                uint64_t *data);
+
        /* VLAN Translation Unit operations */
        int (*vtu_getnext)(struct mv88e6xxx_chip *chip,
                           struct mv88e6xxx_vtu_entry *entry);
@@ -459,7 +478,7 @@ struct mv88e6xxx_avb_ops {
 
 struct mv88e6xxx_hw_stat {
        char string[ETH_GSTRING_LEN];
-       int sizeof_stat;
+       size_t size;
        int reg;
        int type;
 };
index 5f370f1fc7c4e04326745b0b189de8a70ce766b8..0ce627fded48f2182676d46194c363d30bb93287 100644 (file)
@@ -1090,7 +1090,7 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip)
 
        err = request_threaded_irq(chip->device_irq, NULL,
                                   mv88e6xxx_g2_irq_thread_fn,
-                                  IRQF_ONESHOT, "mv88e6xxx-g1", chip);
+                                  IRQF_ONESHOT, "mv88e6xxx-g2", chip);
        if (err)
                goto out;
 
@@ -1107,6 +1107,38 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip)
        return err;
 }
 
+/* Point the MDIO bus IRQ table entries of the internal PHYs at the
+ * interrupts looked up in the global 2 IRQ domain.
+ *
+ * Returns 0 on success, or the negative value obtained from the mapping
+ * lookup. On failure the entries filled in so far are disposed of again.
+ *
+ * NOTE(review): irq_find_mapping() is understood to report a missing
+ * mapping as 0 rather than as a negative value - confirm whether the
+ * "irq < 0" check below can ever trigger.
+ */
+int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip,
+                               struct mii_bus *bus)
+{
+       int phy, irq, err, err_phy;
+
+       for (phy = 0; phy < chip->info->num_internal_phys; phy++) {
+               irq = irq_find_mapping(chip->g2_irq.domain, phy);
+               if (irq < 0) {
+                       err = irq;
+                       goto out;
+               }
+               bus->irq[chip->info->port_base_addr + phy] = irq;
+       }
+       return 0;
+out:
+       err_phy = phy;
+
+       /* Unwind through the same slot index the loop above stored to */
+       for (phy = 0; phy < err_phy; phy++)
+               irq_dispose_mapping(bus->irq[chip->info->port_base_addr + phy]);
+
+       return err;
+}
+
+/* Dispose of the IRQ mappings that mv88e6xxx_g2_irq_mdio_setup() stored in
+ * the MDIO bus IRQ table for the internal PHYs.
+ */
+void mv88e6xxx_g2_irq_mdio_free(struct mv88e6xxx_chip *chip,
+                               struct mii_bus *bus)
+{
+       int phy;
+
+       /* Entries live at port_base_addr + phy, exactly where setup put them */
+       for (phy = 0; phy < chip->info->num_internal_phys; phy++)
+               irq_dispose_mapping(bus->irq[chip->info->port_base_addr + phy]);
+}
+
 int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
 {
        u16 reg;
index aa3f0a736966abfe316e84ff8dac8cf5dc8ecd4b..520ec70d32e84f304cf9246eb904bb5016474dd6 100644 (file)
@@ -317,6 +317,11 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip);
 int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip);
 void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip);
 
+int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip,
+                               struct mii_bus *bus);
+void mv88e6xxx_g2_irq_mdio_free(struct mv88e6xxx_chip *chip,
+                               struct mii_bus *bus);
+
 int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
 int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
 
@@ -450,6 +455,17 @@ static inline void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip)
 {
 }
 
+/* No-op variant of mv88e6xxx_g2_irq_mdio_setup(): leaves @bus untouched and
+ * reports success.
+ */
+static inline int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip,
+                                             struct mii_bus *bus)
+{
+       return 0;
+}
+
+/* No-op variant of mv88e6xxx_g2_irq_mdio_free(): there is nothing to undo. */
+static inline void mv88e6xxx_g2_irq_mdio_free(struct mv88e6xxx_chip *chip,
+                                             struct mii_bus *bus)
+{
+}
+
 static inline int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
 {
        return -EOPNOTSUPP;
index f3c01119b3d1a9172efb66d9c1ba8843009c7d33..b6166424216a74120de572209ac066fe84d2a834 100644 (file)
@@ -55,18 +55,30 @@ static int mv88e6352_serdes_power_set(struct mv88e6xxx_chip *chip, bool on)
        return err;
 }
 
-int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
+static bool mv88e6352_port_has_serdes(struct mv88e6xxx_chip *chip, int port)
 {
-       int err;
        u8 cmode;
+       int err;
 
        err = mv88e6xxx_port_get_cmode(chip, port, &cmode);
-       if (err)
-               return err;
+       if (err) {
+               dev_err(chip->dev, "failed to read cmode\n");
+               return false;
+       }
 
        if ((cmode == MV88E6XXX_PORT_STS_CMODE_100BASE_X) ||
            (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASE_X) ||
-           (cmode == MV88E6XXX_PORT_STS_CMODE_SGMII)) {
+           (cmode == MV88E6XXX_PORT_STS_CMODE_SGMII))
+               return true;
+
+       return false;
+}
+
+int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
+{
+       int err;
+
+       if (mv88e6352_port_has_serdes(chip, port)) {
                err = mv88e6352_serdes_power_set(chip, on);
                if (err < 0)
                        return err;
@@ -75,6 +87,90 @@ int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
        return 0;
 }
 
+/* Description of one 6352 SERDES statistics counter. */
+struct mv88e6352_serdes_hw_stat {
+       /* Counter name, copied out verbatim by the get_strings helper */
+       char string[ETH_GSTRING_LEN];
+       /* Counter width in bits; 32 makes the reader fetch a second register */
+       int sizeof_stat;
+       /* SERDES register number handed to mv88e6352_serdes_read() */
+       int reg;
+};
+
+/* Counters reported for a 6352 SERDES port, in the order their names and
+ * values are emitted. Fields are { name, width in bits, register }.
+ */
+static struct mv88e6352_serdes_hw_stat mv88e6352_serdes_hw_stats[] = {
+       { "serdes_fibre_rx_error", 16, 21 },
+       { "serdes_PRBS_error", 32, 24 },
+};
+
+/* Number of SERDES statistics available on @port: the size of the counter
+ * table when the port has a SERDES, zero otherwise.
+ */
+int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port)
+{
+       if (!mv88e6352_port_has_serdes(chip, port))
+               return 0;
+
+       return ARRAY_SIZE(mv88e6352_serdes_hw_stats);
+}
+
+/* Copy the name of every SERDES counter into @data, one ETH_GSTRING_LEN
+ * sized slot per counter. Nothing is written when @port has no SERDES.
+ */
+void mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
+                                 int port, uint8_t *data)
+{
+       uint8_t *slot = data;
+       int idx;
+
+       if (!mv88e6352_port_has_serdes(chip, port))
+               return;
+
+       for (idx = 0; idx < ARRAY_SIZE(mv88e6352_serdes_hw_stats); idx++) {
+               memcpy(slot, mv88e6352_serdes_hw_stats[idx].string,
+                      ETH_GSTRING_LEN);
+               slot += ETH_GSTRING_LEN;
+       }
+}
+
+/* Read one SERDES counter from the hardware.
+ *
+ * A 32 bit counter is assembled from two consecutive registers, with the
+ * one at stat->reg supplying the upper 16 bits. A failed read is logged and
+ * reported as a count of zero.
+ */
+static uint64_t mv88e6352_serdes_get_stat(struct mv88e6xxx_chip *chip,
+                                         struct mv88e6352_serdes_hw_stat *stat)
+{
+       u64 total;
+       u16 word;
+
+       if (mv88e6352_serdes_read(chip, stat->reg, &word))
+               goto read_failed;
+
+       total = word;
+       if (stat->sizeof_stat != 32)
+               return total;
+
+       if (mv88e6352_serdes_read(chip, stat->reg + 1, &word))
+               goto read_failed;
+
+       return total << 16 | word;
+
+read_failed:
+       dev_err(chip->dev, "failed to read statistic\n");
+       return 0;
+}
+
+/* Report the SERDES statistics of @port in @data.
+ *
+ * Each hardware reading is added to the per-port running total and the
+ * total is what gets reported. @data is left untouched when the port has
+ * no SERDES.
+ */
+void mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
+                               uint64_t *data)
+{
+       u64 *totals = chip->ports[port].serdes_stats;
+       int idx;
+
+       /* The per-port array must have room for every counter in the table */
+       BUILD_BUG_ON(ARRAY_SIZE(mv88e6352_serdes_hw_stats) >
+                    ARRAY_SIZE(chip->ports[port].serdes_stats));
+
+       if (!mv88e6352_port_has_serdes(chip, port))
+               return;
+
+       for (idx = 0; idx < ARRAY_SIZE(mv88e6352_serdes_hw_stats); idx++) {
+               totals[idx] += mv88e6352_serdes_get_stat(chip,
+                                       &mv88e6352_serdes_hw_stats[idx]);
+               data[idx] = totals[idx];
+       }
+}
+
 /* Set the power on/off for 10GBASE-R and 10GBASE-X4/X2 */
 static int mv88e6390_serdes_10g(struct mv88e6xxx_chip *chip, int addr, bool on)
 {
index 5c1cd6d8e9a5e361971d23ffead531712174ed06..641baa75f910842de57b346346f13b02db744828 100644 (file)
@@ -44,5 +44,9 @@
 
 int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
 int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
-
+int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port);
+void mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
+                                 int port, uint8_t *data);
+void mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
+                               uint64_t *data);
 #endif
index 9df22ebee8223822d8bbfebe299b3f6ccfefc84e..600d5ad1fbde265afbf7dc030d694cadbaa8fcef 100644 (file)
@@ -631,7 +631,7 @@ qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
 }
 
 static int
-qca8k_get_sset_count(struct dsa_switch *ds)
+qca8k_get_sset_count(struct dsa_switch *ds, int port)
 {
        return ARRAY_SIZE(ar8327_mib);
 }
index 29c3075bfb052f1dbc7e788bf093bd0b8a94f152..fdc673484addcf09a8edd0f69f1c9d167816619f 100644 (file)
@@ -3,7 +3,7 @@
 #
 
 config NET_VENDOR_8390
-       bool "National Semi-conductor 8390 devices"
+       bool "National Semiconductor 8390 devices"
        default y
        depends on NET_VENDOR_NATSEMI
        ---help---
index 358f7ab77c70b6edb9919c9a4d5b939188a46519..c99e3e845ac05021462853fc780df2187b2e0f7c 100644 (file)
@@ -649,7 +649,7 @@ static void amd8111e_free_ring(struct amd8111e_priv *lp)
 static int amd8111e_tx(struct net_device *dev)
 {
        struct amd8111e_priv *lp = netdev_priv(dev);
-       int tx_index = lp->tx_complete_idx & TX_RING_DR_MOD_MASK;
+       int tx_index;
        int status;
        /* Complete all the transmit packet */
        while (lp->tx_complete_idx != lp->tx_idx){
index f17a160dbff285afd37cf6140bd1790d47f93bd6..137cbb470af2301b83864901332d794dcaa6f9b5 100644 (file)
@@ -247,8 +247,8 @@ static int mace_probe(struct platform_device *pdev)
        dev->netdev_ops         = &mace_netdev_ops;
        dev->watchdog_timeo     = TX_TIMEOUT;
 
-       printk(KERN_INFO "%s: 68K MACE, hardware address %pM\n",
-              dev->name, dev->dev_addr);
+       pr_info("Onboard MACE, hardware address %pM, chip revision 0x%04X\n",
+               dev->dev_addr, mp->chipid);
 
        err = register_netdev(dev);
        if (!err)
@@ -589,7 +589,6 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id)
                        else if (fs & (UFLO|LCOL|RTRY)) {
                                ++dev->stats.tx_aborted_errors;
                                if (mb->xmtfs & UFLO) {
-                                       printk(KERN_ERR "%s: DMA underrun.\n", dev->name);
                                        dev->stats.tx_fifo_errors++;
                                        mace_txdma_reset(dev);
                                }
@@ -644,10 +643,8 @@ static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf)
 
        if (frame_status & (RS_OFLO | RS_CLSN | RS_FRAMERR | RS_FCSERR)) {
                dev->stats.rx_errors++;
-               if (frame_status & RS_OFLO) {
-                       printk(KERN_DEBUG "%s: fifo overflow.\n", dev->name);
+               if (frame_status & RS_OFLO)
                        dev->stats.rx_fifo_errors++;
-               }
                if (frame_status & RS_CLSN)
                        dev->stats.collisions++;
                if (frame_status & RS_FRAMERR)
@@ -770,18 +767,4 @@ static struct platform_driver mac_mace_driver = {
        },
 };
 
-static int __init mac_mace_init_module(void)
-{
-       if (!MACH_IS_MAC)
-               return -ENODEV;
-
-       return platform_driver_register(&mac_mace_driver);
-}
-
-static void __exit mac_mace_cleanup_module(void)
-{
-       platform_driver_unregister(&mac_mace_driver);
-}
-
-module_init(mac_mace_init_module);
-module_exit(mac_mace_cleanup_module);
+module_platform_driver(mac_mace_driver);
index 0b49f1aeebd3dd98d6e0491aa1cc3e46a996a5a9..fc7383106946ca6461f62ea305be0f03bb59c227 100644 (file)
@@ -36,6 +36,8 @@
 #define AQ_CFG_TX_FRAME_MAX  (16U * 1024U)
 #define AQ_CFG_RX_FRAME_MAX  (4U * 1024U)
 
+#define AQ_CFG_TX_CLEAN_BUDGET 256U
+
 /* LRO */
 #define AQ_CFG_IS_LRO_DEF           1U
 
index ebbaf63eaf475123a0d67b7eef8cc1ed42e348e6..c96a92118b8b85272e7c3551dc5de31da3bf8852 100644 (file)
@@ -247,6 +247,8 @@ void aq_nic_ndev_init(struct aq_nic_s *self)
        self->ndev->hw_features |= aq_hw_caps->hw_features;
        self->ndev->features = aq_hw_caps->hw_features;
        self->ndev->priv_flags = aq_hw_caps->hw_priv_flags;
+       self->ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+
        self->ndev->mtu = aq_nic_cfg->mtu - ETH_HLEN;
        self->ndev->max_mtu = aq_hw_caps->mtu - ETH_FCS_LEN - ETH_HLEN;
 
@@ -937,3 +939,23 @@ int aq_nic_change_pm_state(struct aq_nic_s *self, pm_message_t *pm_msg)
 out:
        return err;
 }
+
+/* Shut the NIC down: detach the net device, stop the NIC and, only if the
+ * stop succeeded, deinitialise it. The work runs under the RTNL lock.
+ * Does nothing when @self has no net device.
+ */
+void aq_nic_shutdown(struct aq_nic_s *self)
+{
+       if (!self->ndev)
+               return;
+
+       rtnl_lock();
+
+       netif_device_detach(self->ndev);
+
+       if (aq_nic_stop(self) >= 0)
+               aq_nic_deinit(self);
+
+       rtnl_unlock();
+}
\ No newline at end of file
index d16b0f1a95aa485753f90afda57ad0edf86081c1..219b550d16650bd6b205fb6e10855627a0fd277b 100644 (file)
@@ -118,5 +118,6 @@ struct aq_nic_cfg_s *aq_nic_get_cfg(struct aq_nic_s *self);
 u32 aq_nic_get_fw_version(struct aq_nic_s *self);
 int aq_nic_change_pm_state(struct aq_nic_s *self, pm_message_t *pm_msg);
 int aq_nic_update_interrupt_moderation_settings(struct aq_nic_s *self);
+void aq_nic_shutdown(struct aq_nic_s *self);
 
 #endif /* AQ_NIC_H */
index 87c4308b52a7cc7666a88984712d24198214a741..ecc6306f940f5d9f975d9cd422114f0be05c3435 100644 (file)
@@ -323,6 +323,20 @@ static void aq_pci_remove(struct pci_dev *pdev)
        pci_disable_device(pdev);
 }
 
+/* PCI shutdown hook: shut the NIC down and disable the PCI device. When
+ * the system is powering off, wake from D3 is additionally turned off and
+ * the device is put into D3hot.
+ */
+static void aq_pci_shutdown(struct pci_dev *pdev)
+{
+       aq_nic_shutdown(pci_get_drvdata(pdev));
+
+       pci_disable_device(pdev);
+
+       if (system_state != SYSTEM_POWER_OFF)
+               return;
+
+       pci_wake_from_d3(pdev, false);
+       pci_set_power_state(pdev, PCI_D3hot);
+}
+
 static int aq_pci_suspend(struct pci_dev *pdev, pm_message_t pm_msg)
 {
        struct aq_nic_s *self = pci_get_drvdata(pdev);
@@ -345,6 +359,7 @@ static struct pci_driver aq_pci_ops = {
        .remove = aq_pci_remove,
        .suspend = aq_pci_suspend,
        .resume = aq_pci_resume,
+       .shutdown = aq_pci_shutdown,
 };
 
 module_pci_driver(aq_pci_ops);
index 0be6a11370bb3e233370c0dd377c8558310ab0f5..b5f1f62e8e253785436fa7cd9119a8467edf4fd4 100644 (file)
@@ -136,11 +136,12 @@ void aq_ring_queue_stop(struct aq_ring_s *ring)
                netif_stop_subqueue(ndev, ring->idx);
 }
 
-void aq_ring_tx_clean(struct aq_ring_s *self)
+bool aq_ring_tx_clean(struct aq_ring_s *self)
 {
        struct device *dev = aq_nic_get_dev(self->aq_nic);
+       unsigned int budget = AQ_CFG_TX_CLEAN_BUDGET;
 
-       for (; self->sw_head != self->hw_head;
+       for (; self->sw_head != self->hw_head && budget--;
                self->sw_head = aq_ring_next_dx(self, self->sw_head)) {
                struct aq_ring_buff_s *buff = &self->buff_ring[self->sw_head];
 
@@ -167,6 +168,8 @@ void aq_ring_tx_clean(struct aq_ring_s *self)
                buff->pa = 0U;
                buff->eop_index = 0xffffU;
        }
+
+       return !!budget;
 }
 
 #define AQ_SKB_ALIGN SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
index 965fae0fb6e0ddee8165a91097aeb148cb1308ab..ac1329f4051d7f3681e18f0e886fe9ec58accc88 100644 (file)
@@ -153,7 +153,7 @@ void aq_ring_free(struct aq_ring_s *self);
 void aq_ring_update_queue_state(struct aq_ring_s *ring);
 void aq_ring_queue_wake(struct aq_ring_s *ring);
 void aq_ring_queue_stop(struct aq_ring_s *ring);
-void aq_ring_tx_clean(struct aq_ring_s *self);
+bool aq_ring_tx_clean(struct aq_ring_s *self);
 int aq_ring_rx_clean(struct aq_ring_s *self,
                     struct napi_struct *napi,
                     int *work_done,
index f890b8a5a8623ef20a4c3ca016b4dbe2ad16f475..d335c334fa561ed2ae1a8dad45fcd9af822ee0a7 100644 (file)
@@ -35,12 +35,12 @@ struct aq_vec_s {
 static int aq_vec_poll(struct napi_struct *napi, int budget)
 {
        struct aq_vec_s *self = container_of(napi, struct aq_vec_s, napi);
+       unsigned int sw_tail_old = 0U;
        struct aq_ring_s *ring = NULL;
+       bool was_tx_cleaned = true;
+       unsigned int i = 0U;
        int work_done = 0;
        int err = 0;
-       unsigned int i = 0U;
-       unsigned int sw_tail_old = 0U;
-       bool was_tx_cleaned = false;
 
        if (!self) {
                err = -EINVAL;
@@ -57,9 +57,8 @@ static int aq_vec_poll(struct napi_struct *napi, int budget)
 
                        if (ring[AQ_VEC_TX_ID].sw_head !=
                            ring[AQ_VEC_TX_ID].hw_head) {
-                               aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]);
+                               was_tx_cleaned = aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]);
                                aq_ring_update_queue_state(&ring[AQ_VEC_TX_ID]);
-                               was_tx_cleaned = true;
                        }
 
                        err = self->aq_hw_ops->hw_ring_rx_receive(self->aq_hw,
@@ -90,7 +89,7 @@ static int aq_vec_poll(struct napi_struct *napi, int budget)
                        }
                }
 
-               if (was_tx_cleaned)
+               if (!was_tx_cleaned)
                        work_done = budget;
 
                if (work_done < budget) {
index 967f0fd07fcf2d5c377476b6feabba9e9b4f1eac..d3b847ec7465cc6a87f345d53b777662c969140b 100644 (file)
 
 #define HW_ATL_UCP_0X370_REG    0x0370U
 
+#define HW_ATL_MIF_CMD          0x0200U
+#define HW_ATL_MIF_ADDR         0x0208U
+#define HW_ATL_MIF_VAL          0x020CU
+
 #define HW_ATL_FW_SM_RAM        0x2U
 #define HW_ATL_MPI_FW_VERSION  0x18
 #define HW_ATL_MPI_CONTROL_ADR  0x0368U
@@ -79,16 +83,15 @@ int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops)
 
 static int hw_atl_utils_soft_reset_flb(struct aq_hw_s *self)
 {
+       u32 gsr, val;
        int k = 0;
-       u32 gsr;
 
        aq_hw_write_reg(self, 0x404, 0x40e1);
        AQ_HW_SLEEP(50);
 
        /* Cleanup SPI */
-       aq_hw_write_reg(self, 0x534, 0xA0);
-       aq_hw_write_reg(self, 0x100, 0x9F);
-       aq_hw_write_reg(self, 0x100, 0x809F);
+       val = aq_hw_read_reg(self, 0x53C);
+       aq_hw_write_reg(self, 0x53C, val | 0x10);
 
        gsr = aq_hw_read_reg(self, HW_ATL_GLB_SOFT_RES_ADR);
        aq_hw_write_reg(self, HW_ATL_GLB_SOFT_RES_ADR, (gsr & 0xBFFF) | 0x8000);
@@ -97,7 +100,14 @@ static int hw_atl_utils_soft_reset_flb(struct aq_hw_s *self)
        aq_hw_write_reg(self, 0x404, 0x80e0);
        aq_hw_write_reg(self, 0x32a8, 0x0);
        aq_hw_write_reg(self, 0x520, 0x1);
+
+       /* Reset SPI again because of possible interrupted SPI burst */
+       val = aq_hw_read_reg(self, 0x53C);
+       aq_hw_write_reg(self, 0x53C, val | 0x10);
        AQ_HW_SLEEP(10);
+       /* Clear SPI reset state */
+       aq_hw_write_reg(self, 0x53C, val & ~0x10);
+
        aq_hw_write_reg(self, 0x404, 0x180e0);
 
        for (k = 0; k < 1000; k++) {
@@ -141,13 +151,15 @@ static int hw_atl_utils_soft_reset_flb(struct aq_hw_s *self)
                aq_pr_err("FW kickstart failed\n");
                return -EIO;
        }
+       /* Old FW requires fixed delay after init */
+       AQ_HW_SLEEP(15);
 
        return 0;
 }
 
 static int hw_atl_utils_soft_reset_rbl(struct aq_hw_s *self)
 {
-       u32 gsr, rbl_status;
+       u32 gsr, val, rbl_status;
        int k;
 
        aq_hw_write_reg(self, 0x404, 0x40e1);
@@ -157,6 +169,10 @@ static int hw_atl_utils_soft_reset_rbl(struct aq_hw_s *self)
        /* Alter RBL status */
        aq_hw_write_reg(self, 0x388, 0xDEAD);
 
+       /* Cleanup SPI */
+       val = aq_hw_read_reg(self, 0x53C);
+       aq_hw_write_reg(self, 0x53C, val | 0x10);
+
        /* Global software reset*/
        hw_atl_rx_rx_reg_res_dis_set(self, 0U);
        hw_atl_tx_tx_reg_res_dis_set(self, 0U);
@@ -204,6 +220,8 @@ static int hw_atl_utils_soft_reset_rbl(struct aq_hw_s *self)
                aq_pr_err("FW kickstart failed\n");
                return -EIO;
        }
+       /* Old FW requires fixed delay after init */
+       AQ_HW_SLEEP(15);
 
        return 0;
 }
@@ -255,18 +273,22 @@ int hw_atl_utils_fw_downld_dwords(struct aq_hw_s *self, u32 a,
                }
        }
 
-       aq_hw_write_reg(self, 0x00000208U, a);
-
-       for (++cnt; --cnt;) {
-               u32 i = 0U;
+       aq_hw_write_reg(self, HW_ATL_MIF_ADDR, a);
 
-               aq_hw_write_reg(self, 0x00000200U, 0x00008000U);
+       for (++cnt; --cnt && !err;) {
+               aq_hw_write_reg(self, HW_ATL_MIF_CMD, 0x00008000U);
 
-               for (i = 1024U;
-                       (0x100U & aq_hw_read_reg(self, 0x00000200U)) && --i;) {
-               }
+               if (IS_CHIP_FEATURE(REVISION_B1))
+                       AQ_HW_WAIT_FOR(a != aq_hw_read_reg(self,
+                                                          HW_ATL_MIF_ADDR),
+                                      1, 1000U);
+               else
+                       AQ_HW_WAIT_FOR(!(0x100 & aq_hw_read_reg(self,
+                                                          HW_ATL_MIF_CMD)),
+                                      1, 1000U);
 
-               *(p++) = aq_hw_read_reg(self, 0x0000020CU);
+               *(p++) = aq_hw_read_reg(self, HW_ATL_MIF_VAL);
+               a += 4;
        }
 
        hw_atl_reg_glb_cpu_sem_set(self, 1U, HW_ATL_FW_SM_RAM);
@@ -662,14 +684,18 @@ void hw_atl_utils_hw_chip_features_init(struct aq_hw_s *self, u32 *p)
        u32 val = hw_atl_reg_glb_mif_id_get(self);
        u32 mif_rev = val & 0xFFU;
 
-       if ((3U & mif_rev) == 1U) {
-               chip_features |=
-                       HAL_ATLANTIC_UTILS_CHIP_REVISION_A0 |
+       if ((0xFU & mif_rev) == 1U) {
+               chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_A0 |
                        HAL_ATLANTIC_UTILS_CHIP_MPI_AQ |
                        HAL_ATLANTIC_UTILS_CHIP_MIPS;
-       } else if ((3U & mif_rev) == 2U) {
-               chip_features |=
-                       HAL_ATLANTIC_UTILS_CHIP_REVISION_B0 |
+       } else if ((0xFU & mif_rev) == 2U) {
+               chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_B0 |
+                       HAL_ATLANTIC_UTILS_CHIP_MPI_AQ |
+                       HAL_ATLANTIC_UTILS_CHIP_MIPS |
+                       HAL_ATLANTIC_UTILS_CHIP_TPO2 |
+                       HAL_ATLANTIC_UTILS_CHIP_RPF2;
+       } else if ((0xFU & mif_rev) == 0xAU) {
+               chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_B1 |
                        HAL_ATLANTIC_UTILS_CHIP_MPI_AQ |
                        HAL_ATLANTIC_UTILS_CHIP_MIPS |
                        HAL_ATLANTIC_UTILS_CHIP_TPO2 |
index 2c690947910a3927f559efd63df20d99b0e8010b..cd8f18f39c611f8f709f71c7a1c23da8332a3fa4 100644 (file)
@@ -161,6 +161,7 @@ struct __packed hw_aq_atl_utils_mbox {
 #define HAL_ATLANTIC_UTILS_CHIP_MPI_AQ       0x00000010U
 #define HAL_ATLANTIC_UTILS_CHIP_REVISION_A0  0x01000000U
 #define HAL_ATLANTIC_UTILS_CHIP_REVISION_B0  0x02000000U
+#define HAL_ATLANTIC_UTILS_CHIP_REVISION_B1  0x04000000U
 
 #define IS_CHIP_FEATURE(_F_) (HAL_ATLANTIC_UTILS_CHIP_##_F_ & \
        self->chip_features)
index 5265b937677bcada0c38e7b41ee3b744299cbece..a445de6837a6c8bff1c250d4702612f4795b2477 100644 (file)
@@ -13,7 +13,7 @@
 #define NIC_MAJOR_DRIVER_VERSION           2
 #define NIC_MINOR_DRIVER_VERSION           0
 #define NIC_BUILD_DRIVER_VERSION           2
-#define NIC_REVISION_DRIVER_VERSION        0
+#define NIC_REVISION_DRIVER_VERSION        1
 
 #define AQ_CFG_DRV_VERSION_SUFFIX "-kern"
 
index 16f9bee992fedfab2069a2324c38fd4a5f142c93..0f65768026072ae7ded390fef283269f180f6e24 100644 (file)
@@ -169,8 +169,10 @@ static int emac_rockchip_probe(struct platform_device *pdev)
        /* Optional regulator for PHY */
        priv->regulator = devm_regulator_get_optional(dev, "phy");
        if (IS_ERR(priv->regulator)) {
-               if (PTR_ERR(priv->regulator) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
+               if (PTR_ERR(priv->regulator) == -EPROBE_DEFER) {
+                       err = -EPROBE_DEFER;
+                       goto out_clk_disable;
+               }
                dev_err(dev, "no regulator found\n");
                priv->regulator = NULL;
        }
index f15a8fc6dfc97419f8e1492dd1717b9d2e562b84..4e26f606a7f227ca46f2139b7b04ca9fd46ffa67 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/net_dim.h>
 #include <linux/etherdevice.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
@@ -574,21 +575,55 @@ static int bcm_sysport_set_wol(struct net_device *dev,
        return 0;
 }
 
+/* Write the RX coalescing parameters held in priv->dim to the
+ * RDMA_MBDONE_INTR register: the packet threshold goes in as is, the
+ * timeout is converted from microseconds to 8192 ns units, rounded up.
+ */
+static void bcm_sysport_set_rx_coalesce(struct bcm_sysport_priv *priv)
+{
+       u32 val = rdma_readl(priv, RDMA_MBDONE_INTR);
+
+       /* Clear both fields before merging in the new values */
+       val &= ~(RDMA_INTR_THRESH_MASK |
+                RDMA_TIMEOUT_MASK << RDMA_TIMEOUT_SHIFT);
+       val |= priv->dim.coal_pkts |
+              DIV_ROUND_UP(priv->dim.coal_usecs * 1000, 8192) <<
+              RDMA_TIMEOUT_SHIFT;
+
+       rdma_writel(priv, val, RDMA_MBDONE_INTR);
+}
+
+/* Write the TX coalescing parameters held in ring->dim to the interrupt
+ * control register of that ring: the packet threshold goes in as is, the
+ * timeout is converted from microseconds to 8192 ns units, rounded up.
+ */
+static void bcm_sysport_set_tx_coalesce(struct bcm_sysport_tx_ring *ring)
+{
+       struct bcm_sysport_priv *priv = ring->priv;
+       u32 val = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(ring->index));
+
+       /* Clear both fields before merging in the new values */
+       val &= ~(RING_INTR_THRESH_MASK |
+                RING_TIMEOUT_MASK << RING_TIMEOUT_SHIFT);
+       val |= ring->dim.coal_pkts |
+              DIV_ROUND_UP(ring->dim.coal_usecs * 1000, 8192) <<
+              RING_TIMEOUT_SHIFT;
+
+       tdma_writel(priv, val, TDMA_DESC_RING_INTR_CONTROL(ring->index));
+}
+
 static int bcm_sysport_get_coalesce(struct net_device *dev,
                                    struct ethtool_coalesce *ec)
 {
        struct bcm_sysport_priv *priv = netdev_priv(dev);
+       struct bcm_sysport_tx_ring *ring;
+       unsigned int i;
        u32 reg;
 
        reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(0));
 
        ec->tx_coalesce_usecs = (reg >> RING_TIMEOUT_SHIFT) * 8192 / 1000;
        ec->tx_max_coalesced_frames = reg & RING_INTR_THRESH_MASK;
+       for (i = 0; i < dev->num_tx_queues; i++) {
+               ring = &priv->tx_rings[i];
+               ec->use_adaptive_tx_coalesce |= ring->dim.use_dim;
+       }
 
        reg = rdma_readl(priv, RDMA_MBDONE_INTR);
 
        ec->rx_coalesce_usecs = (reg >> RDMA_TIMEOUT_SHIFT) * 8192 / 1000;
        ec->rx_max_coalesced_frames = reg & RDMA_INTR_THRESH_MASK;
+       ec->use_adaptive_rx_coalesce = priv->dim.use_dim;
 
        return 0;
 }
@@ -597,8 +632,8 @@ static int bcm_sysport_set_coalesce(struct net_device *dev,
                                    struct ethtool_coalesce *ec)
 {
        struct bcm_sysport_priv *priv = netdev_priv(dev);
+       struct bcm_sysport_tx_ring *ring;
        unsigned int i;
-       u32 reg;
 
        /* Base system clock is 125Mhz, DMA timeout is this reference clock
         * divided by 1024, which yield roughly 8.192 us, our maximum value has
@@ -615,22 +650,26 @@ static int bcm_sysport_set_coalesce(struct net_device *dev,
                return -EINVAL;
 
        for (i = 0; i < dev->num_tx_queues; i++) {
-               reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(i));
-               reg &= ~(RING_INTR_THRESH_MASK |
-                        RING_TIMEOUT_MASK << RING_TIMEOUT_SHIFT);
-               reg |= ec->tx_max_coalesced_frames;
-               reg |= DIV_ROUND_UP(ec->tx_coalesce_usecs * 1000, 8192) <<
-                        RING_TIMEOUT_SHIFT;
-               tdma_writel(priv, reg, TDMA_DESC_RING_INTR_CONTROL(i));
+               ring = &priv->tx_rings[i];
+               ring->dim.coal_pkts = ec->tx_max_coalesced_frames;
+               ring->dim.coal_usecs = ec->tx_coalesce_usecs;
+               if (!ec->use_adaptive_tx_coalesce && ring->dim.use_dim) {
+                       ring->dim.coal_pkts = 1;
+                       ring->dim.coal_usecs = 0;
+               }
+               ring->dim.use_dim = ec->use_adaptive_tx_coalesce;
+               bcm_sysport_set_tx_coalesce(ring);
        }
 
-       reg = rdma_readl(priv, RDMA_MBDONE_INTR);
-       reg &= ~(RDMA_INTR_THRESH_MASK |
-                RDMA_TIMEOUT_MASK << RDMA_TIMEOUT_SHIFT);
-       reg |= ec->rx_max_coalesced_frames;
-       reg |= DIV_ROUND_UP(ec->rx_coalesce_usecs * 1000, 8192) <<
-                           RDMA_TIMEOUT_SHIFT;
-       rdma_writel(priv, reg, RDMA_MBDONE_INTR);
+       priv->dim.coal_usecs = ec->rx_coalesce_usecs;
+       priv->dim.coal_pkts = ec->rx_max_coalesced_frames;
+
+       if (!ec->use_adaptive_rx_coalesce && priv->dim.use_dim) {
+               priv->dim.coal_pkts = 1;
+               priv->dim.coal_usecs = 0;
+       }
+       priv->dim.use_dim = ec->use_adaptive_rx_coalesce;
+       bcm_sysport_set_rx_coalesce(priv);
 
        return 0;
 }
@@ -709,6 +748,7 @@ static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
        struct bcm_sysport_stats64 *stats64 = &priv->stats64;
        struct net_device *ndev = priv->netdev;
        unsigned int processed = 0, to_process;
+       unsigned int processed_bytes = 0;
        struct bcm_sysport_cb *cb;
        struct sk_buff *skb;
        unsigned int p_index;
@@ -800,6 +840,7 @@ static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
                 */
                skb_pull(skb, sizeof(*rsb) + 2);
                len -= (sizeof(*rsb) + 2);
+               processed_bytes += len;
 
                /* UniMAC may forward CRC */
                if (priv->crc_fwd) {
@@ -824,6 +865,9 @@ static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
                        priv->rx_read_ptr = 0;
        }
 
+       priv->dim.packets = processed;
+       priv->dim.bytes = processed_bytes;
+
        return processed;
 }
 
@@ -855,10 +899,12 @@ static void bcm_sysport_tx_reclaim_one(struct bcm_sysport_tx_ring *ring,
 static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
                                             struct bcm_sysport_tx_ring *ring)
 {
-       unsigned int c_index, last_c_index, last_tx_cn, num_tx_cbs;
        unsigned int pkts_compl = 0, bytes_compl = 0;
        struct net_device *ndev = priv->netdev;
+       unsigned int txbds_processed = 0;
        struct bcm_sysport_cb *cb;
+       unsigned int txbds_ready;
+       unsigned int c_index;
        u32 hw_ind;
 
        /* Clear status before servicing to reduce spurious interrupts */
@@ -871,35 +917,31 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
        /* Compute how many descriptors have been processed since last call */
        hw_ind = tdma_readl(priv, TDMA_DESC_RING_PROD_CONS_INDEX(ring->index));
        c_index = (hw_ind >> RING_CONS_INDEX_SHIFT) & RING_CONS_INDEX_MASK;
-       ring->p_index = (hw_ind & RING_PROD_INDEX_MASK);
-
-       last_c_index = ring->c_index;
-       num_tx_cbs = ring->size;
-
-       c_index &= (num_tx_cbs - 1);
-
-       if (c_index >= last_c_index)
-               last_tx_cn = c_index - last_c_index;
-       else
-               last_tx_cn = num_tx_cbs - last_c_index + c_index;
+       txbds_ready = (c_index - ring->c_index) & RING_CONS_INDEX_MASK;
 
        netif_dbg(priv, tx_done, ndev,
-                 "ring=%d c_index=%d last_tx_cn=%d last_c_index=%d\n",
-                 ring->index, c_index, last_tx_cn, last_c_index);
+                 "ring=%d old_c_index=%u c_index=%u txbds_ready=%u\n",
+                 ring->index, ring->c_index, c_index, txbds_ready);
 
-       while (last_tx_cn-- > 0) {
-               cb = ring->cbs + last_c_index;
+       while (txbds_processed < txbds_ready) {
+               cb = &ring->cbs[ring->clean_index];
                bcm_sysport_tx_reclaim_one(ring, cb, &bytes_compl, &pkts_compl);
 
                ring->desc_count++;
-               last_c_index++;
-               last_c_index &= (num_tx_cbs - 1);
+               txbds_processed++;
+
+               if (likely(ring->clean_index < ring->size - 1))
+                       ring->clean_index++;
+               else
+                       ring->clean_index = 0;
        }
 
        u64_stats_update_begin(&priv->syncp);
        ring->packets += pkts_compl;
        ring->bytes += bytes_compl;
        u64_stats_update_end(&priv->syncp);
+       ring->dim.packets = pkts_compl;
+       ring->dim.bytes = bytes_compl;
 
        ring->c_index = c_index;
 
@@ -945,6 +987,7 @@ static int bcm_sysport_tx_poll(struct napi_struct *napi, int budget)
 {
        struct bcm_sysport_tx_ring *ring =
                container_of(napi, struct bcm_sysport_tx_ring, napi);
+       struct net_dim_sample dim_sample;
        unsigned int work_done = 0;
 
        work_done = bcm_sysport_tx_reclaim(ring->priv, ring);
@@ -961,6 +1004,12 @@ static int bcm_sysport_tx_poll(struct napi_struct *napi, int budget)
                return 0;
        }
 
+       if (ring->dim.use_dim) {
+               net_dim_sample(ring->dim.event_ctr, ring->dim.packets,
+                              ring->dim.bytes, &dim_sample);
+               net_dim(&ring->dim.dim, dim_sample);
+       }
+
        return budget;
 }
 
@@ -976,6 +1025,7 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget)
 {
        struct bcm_sysport_priv *priv =
                container_of(napi, struct bcm_sysport_priv, napi);
+       struct net_dim_sample dim_sample;
        unsigned int work_done = 0;
 
        work_done = bcm_sysport_desc_rx(priv, budget);
@@ -998,6 +1048,12 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget)
                intrl2_0_mask_clear(priv, INTRL2_0_RDMA_MBDONE);
        }
 
+       if (priv->dim.use_dim) {
+               net_dim_sample(priv->dim.event_ctr, priv->dim.packets,
+                              priv->dim.bytes, &dim_sample);
+               net_dim(&priv->dim.dim, dim_sample);
+       }
+
        return work_done;
 }
 
@@ -1016,6 +1072,40 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv)
        netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n");
 }
 
+static void bcm_sysport_dim_work(struct work_struct *work)
+{
+       struct net_dim *dim = container_of(work, struct net_dim, work);
+       struct bcm_sysport_net_dim *ndim =
+                       container_of(dim, struct bcm_sysport_net_dim, dim);
+       struct bcm_sysport_priv *priv =
+                       container_of(ndim, struct bcm_sysport_priv, dim);
+       struct net_dim_cq_moder cur_profile =
+                               net_dim_get_profile(dim->mode, dim->profile_ix);
+
+       priv->dim.coal_usecs = cur_profile.usec;
+       priv->dim.coal_pkts = cur_profile.pkts;
+
+       bcm_sysport_set_rx_coalesce(priv);
+       dim->state = NET_DIM_START_MEASURE;
+}
+
+static void bcm_sysport_dim_tx_work(struct work_struct *work)
+{
+       struct net_dim *dim = container_of(work, struct net_dim, work);
+       struct bcm_sysport_net_dim *ndim =
+                       container_of(dim, struct bcm_sysport_net_dim, dim);
+       struct bcm_sysport_tx_ring *ring =
+                       container_of(ndim, struct bcm_sysport_tx_ring, dim);
+       struct net_dim_cq_moder cur_profile =
+                               net_dim_get_profile(dim->mode, dim->profile_ix);
+
+       ring->dim.coal_usecs = cur_profile.usec;
+       ring->dim.coal_pkts = cur_profile.pkts;
+
+       bcm_sysport_set_tx_coalesce(ring);
+       dim->state = NET_DIM_START_MEASURE;
+}
+
 /* RX and misc interrupt routine */
 static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
 {
@@ -1034,6 +1124,7 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
        }
 
        if (priv->irq0_stat & INTRL2_0_RDMA_MBDONE) {
+               priv->dim.event_ctr++;
                if (likely(napi_schedule_prep(&priv->napi))) {
                        /* disable RX interrupts */
                        intrl2_0_mask_set(priv, INTRL2_0_RDMA_MBDONE);
@@ -1061,6 +1152,7 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
                        continue;
 
                txr = &priv->tx_rings[ring];
+               txr->dim.event_ctr++;
 
                if (likely(napi_schedule_prep(&txr->napi))) {
                        intrl2_0_mask_set(priv, ring_bit);
@@ -1093,6 +1185,7 @@ static irqreturn_t bcm_sysport_tx_isr(int irq, void *dev_id)
                        continue;
 
                txr = &priv->tx_rings[ring];
+               txr->dim.event_ctr++;
 
                if (likely(napi_schedule_prep(&txr->napi))) {
                        intrl2_1_mask_set(priv, BIT(ring));
@@ -1358,6 +1451,16 @@ static void bcm_sysport_adj_link(struct net_device *dev)
                phy_print_status(phydev);
 }
 
+static void bcm_sysport_init_dim(struct bcm_sysport_net_dim *dim,
+                                void (*cb)(struct work_struct *work))
+{
+       INIT_WORK(&dim->dim.work, cb);
+       dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+       dim->event_ctr = 0;
+       dim->packets = 0;
+       dim->bytes = 0;
+}
+
 static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
                                    unsigned int index)
 {
@@ -1394,6 +1497,7 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
        netif_tx_napi_add(priv->netdev, &ring->napi, bcm_sysport_tx_poll, 64);
        ring->index = index;
        ring->size = size;
+       ring->clean_index = 0;
        ring->alloc_size = ring->size;
        ring->desc_cpu = p;
        ring->desc_count = ring->size;
@@ -1447,6 +1551,7 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
        reg |= (1 << index);
        tdma_writel(priv, reg, TDMA_TIER1_ARB_0_QUEUE_EN);
 
+       bcm_sysport_init_dim(&ring->dim, bcm_sysport_dim_tx_work);
        napi_enable(&ring->napi);
 
        netif_dbg(priv, hw, priv->netdev,
@@ -1477,6 +1582,7 @@ static void bcm_sysport_fini_tx_ring(struct bcm_sysport_priv *priv,
                return;
 
        napi_disable(&ring->napi);
+       cancel_work_sync(&ring->dim.dim.work);
        netif_napi_del(&ring->napi);
 
        bcm_sysport_tx_clean(priv, ring);
@@ -1766,6 +1872,7 @@ static void bcm_sysport_netif_start(struct net_device *dev)
        struct bcm_sysport_priv *priv = netdev_priv(dev);
 
        /* Enable NAPI */
+       bcm_sysport_init_dim(&priv->dim, bcm_sysport_dim_work);
        napi_enable(&priv->napi);
 
        /* Enable RX interrupt and TX ring full interrupt */
@@ -1951,6 +2058,7 @@ static void bcm_sysport_netif_stop(struct net_device *dev)
        /* stop all software from updating hardware */
        netif_tx_stop_all_queues(dev);
        napi_disable(&priv->napi);
+       cancel_work_sync(&priv->dim.dim.work);
        phy_stop(dev->phydev);
 
        /* mask all interrupts */
index f5a984c1c986535f3421bafd9c851ec995ccf3b0..e1c97d4a82b4b8adb23bc7a9817b33cb3c671a45 100644 (file)
@@ -12,6 +12,7 @@
 #define __BCM_SYSPORT_H
 
 #include <linux/if_vlan.h>
+#include <linux/net_dim.h>
 
 /* Receive/transmit descriptor format */
 #define DESC_ADDR_HI_STATUS_LEN        0x00
@@ -695,6 +696,16 @@ struct bcm_sysport_hw_params {
        unsigned int    num_rx_desc_words;
 };
 
+struct bcm_sysport_net_dim {
+       u16                     use_dim;
+       u16                     event_ctr;
+       unsigned long           packets;
+       unsigned long           bytes;
+       u32                     coal_usecs;
+       u32                     coal_pkts;
+       struct net_dim          dim;
+};
+
 /* Software view of the TX ring */
 struct bcm_sysport_tx_ring {
        spinlock_t      lock;           /* Ring lock for tx reclaim/xmit */
@@ -706,12 +717,13 @@ struct bcm_sysport_tx_ring {
        unsigned int    desc_count;     /* Number of descriptors */
        unsigned int    curr_desc;      /* Current descriptor */
        unsigned int    c_index;        /* Last consumer index */
-       unsigned int    p_index;        /* Current producer index */
+       unsigned int    clean_index;    /* Current clean index */
        struct bcm_sysport_cb *cbs;     /* Transmit control blocks */
        struct dma_desc *desc_cpu;      /* CPU view of the descriptor */
        struct bcm_sysport_priv *priv;  /* private context backpointer */
        unsigned long   packets;        /* packets statistics */
        unsigned long   bytes;          /* bytes statistics */
+       struct bcm_sysport_net_dim dim; /* Net DIM context */
        unsigned int    switch_queue;   /* switch port queue number */
        unsigned int    switch_port;    /* switch port queue number */
        bool            inspect;        /* inspect switch port and queue */
@@ -743,6 +755,8 @@ struct bcm_sysport_priv {
        unsigned int            rx_read_ptr;
        unsigned int            rx_c_index;
 
+       struct bcm_sysport_net_dim      dim;
+
        /* PHY device */
        struct device_node      *phy_dn;
        phy_interface_t         phy_interface;
index 74fc9af4aadb4358a53858fa93e6b185637a618e..b8388e93520a1a45b6d20b7369f0e6c109c22f36 100644 (file)
@@ -13913,7 +13913,7 @@ static void bnx2x_register_phc(struct bnx2x *bp)
        bp->ptp_clock = ptp_clock_register(&bp->ptp_clock_info, &bp->pdev->dev);
        if (IS_ERR(bp->ptp_clock)) {
                bp->ptp_clock = NULL;
-               BNX2X_ERR("PTP clock registeration failed\n");
+               BNX2X_ERR("PTP clock registration failed\n");
        }
 }
 
index 1500243b988650625c5deeaf5ac9759e2670b514..c7e5e6f09647d5d798e50db2beb1ee7b1e28f277 100644 (file)
@@ -1439,7 +1439,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
            (skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
                u16 vlan_proto = tpa_info->metadata >>
                        RX_CMP_FLAGS2_METADATA_TPID_SFT;
-               u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_VID_MASK;
+               u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_TCI_MASK;
 
                __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag);
        }
@@ -1623,7 +1623,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
             cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) &&
            (skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
                u32 meta_data = le32_to_cpu(rxcmp1->rx_cmp_meta_data);
-               u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_VID_MASK;
+               u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_TCI_MASK;
                u16 vlan_proto = meta_data >> RX_CMP_FLAGS2_METADATA_TPID_SFT;
 
                __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag);
@@ -3847,6 +3847,9 @@ static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags)
        struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
        struct hwrm_vnic_tpa_cfg_input req = {0};
 
+       if (vnic->fw_vnic_id == INVALID_HW_RING_ID)
+               return 0;
+
        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_TPA_CFG, -1, -1);
 
        if (tpa_flags) {
@@ -4558,18 +4561,17 @@ int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings)
        return rc;
 }
 
-static int
-bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
-                          int ring_grps, int cp_rings, int vnics)
+static void
+__bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req,
+                            int tx_rings, int rx_rings, int ring_grps,
+                            int cp_rings, int vnics)
 {
-       struct hwrm_func_cfg_input req = {0};
        u32 enables = 0;
-       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
-       req.fid = cpu_to_le16(0xffff);
+       bnxt_hwrm_cmd_hdr_init(bp, req, HWRM_FUNC_CFG, -1, -1);
+       req->fid = cpu_to_le16(0xffff);
        enables |= tx_rings ? FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS : 0;
-       req.num_tx_rings = cpu_to_le16(tx_rings);
+       req->num_tx_rings = cpu_to_le16(tx_rings);
        if (bp->flags & BNXT_FLAG_NEW_RM) {
                enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS : 0;
                enables |= cp_rings ? FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
@@ -4578,16 +4580,53 @@ bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
                           FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0;
                enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0;
 
-               req.num_rx_rings = cpu_to_le16(rx_rings);
-               req.num_hw_ring_grps = cpu_to_le16(ring_grps);
-               req.num_cmpl_rings = cpu_to_le16(cp_rings);
-               req.num_stat_ctxs = req.num_cmpl_rings;
-               req.num_vnics = cpu_to_le16(vnics);
+               req->num_rx_rings = cpu_to_le16(rx_rings);
+               req->num_hw_ring_grps = cpu_to_le16(ring_grps);
+               req->num_cmpl_rings = cpu_to_le16(cp_rings);
+               req->num_stat_ctxs = req->num_cmpl_rings;
+               req->num_vnics = cpu_to_le16(vnics);
        }
-       if (!enables)
+       req->enables = cpu_to_le32(enables);
+}
+
+static void
+__bnxt_hwrm_reserve_vf_rings(struct bnxt *bp,
+                            struct hwrm_func_vf_cfg_input *req, int tx_rings,
+                            int rx_rings, int ring_grps, int cp_rings,
+                            int vnics)
+{
+       u32 enables = 0;
+
+       bnxt_hwrm_cmd_hdr_init(bp, req, HWRM_FUNC_VF_CFG, -1, -1);
+       enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0;
+       enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS : 0;
+       enables |= cp_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
+                             FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
+       enables |= ring_grps ? FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0;
+       enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0;
+
+       req->num_tx_rings = cpu_to_le16(tx_rings);
+       req->num_rx_rings = cpu_to_le16(rx_rings);
+       req->num_hw_ring_grps = cpu_to_le16(ring_grps);
+       req->num_cmpl_rings = cpu_to_le16(cp_rings);
+       req->num_stat_ctxs = req->num_cmpl_rings;
+       req->num_vnics = cpu_to_le16(vnics);
+
+       req->enables = cpu_to_le32(enables);
+}
+
+static int
+bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
+                          int ring_grps, int cp_rings, int vnics)
+{
+       struct hwrm_func_cfg_input req = {0};
+       int rc;
+
+       __bnxt_hwrm_reserve_pf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
+                                    cp_rings, vnics);
+       if (!req.enables)
                return 0;
 
-       req.enables = cpu_to_le32(enables);
        rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
        if (rc)
                return -ENOMEM;
@@ -4604,7 +4643,6 @@ bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
                           int ring_grps, int cp_rings, int vnics)
 {
        struct hwrm_func_vf_cfg_input req = {0};
-       u32 enables = 0;
        int rc;
 
        if (!(bp->flags & BNXT_FLAG_NEW_RM)) {
@@ -4612,22 +4650,8 @@ bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
                return 0;
        }
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1);
-       enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0;
-       enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS : 0;
-       enables |= cp_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
-                             FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
-       enables |= ring_grps ? FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0;
-       enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0;
-
-       req.num_tx_rings = cpu_to_le16(tx_rings);
-       req.num_rx_rings = cpu_to_le16(rx_rings);
-       req.num_hw_ring_grps = cpu_to_le16(ring_grps);
-       req.num_cmpl_rings = cpu_to_le16(cp_rings);
-       req.num_stat_ctxs = req.num_cmpl_rings;
-       req.num_vnics = cpu_to_le16(vnics);
-
-       req.enables = cpu_to_le32(enables);
+       __bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
+                                    cp_rings, vnics);
        rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
        if (rc)
                return -ENOMEM;
@@ -4743,39 +4767,25 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp)
 }
 
 static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
-                                   int ring_grps, int cp_rings)
+                                   int ring_grps, int cp_rings, int vnics)
 {
        struct hwrm_func_vf_cfg_input req = {0};
-       u32 flags, enables;
+       u32 flags;
        int rc;
 
        if (!(bp->flags & BNXT_FLAG_NEW_RM))
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1);
+       __bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
+                                    cp_rings, vnics);
        flags = FUNC_VF_CFG_REQ_FLAGS_TX_ASSETS_TEST |
                FUNC_VF_CFG_REQ_FLAGS_RX_ASSETS_TEST |
                FUNC_VF_CFG_REQ_FLAGS_CMPL_ASSETS_TEST |
                FUNC_VF_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST |
                FUNC_VF_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST |
                FUNC_VF_CFG_REQ_FLAGS_VNIC_ASSETS_TEST;
-       enables = FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS |
-                 FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS |
-                 FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
-                 FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS |
-                 FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS |
-                 FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS;
 
        req.flags = cpu_to_le32(flags);
-       req.enables = cpu_to_le32(enables);
-       req.num_tx_rings = cpu_to_le16(tx_rings);
-       req.num_rx_rings = cpu_to_le16(rx_rings);
-       req.num_cmpl_rings = cpu_to_le16(cp_rings);
-       req.num_hw_ring_grps = cpu_to_le16(ring_grps);
-       req.num_stat_ctxs = cpu_to_le16(cp_rings);
-       req.num_vnics = cpu_to_le16(1);
-       if (bp->flags & BNXT_FLAG_RFS)
-               req.num_vnics = cpu_to_le16(rx_rings + 1);
        rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
        if (rc)
                return -ENOMEM;
@@ -4783,38 +4793,23 @@ static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
 }
 
 static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
-                                   int ring_grps, int cp_rings)
+                                   int ring_grps, int cp_rings, int vnics)
 {
        struct hwrm_func_cfg_input req = {0};
-       u32 flags, enables;
+       u32 flags;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
-       req.fid = cpu_to_le16(0xffff);
+       __bnxt_hwrm_reserve_pf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
+                                    cp_rings, vnics);
        flags = FUNC_CFG_REQ_FLAGS_TX_ASSETS_TEST;
-       enables = FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS;
-       req.num_tx_rings = cpu_to_le16(tx_rings);
-       if (bp->flags & BNXT_FLAG_NEW_RM) {
+       if (bp->flags & BNXT_FLAG_NEW_RM)
                flags |= FUNC_CFG_REQ_FLAGS_RX_ASSETS_TEST |
                         FUNC_CFG_REQ_FLAGS_CMPL_ASSETS_TEST |
                         FUNC_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST |
                         FUNC_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST |
                         FUNC_CFG_REQ_FLAGS_VNIC_ASSETS_TEST;
-               enables |= FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS |
-                          FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
-                          FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS |
-                          FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS |
-                          FUNC_CFG_REQ_ENABLES_NUM_VNICS;
-               req.num_rx_rings = cpu_to_le16(rx_rings);
-               req.num_cmpl_rings = cpu_to_le16(cp_rings);
-               req.num_hw_ring_grps = cpu_to_le16(ring_grps);
-               req.num_stat_ctxs = cpu_to_le16(cp_rings);
-               req.num_vnics = cpu_to_le16(1);
-               if (bp->flags & BNXT_FLAG_RFS)
-                       req.num_vnics = cpu_to_le16(rx_rings + 1);
-       }
+
        req.flags = cpu_to_le32(flags);
-       req.enables = cpu_to_le32(enables);
        rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
        if (rc)
                return -ENOMEM;
@@ -4822,17 +4817,17 @@ static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
 }
 
 static int bnxt_hwrm_check_rings(struct bnxt *bp, int tx_rings, int rx_rings,
-                                int ring_grps, int cp_rings)
+                                int ring_grps, int cp_rings, int vnics)
 {
        if (bp->hwrm_spec_code < 0x10801)
                return 0;
 
        if (BNXT_PF(bp))
                return bnxt_hwrm_check_pf_rings(bp, tx_rings, rx_rings,
-                                               ring_grps, cp_rings);
+                                               ring_grps, cp_rings, vnics);
 
        return bnxt_hwrm_check_vf_rings(bp, tx_rings, rx_rings, ring_grps,
-                                       cp_rings);
+                                       cp_rings, vnics);
 }
 
 static void bnxt_hwrm_set_coal_params(struct bnxt_coal *hw_coal,
@@ -5865,7 +5860,6 @@ static int bnxt_init_msix(struct bnxt *bp)
                if (rc)
                        goto msix_setup_exit;
 
-               bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
                bp->cp_nr_rings = (min == 1) ?
                                  max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) :
                                  bp->tx_nr_rings + bp->rx_nr_rings;
@@ -5897,7 +5891,6 @@ static int bnxt_init_inta(struct bnxt *bp)
        bp->rx_nr_rings = 1;
        bp->tx_nr_rings = 1;
        bp->cp_nr_rings = 1;
-       bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
        bp->flags |= BNXT_FLAG_SHARED_RINGS;
        bp->irq_tbl[0].vector = bp->pdev->irq;
        return 0;
@@ -7531,7 +7524,7 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
        int max_rx, max_tx, tx_sets = 1;
        int tx_rings_needed;
        int rx_rings = rx;
-       int cp, rc;
+       int cp, vnics, rc;
 
        if (tcs)
                tx_sets = tcs;
@@ -7547,10 +7540,15 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
        if (max_tx < tx_rings_needed)
                return -ENOMEM;
 
+       vnics = 1;
+       if (bp->flags & BNXT_FLAG_RFS)
+               vnics += rx_rings;
+
        if (bp->flags & BNXT_FLAG_AGG_RINGS)
                rx_rings <<= 1;
        cp = sh ? max_t(int, tx_rings_needed, rx) : tx_rings_needed + rx;
-       return bnxt_hwrm_check_rings(bp, tx_rings_needed, rx_rings, rx, cp);
+       return bnxt_hwrm_check_rings(bp, tx_rings_needed, rx_rings, rx, cp,
+                                    vnics);
 }
 
 static void bnxt_unmap_bars(struct bnxt *bp, struct pci_dev *pdev)
@@ -8437,13 +8435,20 @@ int bnxt_restore_pf_fw_resources(struct bnxt *bp)
                return 0;
 
        bnxt_hwrm_func_qcaps(bp);
-       __bnxt_close_nic(bp, true, false);
+
+       if (netif_running(bp->dev))
+               __bnxt_close_nic(bp, true, false);
+
        bnxt_clear_int_mode(bp);
        rc = bnxt_init_int_mode(bp);
-       if (rc)
-               dev_close(bp->dev);
-       else
-               rc = bnxt_open_nic(bp, true, false);
+
+       if (netif_running(bp->dev)) {
+               if (rc)
+                       dev_close(bp->dev);
+               else
+                       rc = bnxt_open_nic(bp, true, false);
+       }
+
        return rc;
 }
 
@@ -8664,6 +8669,11 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (rc)
                goto init_err_pci_clean;
 
+       /* No TC has been set yet and rings may have been trimmed due to
+        * limited MSIX, so we re-initialize the TX rings per TC.
+        */
+       bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
+
        bnxt_get_wol_settings(bp);
        if (bp->flags & BNXT_FLAG_WOL_CAP)
                device_set_wakeup_enable(&pdev->dev, bp->wol);
index 1989c470172cba7ac56e8c030f847cd0c5a32531..5e3d62189cab8e05b5c65f2cae0e11fa6e3cf459 100644 (file)
@@ -189,6 +189,7 @@ struct rx_cmp_ext {
        #define RX_CMP_FLAGS2_T_L4_CS_CALC                      (0x1 << 3)
        #define RX_CMP_FLAGS2_META_FORMAT_VLAN                  (0x1 << 4)
        __le32 rx_cmp_meta_data;
+       #define RX_CMP_FLAGS2_METADATA_TCI_MASK                 0xffff
        #define RX_CMP_FLAGS2_METADATA_VID_MASK                 0xfff
        #define RX_CMP_FLAGS2_METADATA_TPID_MASK                0xffff0000
         #define RX_CMP_FLAGS2_METADATA_TPID_SFT                 16
index fbe6e208e17b9ad190d1ee017c68075d0af70bb9..65c2cee357669a7a7b5784b3c7b9a3805095304f 100644 (file)
@@ -349,6 +349,9 @@ static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp, __le16 flow_handle)
        if (rc)
                netdev_info(bp->dev, "Error: %s: flow_handle=0x%x rc=%d",
                            __func__, flow_handle, rc);
+
+       if (rc)
+               rc = -EIO;
        return rc;
 }
 
@@ -484,13 +487,15 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
        req.action_flags = cpu_to_le16(action_flags);
 
        mutex_lock(&bp->hwrm_cmd_lock);
-
        rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
        if (!rc)
                *flow_handle = resp->flow_handle;
-
        mutex_unlock(&bp->hwrm_cmd_lock);
 
+       if (rc == HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR)
+               rc = -ENOSPC;
+       else if (rc)
+               rc = -EIO;
        return rc;
 }
 
@@ -561,6 +566,8 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
                netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
        mutex_unlock(&bp->hwrm_cmd_lock);
 
+       if (rc)
+               rc = -EIO;
        return rc;
 }
 
@@ -576,6 +583,9 @@ static int hwrm_cfa_decap_filter_free(struct bnxt *bp,
        rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
        if (rc)
                netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+
+       if (rc)
+               rc = -EIO;
        return rc;
 }
 
@@ -624,6 +634,8 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
                netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
        mutex_unlock(&bp->hwrm_cmd_lock);
 
+       if (rc)
+               rc = -EIO;
        return rc;
 }
 
@@ -639,6 +651,9 @@ static int hwrm_cfa_encap_record_free(struct bnxt *bp,
        rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
        if (rc)
                netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+
+       if (rc)
+               rc = -EIO;
        return rc;
 }
 
@@ -1269,11 +1284,8 @@ static int bnxt_tc_del_flow(struct bnxt *bp,
        flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
                                           &tc_flow_cmd->cookie,
                                           tc_info->flow_ht_params);
-       if (!flow_node) {
-               netdev_info(bp->dev, "ERROR: no flow_node for cookie %lx",
-                           tc_flow_cmd->cookie);
+       if (!flow_node)
                return -EINVAL;
-       }
 
        return __bnxt_tc_del_flow(bp, flow_node);
 }
@@ -1290,11 +1302,8 @@ static int bnxt_tc_get_flow_stats(struct bnxt *bp,
        flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
                                           &tc_flow_cmd->cookie,
                                           tc_info->flow_ht_params);
-       if (!flow_node) {
-               netdev_info(bp->dev, "Error: no flow_node for cookie %lx",
-                           tc_flow_cmd->cookie);
+       if (!flow_node)
                return -1;
-       }
 
        flow = &flow_node->flow;
        curr_stats = &flow->stats;
@@ -1344,8 +1353,10 @@ bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
        } else {
                netdev_info(bp->dev, "error rc=%d", rc);
        }
-
        mutex_unlock(&bp->hwrm_cmd_lock);
+
+       if (rc)
+               rc = -EIO;
        return rc;
 }
 
index b1e35a9accf1e2727a8749ba00801e58608181db..7db8edc643ec42e7909522e3180f09968b70402f 100644 (file)
@@ -603,6 +603,8 @@ static int bcmgenet_get_coalesce(struct net_device *dev,
                                 struct ethtool_coalesce *ec)
 {
        struct bcmgenet_priv *priv = netdev_priv(dev);
+       struct bcmgenet_rx_ring *ring;
+       unsigned int i;
 
        ec->tx_max_coalesced_frames =
                bcmgenet_tdma_ring_readl(priv, DESC_INDEX,
@@ -613,15 +615,37 @@ static int bcmgenet_get_coalesce(struct net_device *dev,
        ec->rx_coalesce_usecs =
                bcmgenet_rdma_readl(priv, DMA_RING16_TIMEOUT) * 8192 / 1000;
 
+       for (i = 0; i < priv->hw_params->rx_queues; i++) {
+               ring = &priv->rx_rings[i];
+               ec->use_adaptive_rx_coalesce |= ring->dim.use_dim;
+       }
+       ring = &priv->rx_rings[DESC_INDEX];
+       ec->use_adaptive_rx_coalesce |= ring->dim.use_dim;
+
        return 0;
 }
 
+/* Write the Rx coalescing settings cached in ring->dim (packet threshold
+ * and timeout) to the RDMA registers that belong to this ring.
+ */
+static void bcmgenet_set_rx_coalesce(struct bcmgenet_rx_ring *ring)
+{
+       struct bcmgenet_priv *priv = ring->priv;
+       unsigned int idx = ring->index;
+       u32 timeout;
+
+       /* Packet-count coalescing threshold for this ring */
+       bcmgenet_rdma_ring_writel(priv, idx, ring->dim.coal_pkts,
+                                 DMA_MBUF_DONE_THRESH);
+
+       /* The timeout field counts ticks of roughly 8.192us: convert from
+        * microseconds, rounding up, and replace only the masked field.
+        */
+       timeout = bcmgenet_rdma_readl(priv, DMA_RING0_TIMEOUT + idx);
+       timeout &= ~DMA_TIMEOUT_MASK;
+       timeout |= DIV_ROUND_UP(ring->dim.coal_usecs * 1000, 8192);
+       bcmgenet_rdma_writel(priv, timeout, DMA_RING0_TIMEOUT + idx);
+}
+
 static int bcmgenet_set_coalesce(struct net_device *dev,
                                 struct ethtool_coalesce *ec)
 {
        struct bcmgenet_priv *priv = netdev_priv(dev);
+       struct bcmgenet_rx_ring *ring;
        unsigned int i;
-       u32 reg;
 
        /* Base system clock is 125Mhz, DMA timeout is this reference clock
         * divided by 1024, which yields roughly 8.192us, our maximum value
@@ -641,7 +665,8 @@ static int bcmgenet_set_coalesce(struct net_device *dev,
         * transmitted, or when the ring is empty.
         */
        if (ec->tx_coalesce_usecs || ec->tx_coalesce_usecs_high ||
-           ec->tx_coalesce_usecs_irq || ec->tx_coalesce_usecs_low)
+           ec->tx_coalesce_usecs_irq || ec->tx_coalesce_usecs_low ||
+           ec->use_adaptive_tx_coalesce)
                return -EOPNOTSUPP;
 
        /* Program all TX queues with the same values, as there is no
@@ -656,24 +681,26 @@ static int bcmgenet_set_coalesce(struct net_device *dev,
                                  DMA_MBUF_DONE_THRESH);
 
        for (i = 0; i < priv->hw_params->rx_queues; i++) {
-               bcmgenet_rdma_ring_writel(priv, i,
-                                         ec->rx_max_coalesced_frames,
-                                         DMA_MBUF_DONE_THRESH);
-
-               reg = bcmgenet_rdma_readl(priv, DMA_RING0_TIMEOUT + i);
-               reg &= ~DMA_TIMEOUT_MASK;
-               reg |= DIV_ROUND_UP(ec->rx_coalesce_usecs * 1000, 8192);
-               bcmgenet_rdma_writel(priv, reg, DMA_RING0_TIMEOUT + i);
+               ring = &priv->rx_rings[i];
+               ring->dim.coal_usecs = ec->rx_coalesce_usecs;
+               ring->dim.coal_pkts = ec->rx_max_coalesced_frames;
+               if (!ec->use_adaptive_rx_coalesce && ring->dim.use_dim) {
+                       ring->dim.coal_pkts = 1;
+                       ring->dim.coal_usecs = 0;
+               }
+               ring->dim.use_dim = ec->use_adaptive_rx_coalesce;
+               bcmgenet_set_rx_coalesce(ring);
        }
 
-       bcmgenet_rdma_ring_writel(priv, DESC_INDEX,
-                                 ec->rx_max_coalesced_frames,
-                                 DMA_MBUF_DONE_THRESH);
-
-       reg = bcmgenet_rdma_readl(priv, DMA_RING16_TIMEOUT);
-       reg &= ~DMA_TIMEOUT_MASK;
-       reg |= DIV_ROUND_UP(ec->rx_coalesce_usecs * 1000, 8192);
-       bcmgenet_rdma_writel(priv, reg, DMA_RING16_TIMEOUT);
+       ring = &priv->rx_rings[DESC_INDEX];
+       ring->dim.coal_usecs = ec->rx_coalesce_usecs;
+       ring->dim.coal_pkts = ec->rx_max_coalesced_frames;
+       if (!ec->use_adaptive_rx_coalesce && ring->dim.use_dim) {
+               ring->dim.coal_pkts = 1;
+               ring->dim.coal_usecs = 0;
+       }
+       ring->dim.use_dim = ec->use_adaptive_rx_coalesce;
+       bcmgenet_set_rx_coalesce(ring);
 
        return 0;
 }
@@ -1713,6 +1740,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
        unsigned long dma_flag;
        int len;
        unsigned int rxpktprocessed = 0, rxpkttoprocess;
+       unsigned int bytes_processed = 0;
        unsigned int p_index, mask;
        unsigned int discards;
        unsigned int chksum_ok = 0;
@@ -1832,6 +1860,8 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
                        len -= ETH_FCS_LEN;
                }
 
+               bytes_processed += len;
+
                /*Finish setting up the received SKB and send it to the kernel*/
                skb->protocol = eth_type_trans(skb, priv->dev);
                ring->packets++;
@@ -1854,6 +1884,9 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
                bcmgenet_rdma_ring_writel(priv, ring->index, ring->c_index, RDMA_CONS_INDEX);
        }
 
+       ring->dim.bytes = bytes_processed;
+       ring->dim.packets = rxpktprocessed;
+
        return rxpktprocessed;
 }
 
@@ -1862,6 +1895,7 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget)
 {
        struct bcmgenet_rx_ring *ring = container_of(napi,
                        struct bcmgenet_rx_ring, napi);
+       struct net_dim_sample dim_sample;
        unsigned int work_done;
 
        work_done = bcmgenet_desc_rx(ring, budget);
@@ -1871,9 +1905,32 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget)
                ring->int_enable(ring);
        }
 
+       if (ring->dim.use_dim) {
+               net_dim_sample(ring->dim.event_ctr, ring->dim.packets,
+                              ring->dim.bytes, &dim_sample);
+               net_dim(&ring->dim.dim, dim_sample);
+       }
+
        return work_done;
 }
 
+/* Work handler attached to each Rx ring's net_dim instance: fetch the
+ * moderation profile currently selected by net_dim, cache it in the ring
+ * and program it into the hardware, then restart the measurement.
+ */
+static void bcmgenet_dim_work(struct work_struct *work)
+{
+       struct net_dim_cq_moder moder;
+       struct bcmgenet_rx_ring *ring;
+       struct bcmgenet_net_dim *ndim;
+       struct net_dim *dim;
+
+       /* Walk outwards: work -> net_dim -> bcmgenet_net_dim -> Rx ring */
+       dim = container_of(work, struct net_dim, work);
+       ndim = container_of(dim, struct bcmgenet_net_dim, dim);
+       ring = container_of(ndim, struct bcmgenet_rx_ring, dim);
+
+       moder = net_dim_get_profile(dim->mode, dim->profile_ix);
+       ndim->coal_usecs = moder.usec;
+       ndim->coal_pkts = moder.pkts;
+
+       bcmgenet_set_rx_coalesce(ring);
+       dim->state = NET_DIM_START_MEASURE;
+}
+
 /* Assign skb to RX DMA descriptor. */
 static int bcmgenet_alloc_rx_buffers(struct bcmgenet_priv *priv,
                                     struct bcmgenet_rx_ring *ring)
@@ -2022,6 +2079,16 @@ static void init_umac(struct bcmgenet_priv *priv)
        dev_dbg(kdev, "done init umac\n");
 }
 
+/* Reset the per-ring DIM bookkeeping and register @cb as the work handler
+ * of the embedded net_dim instance.
+ */
+static void bcmgenet_init_dim(struct bcmgenet_net_dim *dim,
+                             void (*cb)(struct work_struct *work))
+{
+       struct net_dim *nd = &dim->dim;
+
+       INIT_WORK(&nd->work, cb);
+       nd->mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+
+       /* Start with clean sample counters */
+       dim->event_ctr = 0;
+       dim->packets = 0;
+       dim->bytes = 0;
+}
+
 /* Initialize a Tx ring along with corresponding hardware registers */
 static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv,
                                  unsigned int index, unsigned int size,
@@ -2111,6 +2178,8 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv,
        if (ret)
                return ret;
 
+       bcmgenet_init_dim(&ring->dim, bcmgenet_dim_work);
+
        /* Initialize Rx NAPI */
        netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll,
                       NAPI_POLL_WEIGHT);
@@ -2276,10 +2345,12 @@ static void bcmgenet_disable_rx_napi(struct bcmgenet_priv *priv)
        for (i = 0; i < priv->hw_params->rx_queues; ++i) {
                ring = &priv->rx_rings[i];
                napi_disable(&ring->napi);
+               cancel_work_sync(&ring->dim.dim.work);
        }
 
        ring = &priv->rx_rings[DESC_INDEX];
        napi_disable(&ring->napi);
+       cancel_work_sync(&ring->dim.dim.work);
 }
 
 static void bcmgenet_fini_rx_napi(struct bcmgenet_priv *priv)
@@ -2557,6 +2628,7 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id)
                        continue;
 
                rx_ring = &priv->rx_rings[index];
+               rx_ring->dim.event_ctr++;
 
                if (likely(napi_schedule_prep(&rx_ring->napi))) {
                        rx_ring->int_disable(rx_ring);
@@ -2601,6 +2673,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
 
        if (status & UMAC_IRQ_RXDMA_DONE) {
                rx_ring = &priv->rx_rings[DESC_INDEX];
+               rx_ring->dim.event_ctr++;
 
                if (likely(napi_schedule_prep(&rx_ring->napi))) {
                        rx_ring->int_disable(rx_ring);
index 3c50431ccd2a8526d50af6732c61fc79da08b5c8..22c41e0430fb4b22b4b0d35b82dd1b5fbf0bcca0 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/mii.h>
 #include <linux/if_vlan.h>
 #include <linux/phy.h>
+#include <linux/net_dim.h>
 
 /* total number of Buffer Descriptors, same for Rx/Tx */
 #define TOTAL_DESC                             256
@@ -572,6 +573,16 @@ struct bcmgenet_tx_ring {
        struct bcmgenet_priv *priv;
 };
 
+/* Per-Rx-ring state for dynamic interrupt moderation (net_dim).
+ *
+ * use_dim:     set from ethtool's use_adaptive_rx_coalesce; when non-zero
+ *              bcmgenet_rx_poll() feeds samples to net_dim()
+ * event_ctr:   incremented by the interrupt handlers for each Rx event on
+ *              the ring and passed to net_dim_sample()
+ * packets:     packets handled by the most recent bcmgenet_desc_rx() call
+ * bytes:       bytes handled by the most recent bcmgenet_desc_rx() call
+ * coal_usecs:  Rx coalescing timeout, in microseconds, written to the
+ *              hardware by bcmgenet_set_rx_coalesce()
+ * coal_pkts:   Rx coalescing packet threshold written to the hardware by
+ *              bcmgenet_set_rx_coalesce()
+ * dim:         net_dim instance; its work item runs bcmgenet_dim_work()
+ */
+struct bcmgenet_net_dim {
+       u16             use_dim;
+       u16             event_ctr;
+       unsigned long   packets;
+       unsigned long   bytes;
+       u32             coal_usecs;
+       u32             coal_pkts;
+       struct net_dim  dim;
+};
+
 struct bcmgenet_rx_ring {
        struct napi_struct napi;        /* Rx NAPI struct */
        unsigned long   bytes;
@@ -586,6 +597,7 @@ struct bcmgenet_rx_ring {
        unsigned int    cb_ptr;         /* Rx ring initial CB ptr */
        unsigned int    end_ptr;        /* Rx ring end CB ptr */
        unsigned int    old_discards;
+       struct bcmgenet_net_dim dim;
        void (*int_enable)(struct bcmgenet_rx_ring *);
        void (*int_disable)(struct bcmgenet_rx_ring *);
        struct bcmgenet_priv *priv;
index c1841db1b500fa49f823c79e56cb3bc05f3f9199..f2593978ae75fb195f462a957d443eeddbbddb46 100644 (file)
@@ -820,7 +820,7 @@ static int tg3_ape_event_lock(struct tg3 *tp, u32 timeout_us)
 
                tg3_ape_unlock(tp, TG3_APE_LOCK_MEM);
 
-               usleep_range(10, 20);
+               udelay(10);
                timeout_us -= (timeout_us > 10) ? 10 : timeout_us;
        }
 
index e84afcf1ecb508a771cfe3f418e38f7340c5c1d3..d09bd43680b35df8c1d380d35610952dcccc45f0 100644 (file)
@@ -472,8 +472,44 @@ static int macb_mii_probe(struct net_device *dev)
        struct macb *bp = netdev_priv(dev);
        struct macb_platform_data *pdata;
        struct phy_device *phydev;
-       int phy_irq;
-       int ret;
+       struct device_node *np;
+       int phy_irq, ret, i;
+
+       pdata = dev_get_platdata(&bp->pdev->dev);
+       np = bp->pdev->dev.of_node;
+       ret = 0;
+
+       if (np) {
+               if (of_phy_is_fixed_link(np)) {
+                       if (of_phy_register_fixed_link(np) < 0) {
+                               dev_err(&bp->pdev->dev,
+                                       "broken fixed-link specification\n");
+                               return -ENODEV;
+                       }
+                       bp->phy_node = of_node_get(np);
+               } else {
+                       bp->phy_node = of_parse_phandle(np, "phy-handle", 0);
+                       /* fallback to standard phy registration if no
+                        * phy-handle was found nor any phy found during
+                        * dt phy registration
+                        */
+                       if (!bp->phy_node && !phy_find_first(bp->mii_bus)) {
+                               for (i = 0; i < PHY_MAX_ADDR; i++) {
+                                       struct phy_device *phydev;
+
+                                       phydev = mdiobus_scan(bp->mii_bus, i);
+                                       if (IS_ERR(phydev) &&
+                                           PTR_ERR(phydev) != -ENODEV) {
+                                               ret = PTR_ERR(phydev);
+                                               break;
+                                       }
+                               }
+
+                               if (ret)
+                                       return -ENODEV;
+                       }
+               }
+       }
 
        if (bp->phy_node) {
                phydev = of_phy_connect(dev, bp->phy_node,
@@ -488,7 +524,6 @@ static int macb_mii_probe(struct net_device *dev)
                        return -ENXIO;
                }
 
-               pdata = dev_get_platdata(&bp->pdev->dev);
                if (pdata) {
                        if (gpio_is_valid(pdata->phy_irq_pin)) {
                                ret = devm_gpio_request(&bp->pdev->dev,
@@ -533,7 +568,7 @@ static int macb_mii_init(struct macb *bp)
 {
        struct macb_platform_data *pdata;
        struct device_node *np;
-       int err = -ENXIO, i;
+       int err;
 
        /* Enable management port */
        macb_writel(bp, NCR, MACB_BIT(MPE));
@@ -556,43 +591,10 @@ static int macb_mii_init(struct macb *bp)
        dev_set_drvdata(&bp->dev->dev, bp->mii_bus);
 
        np = bp->pdev->dev.of_node;
-       if (np) {
-               if (of_phy_is_fixed_link(np)) {
-                       if (of_phy_register_fixed_link(np) < 0) {
-                               dev_err(&bp->pdev->dev,
-                                       "broken fixed-link specification\n");
-                               goto err_out_unregister_bus;
-                       }
-                       bp->phy_node = of_node_get(np);
-
-                       err = mdiobus_register(bp->mii_bus);
-               } else {
-                       /* try dt phy registration */
-                       err = of_mdiobus_register(bp->mii_bus, np);
-
-                       /* fallback to standard phy registration if no phy were
-                        * found during dt phy registration
-                        */
-                       if (!err && !phy_find_first(bp->mii_bus)) {
-                               for (i = 0; i < PHY_MAX_ADDR; i++) {
-                                       struct phy_device *phydev;
-
-                                       phydev = mdiobus_scan(bp->mii_bus, i);
-                                       if (IS_ERR(phydev) &&
-                                           PTR_ERR(phydev) != -ENODEV) {
-                                               err = PTR_ERR(phydev);
-                                               break;
-                                       }
-                               }
 
-                               if (err)
-                                       goto err_out_unregister_bus;
-                       }
-               }
+       if (np) {
+               err = of_mdiobus_register(bp->mii_bus, np);
        } else {
-               for (i = 0; i < PHY_MAX_ADDR; i++)
-                       bp->mii_bus->irq[i] = PHY_POLL;
-
                if (pdata)
                        bp->mii_bus->phy_mask = pdata->phy_mask;
 
@@ -610,10 +612,10 @@ static int macb_mii_init(struct macb *bp)
 
 err_out_unregister_bus:
        mdiobus_unregister(bp->mii_bus);
-err_out_free_mdiobus:
-       of_node_put(bp->phy_node);
        if (np && of_phy_is_fixed_link(np))
                of_phy_deregister_fixed_link(np);
+err_out_free_mdiobus:
+       of_node_put(bp->phy_node);
        mdiobus_free(bp->mii_bus);
 err_out:
        return err;
index 32ae63b6f20e1eb3c94291c63b7f33d83a044a94..73e70e076e61da048bcded35cddc1c93cc8c833c 100644 (file)
@@ -164,15 +164,6 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
                }
                break;
 
-       case OCTNET_CMD_CHANGE_MTU:
-               /* If command is successful, change the MTU. */
-               netif_info(lio, probe, lio->netdev, "MTU Changed from %d to %d\n",
-                          netdev->mtu, nctrl->ncmd.s.param1);
-               netdev->mtu = nctrl->ncmd.s.param1;
-               queue_delayed_work(lio->link_status_wq.wq,
-                                  &lio->link_status_wq.wk.work, 0);
-               break;
-
        case OCTNET_CMD_GPIO_ACCESS:
                netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n");
 
@@ -386,20 +377,12 @@ static void lio_update_txq_status(struct octeon_device *oct, int iq_num)
                return;
 
        lio = GET_LIO(netdev);
-       if (netif_is_multiqueue(netdev)) {
-               if (__netif_subqueue_stopped(netdev, iq->q_index) &&
-                   lio->linfo.link.s.link_up &&
-                   (!octnet_iq_is_full(oct, iq_num))) {
-                       netif_wake_subqueue(netdev, iq->q_index);
-                       INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_num,
-                                                 tx_restart, 1);
-               }
-       } else if (netif_queue_stopped(netdev) &&
-                  lio->linfo.link.s.link_up &&
-                  (!octnet_iq_is_full(oct, lio->txq))) {
-               INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
+       if (__netif_subqueue_stopped(netdev, iq->q_index) &&
+           lio->linfo.link.s.link_up &&
+           (!octnet_iq_is_full(oct, iq_num))) {
+               netif_wake_subqueue(netdev, iq->q_index);
+               INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_num,
                                          tx_restart, 1);
-               netif_wake_queue(netdev);
        }
 }
 
@@ -571,7 +554,8 @@ liquidio_push_packet(u32 octeon_id __attribute__((unused)),
 
                napi_gro_receive(napi, skb);
 
-               droq->stats.rx_bytes_received += len;
+               droq->stats.rx_bytes_received += len -
+                       rh->r_dh.len * BYTES_PER_DHLEN_UNIT;
                droq->stats.rx_pkts_received++;
        } else {
                recv_buffer_free(skb);
@@ -635,9 +619,7 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget)
        iq_no = droq->q_no;
 
        /* Handle Droq descriptors */
-       work_done = octeon_process_droq_poll_cmd(oct, droq->q_no,
-                                                POLL_EVENT_PROCESS_PKTS,
-                                                budget);
+       work_done = octeon_droq_process_poll_pkts(oct, droq, budget);
 
        /* Flush the instruction queue */
        iq = oct->instr_queue[iq_no];
@@ -668,8 +650,7 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget)
                tx_done = 1;
                napi_complete_done(napi, work_done);
 
-               octeon_process_droq_poll_cmd(droq->oct_dev, droq->q_no,
-                                            POLL_EVENT_ENABLE_INTR, 0);
+               octeon_enable_irq(droq->oct_dev, droq->q_no);
                return 0;
        }
 
@@ -1080,3 +1061,88 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs)
        }
        return 0;
 }
+
+/* Completion callback for the change-MTU soft command: record success or
+ * failure in the waiter's context and wake it up.
+ */
+static void liquidio_change_mtu_completion(struct octeon_device *oct,
+                                          u32 status, void *buf)
+{
+       struct octeon_soft_command *cmd = buf;
+       struct liquidio_if_cfg_context *ctx;
+       int outcome = LIO_CHANGE_MTU_SUCCESS;
+
+       ctx = (struct liquidio_if_cfg_context *)cmd->ctxptr;
+
+       /* Any non-zero status is treated as a failed MTU change */
+       if (status) {
+               dev_err(&oct->pci_dev->dev, "MTU change failed. Status: %llx\n",
+                       CVM_CAST64(status));
+               outcome = LIO_CHANGE_MTU_FAIL;
+       }
+       WRITE_ONCE(ctx->cond, outcome);
+
+       /* Make sure the result is fully written before the sleeping
+        * caller is woken up and reads it.
+        */
+       wmb();
+
+       wake_up_interruptible(&ctx->wc);
+}
+
+/**
+ * \brief Net device change_mtu
+ * @param netdev network device
+ * @param new_mtu MTU value to request
+ * @returns 0 on success, -ENOMEM if no soft command could be allocated,
+ *          -EINVAL if the command could not be sent, the wait was
+ *          interrupted or the completion callback reported a failure
+ */
+int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
+{
+       struct lio *lio = GET_LIO(netdev);
+       struct octeon_device *oct = lio->oct_dev;
+       struct liquidio_if_cfg_context *ctx;
+       struct octeon_soft_command *sc;
+       union octnet_cmd *ncmd;
+       int ctx_size;
+       int ret = 0;
+
+       ctx_size = sizeof(struct liquidio_if_cfg_context);
+       sc = (struct octeon_soft_command *)
+               octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE, 16, ctx_size);
+       if (!sc) {
+               /* sc is dereferenced right below; bail out on failure */
+               netif_info(lio, rx_err, lio->netdev,
+                          "Failed to allocate soft command\n");
+               return -ENOMEM;
+       }
+
+       ncmd = (union octnet_cmd *)sc->virtdptr;
+       ctx  = (struct liquidio_if_cfg_context *)sc->ctxptr;
+
+       WRITE_ONCE(ctx->cond, 0);
+       ctx->octeon_id = lio_get_device_id(oct);
+       init_waitqueue_head(&ctx->wc);
+
+       ncmd->u64 = 0;
+       ncmd->s.cmd = OCTNET_CMD_CHANGE_MTU;
+       ncmd->s.param1 = new_mtu;
+
+       octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3));
+
+       sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+       octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+                                   OPCODE_NIC_CMD, 0, 0, 0);
+
+       sc->callback = liquidio_change_mtu_completion;
+       sc->callback_arg = sc;
+       sc->wait_time = 100;
+
+       ret = octeon_send_soft_command(oct, sc);
+       if (ret == IQ_SEND_FAILED) {
+               netif_info(lio, rx_err, lio->netdev, "Failed to change MTU\n");
+               /* The command was never queued, so release it here */
+               octeon_free_soft_command(oct, sc);
+               return -EINVAL;
+       }
+       /* Sleep on a wait queue till the cond flag indicates that the
+        * response arrived or timed-out.
+        */
+       if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR ||
+           ctx->cond == LIO_CHANGE_MTU_FAIL) {
+               octeon_free_soft_command(oct, sc);
+               return -EINVAL;
+       }
+
+       /* Only update the cached MTU values once the change succeeded */
+       netdev->mtu = new_mtu;
+       lio->mtu = new_mtu;
+
+       octeon_free_soft_command(oct, sc);
+       return 0;
+}
index a63ddf07f168e1663c0a177aad668288d57643ad..550ac29682a5301373c186e480f76a851916d1dc 100644 (file)
@@ -232,10 +232,16 @@ static int lio_get_link_ksettings(struct net_device *netdev,
 
        linfo = &lio->linfo;
 
-       if (linfo->link.s.if_mode == INTERFACE_MODE_XAUI ||
-           linfo->link.s.if_mode == INTERFACE_MODE_RXAUI ||
-           linfo->link.s.if_mode == INTERFACE_MODE_XLAUI ||
-           linfo->link.s.if_mode == INTERFACE_MODE_XFI) {
+       switch (linfo->link.s.phy_type) {
+       case LIO_PHY_PORT_TP:
+               ecmd->base.port = PORT_TP;
+               supported = (SUPPORTED_10000baseT_Full |
+                            SUPPORTED_TP | SUPPORTED_Pause);
+               advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_Pause);
+               ecmd->base.autoneg = AUTONEG_DISABLE;
+               break;
+
+       case LIO_PHY_PORT_FIBRE:
                ecmd->base.port = PORT_FIBRE;
 
                if (linfo->link.s.speed == SPEED_10000) {
@@ -245,12 +251,18 @@ static int lio_get_link_ksettings(struct net_device *netdev,
 
                supported |= SUPPORTED_FIBRE | SUPPORTED_Pause;
                advertising |= ADVERTISED_Pause;
+               ecmd->base.autoneg = AUTONEG_DISABLE;
+               break;
+       }
+
+       if (linfo->link.s.if_mode == INTERFACE_MODE_XAUI ||
+           linfo->link.s.if_mode == INTERFACE_MODE_RXAUI ||
+           linfo->link.s.if_mode == INTERFACE_MODE_XLAUI ||
+           linfo->link.s.if_mode == INTERFACE_MODE_XFI) {
                ethtool_convert_legacy_u32_to_link_mode(
                        ecmd->link_modes.supported, supported);
                ethtool_convert_legacy_u32_to_link_mode(
                        ecmd->link_modes.advertising, advertising);
-               ecmd->base.autoneg = AUTONEG_DISABLE;
-
        } else {
                dev_err(&oct->pci_dev->dev, "Unknown link interface reported %d\n",
                        linfo->link.s.if_mode);
index a5eecd895a8253d753bea0fb273da0bf49005d13..58b5c75fd2ee16e9f9ab3b2854367ac7280af2ab 100644 (file)
@@ -91,18 +91,9 @@ static int octeon_console_debug_enabled(u32 console)
  */
 #define LIO_SYNC_OCTEON_TIME_INTERVAL_MS 60000
 
-struct liquidio_if_cfg_context {
-       int octeon_id;
-
-       wait_queue_head_t wc;
-
-       int cond;
-};
-
-struct liquidio_if_cfg_resp {
-       u64 rh;
-       struct liquidio_if_cfg_info cfg_info;
-       u64 status;
+struct lio_trusted_vf_ctx {
+       struct completion complete;
+       int status;
 };
 
 struct liquidio_rx_ctl_context {
@@ -522,115 +513,6 @@ static void liquidio_deinit_pci(void)
        pci_unregister_driver(&liquidio_pci_driver);
 }
 
-/**
- * \brief Stop Tx queues
- * @param netdev network device
- */
-static inline void txqs_stop(struct net_device *netdev)
-{
-       if (netif_is_multiqueue(netdev)) {
-               int i;
-
-               for (i = 0; i < netdev->num_tx_queues; i++)
-                       netif_stop_subqueue(netdev, i);
-       } else {
-               netif_stop_queue(netdev);
-       }
-}
-
-/**
- * \brief Start Tx queues
- * @param netdev network device
- */
-static inline void txqs_start(struct net_device *netdev)
-{
-       if (netif_is_multiqueue(netdev)) {
-               int i;
-
-               for (i = 0; i < netdev->num_tx_queues; i++)
-                       netif_start_subqueue(netdev, i);
-       } else {
-               netif_start_queue(netdev);
-       }
-}
-
-/**
- * \brief Wake Tx queues
- * @param netdev network device
- */
-static inline void txqs_wake(struct net_device *netdev)
-{
-       struct lio *lio = GET_LIO(netdev);
-
-       if (netif_is_multiqueue(netdev)) {
-               int i;
-
-               for (i = 0; i < netdev->num_tx_queues; i++) {
-                       int qno = lio->linfo.txpciq[i %
-                               lio->oct_dev->num_iqs].s.q_no;
-
-                       if (__netif_subqueue_stopped(netdev, i)) {
-                               INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, qno,
-                                                         tx_restart, 1);
-                               netif_wake_subqueue(netdev, i);
-                       }
-               }
-       } else {
-               INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
-                                         tx_restart, 1);
-               netif_wake_queue(netdev);
-       }
-}
-
-/**
- * \brief Stop Tx queue
- * @param netdev network device
- */
-static void stop_txq(struct net_device *netdev)
-{
-       txqs_stop(netdev);
-}
-
-/**
- * \brief Start Tx queue
- * @param netdev network device
- */
-static void start_txq(struct net_device *netdev)
-{
-       struct lio *lio = GET_LIO(netdev);
-
-       if (lio->linfo.link.s.link_up) {
-               txqs_start(netdev);
-               return;
-       }
-}
-
-/**
- * \brief Wake a queue
- * @param netdev network device
- * @param q which queue to wake
- */
-static inline void wake_q(struct net_device *netdev, int q)
-{
-       if (netif_is_multiqueue(netdev))
-               netif_wake_subqueue(netdev, q);
-       else
-               netif_wake_queue(netdev);
-}
-
-/**
- * \brief Stop a queue
- * @param netdev network device
- * @param q which queue to stop
- */
-static inline void stop_q(struct net_device *netdev, int q)
-{
-       if (netif_is_multiqueue(netdev))
-               netif_stop_subqueue(netdev, q);
-       else
-               netif_stop_queue(netdev);
-}
-
 /**
  * \brief Check Tx queue status, and take appropriate action
  * @param lio per-network private data
@@ -638,33 +520,24 @@ static inline void stop_q(struct net_device *netdev, int q)
  */
 static inline int check_txq_status(struct lio *lio)
 {
+       int numqs = lio->netdev->num_tx_queues;
        int ret_val = 0;
+       int q, iq;
 
-       if (netif_is_multiqueue(lio->netdev)) {
-               int numqs = lio->netdev->num_tx_queues;
-               int q, iq = 0;
-
-               /* check each sub-queue state */
-               for (q = 0; q < numqs; q++) {
-                       iq = lio->linfo.txpciq[q %
-                               lio->oct_dev->num_iqs].s.q_no;
-                       if (octnet_iq_is_full(lio->oct_dev, iq))
-                               continue;
-                       if (__netif_subqueue_stopped(lio->netdev, q)) {
-                               wake_q(lio->netdev, q);
-                               INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq,
-                                                         tx_restart, 1);
-                               ret_val++;
-                       }
+       /* check each sub-queue state */
+       for (q = 0; q < numqs; q++) {
+               iq = lio->linfo.txpciq[q %
+                       lio->oct_dev->num_iqs].s.q_no;
+               if (octnet_iq_is_full(lio->oct_dev, iq))
+                       continue;
+               if (__netif_subqueue_stopped(lio->netdev, q)) {
+                       netif_wake_subqueue(lio->netdev, q);
+                       INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq,
+                                                 tx_restart, 1);
+                       ret_val++;
                }
-       } else {
-               if (octnet_iq_is_full(lio->oct_dev, lio->txq))
-                       return 0;
-               wake_q(lio->netdev, lio->txq);
-               INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
-                                         tx_restart, 1);
-               ret_val = 1;
        }
+
        return ret_val;
 }
 
@@ -841,8 +714,12 @@ static void octnet_link_status_change(struct work_struct *work)
        struct cavium_wk *wk = (struct cavium_wk *)work;
        struct lio *lio = (struct lio *)wk->ctxptr;
 
+       /* lio->linfo.link.s.mtu always contains the max MTU of the lio
+        * interface. This function is invoked only when the new max MTU of
+        * the interface is less than the current MTU.
+        */
        rtnl_lock();
-       call_netdevice_notifiers(NETDEV_CHANGEMTU, lio->netdev);
+       dev_set_mtu(lio->netdev, lio->linfo.link.s.mtu);
        rtnl_unlock();
 }
 
@@ -891,7 +768,11 @@ static inline void update_link_status(struct net_device *netdev,
 {
        struct lio *lio = GET_LIO(netdev);
        int changed = (lio->linfo.link.u64 != ls->u64);
+       int current_max_mtu = lio->linfo.link.s.mtu;
+       struct octeon_device *oct = lio->oct_dev;
 
+       dev_dbg(&oct->pci_dev->dev, "%s: lio->linfo.link.u64=%llx, ls->u64=%llx\n",
+               __func__, lio->linfo.link.u64, ls->u64);
        lio->linfo.link.u64 = ls->u64;
 
        if ((lio->intf_open) && (changed)) {
@@ -899,11 +780,25 @@ static inline void update_link_status(struct net_device *netdev,
                lio->link_changes++;
 
                if (lio->linfo.link.s.link_up) {
+                       dev_dbg(&oct->pci_dev->dev, "%s: link_up", __func__);
                        netif_carrier_on(netdev);
-                       txqs_wake(netdev);
+                       wake_txqs(netdev);
                } else {
+                       dev_dbg(&oct->pci_dev->dev, "%s: link_off", __func__);
                        netif_carrier_off(netdev);
-                       stop_txq(netdev);
+                       stop_txqs(netdev);
+               }
+               if (lio->linfo.link.s.mtu != current_max_mtu) {
+                       netif_info(lio, probe, lio->netdev, "Max MTU changed from %d to %d\n",
+                                  current_max_mtu, lio->linfo.link.s.mtu);
+                       netdev->max_mtu = lio->linfo.link.s.mtu;
+               }
+               if (lio->linfo.link.s.mtu < netdev->mtu) {
+                       dev_warn(&oct->pci_dev->dev,
+                                "Current MTU is higher than new max MTU; Reducing the current mtu from %d to %d\n",
+                                    netdev->mtu, lio->linfo.link.s.mtu);
+                       queue_delayed_work(lio->link_status_wq.wq,
+                                          &lio->link_status_wq.wk.work, 0);
                }
        }
 }
@@ -1739,16 +1634,6 @@ static int octeon_pci_os_setup(struct octeon_device *oct)
        return 0;
 }
 
-static inline int skb_iq(struct lio *lio, struct sk_buff *skb)
-{
-       int q = 0;
-
-       if (netif_is_multiqueue(lio->netdev))
-               q = skb->queue_mapping % lio->linfo.num_txpciq;
-
-       return q;
-}
-
 /**
  * \brief Check Tx queue state for a given network buffer
  * @param lio per-network private data
@@ -1756,22 +1641,17 @@ static inline int skb_iq(struct lio *lio, struct sk_buff *skb)
  */
 static inline int check_txq_state(struct lio *lio, struct sk_buff *skb)
 {
-       int q = 0, iq = 0;
+       int q, iq;
 
-       if (netif_is_multiqueue(lio->netdev)) {
-               q = skb->queue_mapping;
-               iq = lio->linfo.txpciq[(q % lio->oct_dev->num_iqs)].s.q_no;
-       } else {
-               iq = lio->txq;
-               q = iq;
-       }
+       q = skb->queue_mapping;
+       iq = lio->linfo.txpciq[(q % lio->oct_dev->num_iqs)].s.q_no;
 
        if (octnet_iq_is_full(lio->oct_dev, iq))
                return 0;
 
        if (__netif_subqueue_stopped(lio->netdev, q)) {
                INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq, tx_restart, 1);
-               wake_q(lio->netdev, q);
+               netif_wake_subqueue(lio->netdev, q);
        }
        return 1;
 }
@@ -2211,7 +2091,7 @@ static int liquidio_open(struct net_device *netdev)
                        return -1;
        }
 
-       start_txq(netdev);
+       start_txqs(netdev);
 
        /* tell Octeon to start forwarding packets to host */
        send_rx_ctrl_cmd(lio, 1);
@@ -2448,38 +2328,6 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
        return stats;
 }
 
-/**
- * \brief Net device change_mtu
- * @param netdev network device
- */
-static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
-{
-       struct lio *lio = GET_LIO(netdev);
-       struct octeon_device *oct = lio->oct_dev;
-       struct octnic_ctrl_pkt nctrl;
-       int ret = 0;
-
-       memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
-
-       nctrl.ncmd.u64 = 0;
-       nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MTU;
-       nctrl.ncmd.s.param1 = new_mtu;
-       nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-       nctrl.wait_time = 100;
-       nctrl.netpndev = (u64)netdev;
-       nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
-
-       ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-       if (ret < 0) {
-               dev_err(&oct->pci_dev->dev, "Failed to set MTU\n");
-               return -1;
-       }
-
-       lio->mtu = new_mtu;
-
-       return 0;
-}
-
 /**
  * \brief Handler for SIOCSHWTSTAMP ioctl
  * @param netdev network device
@@ -2685,14 +2533,9 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
        lio = GET_LIO(netdev);
        oct = lio->oct_dev;
 
-       if (netif_is_multiqueue(netdev)) {
-               q_idx = skb->queue_mapping;
-               q_idx = (q_idx % (lio->linfo.num_txpciq));
-               tag = q_idx;
-               iq_no = lio->linfo.txpciq[q_idx].s.q_no;
-       } else {
-               iq_no = lio->txq;
-       }
+       q_idx = skb_iq(lio, skb);
+       tag = q_idx;
+       iq_no = lio->linfo.txpciq[q_idx].s.q_no;
 
        stats = &oct->instr_queue[iq_no]->stats;
 
@@ -2723,23 +2566,14 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 
        ndata.q_no = iq_no;
 
-       if (netif_is_multiqueue(netdev)) {
-               if (octnet_iq_is_full(oct, ndata.q_no)) {
-                       /* defer sending if queue is full */
-                       netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
-                                  ndata.q_no);
-                       stats->tx_iq_busy++;
-                       return NETDEV_TX_BUSY;
-               }
-       } else {
-               if (octnet_iq_is_full(oct, lio->txq)) {
-                       /* defer sending if queue is full */
-                       stats->tx_iq_busy++;
-                       netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
-                                  lio->txq);
-                       return NETDEV_TX_BUSY;
-               }
+       if (octnet_iq_is_full(oct, ndata.q_no)) {
+               /* defer sending if queue is full */
+               netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
+                          ndata.q_no);
+               stats->tx_iq_busy++;
+               return NETDEV_TX_BUSY;
        }
+
        /* pr_info(" XMIT - valid Qs: %d, 1st Q no: %d, cpu:  %d, q_no:%d\n",
         *      lio->linfo.num_txpciq, lio->txq, cpu, ndata.q_no);
         */
@@ -2895,7 +2729,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
        netif_info(lio, tx_queued, lio->netdev, "Transmit queued successfully\n");
 
        if (status == IQ_SEND_STOP)
-               stop_q(netdev, q_idx);
+               netif_stop_subqueue(netdev, q_idx);
 
        netif_trans_update(netdev);
 
@@ -2934,7 +2768,7 @@ static void liquidio_tx_timeout(struct net_device *netdev)
                   "Transmit timeout tx_dropped:%ld, waking up queues now!!\n",
                   netdev->stats.tx_dropped);
        netif_trans_update(netdev);
-       txqs_wake(netdev);
+       wake_txqs(netdev);
 }
 
 static int liquidio_vlan_rx_add_vid(struct net_device *netdev,
@@ -3289,10 +3123,120 @@ static int liquidio_get_vf_config(struct net_device *netdev, int vfidx,
        ether_addr_copy(&ivi->mac[0], macaddr);
        ivi->vlan = oct->sriov_info.vf_vlantci[vfidx] & VLAN_VID_MASK;
        ivi->qos = oct->sriov_info.vf_vlantci[vfidx] >> VLAN_PRIO_SHIFT;
+       if (oct->sriov_info.trusted_vf.active &&
+           oct->sriov_info.trusted_vf.id == vfidx)
+               ivi->trusted = true;
+       else
+               ivi->trusted = false;
        ivi->linkstate = oct->sriov_info.vf_linkstate[vfidx];
        return 0;
 }
 
+static void trusted_vf_callback(struct octeon_device *oct_dev,
+                               u32 status, void *ptr)
+{
+       struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
+       struct lio_trusted_vf_ctx *ctx;
+
+       ctx = (struct lio_trusted_vf_ctx *)sc->ctxptr;
+       ctx->status = status;
+
+       complete(&ctx->complete);
+}
+
+static int liquidio_send_vf_trust_cmd(struct lio *lio, int vfidx, bool trusted)
+{
+       struct octeon_device *oct = lio->oct_dev;
+       struct lio_trusted_vf_ctx *ctx;
+       struct octeon_soft_command *sc;
+       int ctx_size, retval;
+
+       ctx_size = sizeof(struct lio_trusted_vf_ctx);
+       sc = octeon_alloc_soft_command(oct, 0, 0, ctx_size);
+
+       ctx  = (struct lio_trusted_vf_ctx *)sc->ctxptr;
+       init_completion(&ctx->complete);
+
+       sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+       /* vfidx is 0 based, but vf_num (param1) is 1 based */
+       octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+                                   OPCODE_NIC_SET_TRUSTED_VF, 0, vfidx + 1,
+                                   trusted);
+
+       sc->callback = trusted_vf_callback;
+       sc->callback_arg = sc;
+       sc->wait_time = 1000;
+
+       retval = octeon_send_soft_command(oct, sc);
+       if (retval == IQ_SEND_FAILED) {
+               retval = -1;
+       } else {
+               /* Wait for response or timeout */
+               if (wait_for_completion_timeout(&ctx->complete,
+                                               msecs_to_jiffies(2000)))
+                       retval = ctx->status;
+               else
+                       retval = -1;
+       }
+
+       octeon_free_soft_command(oct, sc);
+
+       return retval;
+}
+
+static int liquidio_set_vf_trust(struct net_device *netdev, int vfidx,
+                                bool setting)
+{
+       struct lio *lio = GET_LIO(netdev);
+       struct octeon_device *oct = lio->oct_dev;
+
+       if (strcmp(oct->fw_info.liquidio_firmware_version, "1.7.1") < 0) {
+               /* trusted vf is not supported by firmware older than 1.7.1 */
+               return -EOPNOTSUPP;
+       }
+
+       if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced) {
+               netif_info(lio, drv, lio->netdev, "Invalid vfidx %d\n", vfidx);
+               return -EINVAL;
+       }
+
+       if (setting) {
+               /* Set */
+
+               if (oct->sriov_info.trusted_vf.active &&
+                   oct->sriov_info.trusted_vf.id == vfidx)
+                       return 0;
+
+               if (oct->sriov_info.trusted_vf.active) {
+                       netif_info(lio, drv, lio->netdev, "More than one trusted VF is not allowed\n");
+                       return -EPERM;
+               }
+       } else {
+               /* Clear */
+
+               if (!oct->sriov_info.trusted_vf.active)
+                       return 0;
+       }
+
+       if (!liquidio_send_vf_trust_cmd(lio, vfidx, setting)) {
+               if (setting) {
+                       oct->sriov_info.trusted_vf.id = vfidx;
+                       oct->sriov_info.trusted_vf.active = true;
+               } else {
+                       oct->sriov_info.trusted_vf.active = false;
+               }
+
+               netif_info(lio, drv, lio->netdev, "VF %u is %strusted\n", vfidx,
+                          setting ? "" : "not ");
+       } else {
+               netif_info(lio, drv, lio->netdev, "Failed to set VF trusted\n");
+               return -1;
+       }
+
+       return 0;
+}
+
 static int liquidio_set_vf_link_state(struct net_device *netdev, int vfidx,
                                      int linkstate)
 {
@@ -3423,6 +3367,7 @@ static const struct net_device_ops lionetdevops = {
        .ndo_set_vf_mac         = liquidio_set_vf_mac,
        .ndo_set_vf_vlan        = liquidio_set_vf_vlan,
        .ndo_get_vf_config      = liquidio_get_vf_config,
+       .ndo_set_vf_trust       = liquidio_set_vf_trust,
        .ndo_set_vf_link_state  = liquidio_set_vf_link_state,
 };
 
index fd70a4844e2db4811b0d185c802a0e6565eae0f7..d5f5c9a693eeacc8c9b5fbaeec1adebeeb7978a8 100644 (file)
@@ -40,20 +40,6 @@ MODULE_PARM_DESC(debug, "NETIF_MSG debug bits");
 
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
-struct liquidio_if_cfg_context {
-       int octeon_id;
-
-       wait_queue_head_t wc;
-
-       int cond;
-};
-
-struct liquidio_if_cfg_resp {
-       u64 rh;
-       struct liquidio_if_cfg_info cfg_info;
-       u64 status;
-};
-
 struct liquidio_rx_ctl_context {
        int octeon_id;
 
@@ -298,105 +284,6 @@ static struct pci_driver liquidio_vf_pci_driver = {
        .err_handler    = &liquidio_vf_err_handler,    /* For AER */
 };
 
-/**
- * \brief Stop Tx queues
- * @param netdev network device
- */
-static void txqs_stop(struct net_device *netdev)
-{
-       if (netif_is_multiqueue(netdev)) {
-               int i;
-
-               for (i = 0; i < netdev->num_tx_queues; i++)
-                       netif_stop_subqueue(netdev, i);
-       } else {
-               netif_stop_queue(netdev);
-       }
-}
-
-/**
- * \brief Start Tx queues
- * @param netdev network device
- */
-static void txqs_start(struct net_device *netdev)
-{
-       if (netif_is_multiqueue(netdev)) {
-               int i;
-
-               for (i = 0; i < netdev->num_tx_queues; i++)
-                       netif_start_subqueue(netdev, i);
-       } else {
-               netif_start_queue(netdev);
-       }
-}
-
-/**
- * \brief Wake Tx queues
- * @param netdev network device
- */
-static void txqs_wake(struct net_device *netdev)
-{
-       struct lio *lio = GET_LIO(netdev);
-
-       if (netif_is_multiqueue(netdev)) {
-               int i;
-
-               for (i = 0; i < netdev->num_tx_queues; i++) {
-                       int qno = lio->linfo.txpciq[i % lio->oct_dev->num_iqs]
-                                     .s.q_no;
-                       if (__netif_subqueue_stopped(netdev, i)) {
-                               INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, qno,
-                                                         tx_restart, 1);
-                               netif_wake_subqueue(netdev, i);
-                       }
-               }
-       } else {
-               INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
-                                         tx_restart, 1);
-               netif_wake_queue(netdev);
-       }
-}
-
-/**
- * \brief Start Tx queue
- * @param netdev network device
- */
-static void start_txq(struct net_device *netdev)
-{
-       struct lio *lio = GET_LIO(netdev);
-
-       if (lio->linfo.link.s.link_up) {
-               txqs_start(netdev);
-               return;
-       }
-}
-
-/**
- * \brief Wake a queue
- * @param netdev network device
- * @param q which queue to wake
- */
-static void wake_q(struct net_device *netdev, int q)
-{
-       if (netif_is_multiqueue(netdev))
-               netif_wake_subqueue(netdev, q);
-       else
-               netif_wake_queue(netdev);
-}
-
-/**
- * \brief Stop a queue
- * @param netdev network device
- * @param q which queue to stop
- */
-static void stop_q(struct net_device *netdev, int q)
-{
-       if (netif_is_multiqueue(netdev))
-               netif_stop_subqueue(netdev, q);
-       else
-               netif_stop_queue(netdev);
-}
-
 /**
  * Remove the node at the head of the list. The list would be empty at
  * the end of this call if there are no more nodes in the list.
@@ -564,8 +451,12 @@ static void octnet_link_status_change(struct work_struct *work)
        struct cavium_wk *wk = (struct cavium_wk *)work;
        struct lio *lio = (struct lio *)wk->ctxptr;
 
+       /* lio->linfo.link.s.mtu always contains max MTU of the lio interface.
+        * this API is invoked only when new max-MTU of the interface is
+        * less than current MTU.
+        */
        rtnl_lock();
-       call_netdevice_notifiers(NETDEV_CHANGEMTU, lio->netdev);
+       dev_set_mtu(lio->netdev, lio->linfo.link.s.mtu);
        rtnl_unlock();
 }
 
@@ -613,6 +504,7 @@ static void update_link_status(struct net_device *netdev,
                               union oct_link_status *ls)
 {
        struct lio *lio = GET_LIO(netdev);
+       int current_max_mtu = lio->linfo.link.s.mtu;
        struct octeon_device *oct = lio->oct_dev;
 
        if ((lio->intf_open) && (lio->linfo.link.u64 != ls->u64)) {
@@ -623,24 +515,23 @@ static void update_link_status(struct net_device *netdev,
 
                if (lio->linfo.link.s.link_up) {
                        netif_carrier_on(netdev);
-                       txqs_wake(netdev);
+                       wake_txqs(netdev);
                } else {
                        netif_carrier_off(netdev);
-                       txqs_stop(netdev);
+                       stop_txqs(netdev);
                }
 
-               if (lio->linfo.link.s.mtu != netdev->max_mtu) {
-                       dev_info(&oct->pci_dev->dev, "Max MTU Changed from %d to %d\n",
-                                netdev->max_mtu, lio->linfo.link.s.mtu);
+               if (lio->linfo.link.s.mtu != current_max_mtu) {
+                       dev_info(&oct->pci_dev->dev,
+                                "Max MTU Changed from %d to %d\n",
+                                current_max_mtu, lio->linfo.link.s.mtu);
                        netdev->max_mtu = lio->linfo.link.s.mtu;
                }
 
                if (lio->linfo.link.s.mtu < netdev->mtu) {
                        dev_warn(&oct->pci_dev->dev,
-                                "PF has changed the MTU for gmx port. Reducing the mtu from %d to %d\n",
+                                "Current MTU is higher than new max MTU; Reducing the current mtu from %d to %d\n",
                                 netdev->mtu, lio->linfo.link.s.mtu);
-                       lio->mtu = lio->linfo.link.s.mtu;
-                       netdev->mtu = lio->linfo.link.s.mtu;
                        queue_delayed_work(lio->link_status_wq.wq,
                                           &lio->link_status_wq.wk.work, 0);
                }
@@ -1062,16 +953,6 @@ static int octeon_pci_os_setup(struct octeon_device *oct)
        return 0;
 }
 
-static int skb_iq(struct lio *lio, struct sk_buff *skb)
-{
-       int q = 0;
-
-       if (netif_is_multiqueue(lio->netdev))
-               q = skb->queue_mapping % lio->linfo.num_txpciq;
-
-       return q;
-}
-
 /**
  * \brief Check Tx queue state for a given network buffer
  * @param lio per-network private data
@@ -1079,22 +960,17 @@ static int skb_iq(struct lio *lio, struct sk_buff *skb)
  */
 static int check_txq_state(struct lio *lio, struct sk_buff *skb)
 {
-       int q = 0, iq = 0;
+       int q, iq;
 
-       if (netif_is_multiqueue(lio->netdev)) {
-               q = skb->queue_mapping;
-               iq = lio->linfo.txpciq[q % lio->oct_dev->num_iqs].s.q_no;
-       } else {
-               iq = lio->txq;
-               q = iq;
-       }
+       q = skb->queue_mapping;
+       iq = lio->linfo.txpciq[q % lio->oct_dev->num_iqs].s.q_no;
 
        if (octnet_iq_is_full(lio->oct_dev, iq))
                return 0;
 
        if (__netif_subqueue_stopped(lio->netdev, q)) {
                INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq, tx_restart, 1);
-               wake_q(lio->netdev, q);
+               netif_wake_subqueue(lio->netdev, q);
        }
 
        return 1;
@@ -1268,7 +1144,7 @@ static int liquidio_open(struct net_device *netdev)
        lio->intf_open = 1;
 
        netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
-       start_txq(netdev);
+       start_txqs(netdev);
 
        /* tell Octeon to start forwarding packets to host */
        send_rx_ctrl_cmd(lio, 1);
@@ -1310,7 +1186,7 @@ static int liquidio_stop(struct net_device *netdev)
 
        ifstate_reset(lio, LIO_IFSTATE_RUNNING);
 
-       txqs_stop(netdev);
+       stop_txqs(netdev);
 
        dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
 
@@ -1537,41 +1413,6 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
        return stats;
 }
 
-/**
- * \brief Net device change_mtu
- * @param netdev network device
- */
-static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
-{
-       struct octnic_ctrl_pkt nctrl;
-       struct octeon_device *oct;
-       struct lio *lio;
-       int ret = 0;
-
-       lio = GET_LIO(netdev);
-       oct = lio->oct_dev;
-
-       memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
-
-       nctrl.ncmd.u64 = 0;
-       nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MTU;
-       nctrl.ncmd.s.param1 = new_mtu;
-       nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-       nctrl.wait_time = LIO_CMD_WAIT_TM;
-       nctrl.netpndev = (u64)netdev;
-       nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
-
-       ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-       if (ret < 0) {
-               dev_err(&oct->pci_dev->dev, "Failed to set MTU\n");
-               return -EIO;
-       }
-
-       lio->mtu = new_mtu;
-
-       return 0;
-}
-
 /**
  * \brief Handler for SIOCSHWTSTAMP ioctl
  * @param netdev network device
@@ -1763,14 +1604,9 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
        lio = GET_LIO(netdev);
        oct = lio->oct_dev;
 
-       if (netif_is_multiqueue(netdev)) {
-               q_idx = skb->queue_mapping;
-               q_idx = (q_idx % (lio->linfo.num_txpciq));
-               tag = q_idx;
-               iq_no = lio->linfo.txpciq[q_idx].s.q_no;
-       } else {
-               iq_no = lio->txq;
-       }
+       q_idx = skb_iq(lio, skb);
+       tag = q_idx;
+       iq_no = lio->linfo.txpciq[q_idx].s.q_no;
 
        stats = &oct->instr_queue[iq_no]->stats;
 
@@ -1799,22 +1635,12 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 
        ndata.q_no = iq_no;
 
-       if (netif_is_multiqueue(netdev)) {
-               if (octnet_iq_is_full(oct, ndata.q_no)) {
-                       /* defer sending if queue is full */
-                       netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
-                                  ndata.q_no);
-                       stats->tx_iq_busy++;
-                       return NETDEV_TX_BUSY;
-               }
-       } else {
-               if (octnet_iq_is_full(oct, lio->txq)) {
-                       /* defer sending if queue is full */
-                       stats->tx_iq_busy++;
-                       netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
-                                  ndata.q_no);
-                       return NETDEV_TX_BUSY;
-               }
+       if (octnet_iq_is_full(oct, ndata.q_no)) {
+               /* defer sending if queue is full */
+               netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
+                          ndata.q_no);
+               stats->tx_iq_busy++;
+               return NETDEV_TX_BUSY;
        }
 
        ndata.datasize = skb->len;
@@ -1956,7 +1782,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
        if (status == IQ_SEND_STOP) {
                dev_err(&oct->pci_dev->dev, "Rcvd IQ_SEND_STOP signal; stopping IQ-%d\n",
                        iq_no);
-               stop_q(netdev, q_idx);
+               netif_stop_subqueue(netdev, q_idx);
        }
 
        netif_trans_update(netdev);
@@ -1996,7 +1822,7 @@ static void liquidio_tx_timeout(struct net_device *netdev)
                   "Transmit timeout tx_dropped:%ld, waking up queues now!!\n",
                   netdev->stats.tx_dropped);
        netif_trans_update(netdev);
-       txqs_wake(netdev);
+       wake_txqs(netdev);
 }
 
 static int
index 522dcc4dcff7444c3c420822c9f6bafe3154cdea..82a783db5baf86decdc76aaa9c39d95b718f16d4 100644 (file)
@@ -84,6 +84,7 @@ enum octeon_tag_type {
 #define OPCODE_NIC_IF_CFG              0x09
 #define OPCODE_NIC_VF_DRV_NOTICE       0x0A
 #define OPCODE_NIC_INTRMOD_PARAMS      0x0B
+#define OPCODE_NIC_SET_TRUSTED_VF      0x13
 #define OPCODE_NIC_SYNC_OCTEON_TIME    0x14
 #define VF_DRV_LOADED                  1
 #define VF_DRV_REMOVED                -1
@@ -192,7 +193,8 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
 
 #define   OCTNET_MAX_FRM_SIZE        (16000 + OCTNET_FRM_HEADER_SIZE)
 
-#define   OCTNET_DEFAULT_FRM_SIZE    (1500 + OCTNET_FRM_HEADER_SIZE)
+#define   OCTNET_DEFAULT_MTU         (1500)
+#define   OCTNET_DEFAULT_FRM_SIZE  (OCTNET_DEFAULT_MTU + OCTNET_FRM_HEADER_SIZE)
 
 /** NIC Commands are sent using this Octeon Input Queue */
 #define   OCTNET_CMD_Q                0
@@ -675,9 +677,11 @@ union oct_link_status {
                u64 if_mode:5;
                u64 pause:1;
                u64 flashing:1;
-               u64 reserved:15;
+               u64 phy_type:5;
+               u64 reserved:10;
 #else
-               u64 reserved:15;
+               u64 reserved:10;
+               u64 phy_type:5;
                u64 flashing:1;
                u64 pause:1;
                u64 if_mode:5;
@@ -690,6 +694,12 @@ union oct_link_status {
        } s;
 };
 
+enum lio_phy_type {
+       LIO_PHY_PORT_TP = 0x0,
+       LIO_PHY_PORT_FIBRE = 0x1,
+       LIO_PHY_PORT_UNKNOWN,
+};
+
 /** The txpciq info passed to host from the firmware */
 
 union oct_txpciq {
@@ -909,6 +919,12 @@ union oct_nic_if_cfg {
        } s;
 };
 
+struct lio_trusted_vf {
+       uint64_t active: 1;
+       uint64_t id : 8;
+       uint64_t reserved: 55;
+};
+
 struct lio_time {
        s64 sec;   /* seconds */
        s64 nsec;  /* nanoseconds */
index 63b0c758a0a68f6a481a5e6f3d77bb2eddff8ff3..91937cc5c1d7447021057dec9e1e7b4c709b1684 100644 (file)
@@ -370,6 +370,8 @@ struct octeon_sriov_info {
 
        u32     sriov_enabled;
 
+       struct lio_trusted_vf   trusted_vf;
+
        /*lookup table that maps DPI ring number to VF pci_dev struct pointer*/
        struct pci_dev *dpiring_to_vfpcidev_lut[MAX_POSSIBLE_VFS];
 
index 3461d65ff4ebd93d11a7ad041258d658fb814a50..f044718cea52bd2fe0e524e84a619f173b4726d5 100644 (file)
@@ -788,7 +788,7 @@ octeon_droq_process_packets(struct octeon_device *oct,
  * called before calling this routine.
  */
 
-static int
+int
 octeon_droq_process_poll_pkts(struct octeon_device *oct,
                              struct octeon_droq *droq, u32 budget)
 {
@@ -835,71 +835,46 @@ octeon_droq_process_poll_pkts(struct octeon_device *oct,
        return total_pkts_processed;
 }
 
+/* Enable Pkt Interrupt */
 int
-octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no, int cmd,
-                            u32 arg)
+octeon_enable_irq(struct octeon_device *oct, u32 q_no)
 {
-       struct octeon_droq *droq;
-
-       droq = oct->droq[q_no];
+       switch (oct->chip_id) {
+       case OCTEON_CN66XX:
+       case OCTEON_CN68XX: {
+               struct octeon_cn6xxx *cn6xxx =
+                       (struct octeon_cn6xxx *)oct->chip;
+               unsigned long flags;
+               u32 value;
 
-       if (cmd == POLL_EVENT_PROCESS_PKTS)
-               return octeon_droq_process_poll_pkts(oct, droq, arg);
+               spin_lock_irqsave
+                       (&cn6xxx->lock_for_droq_int_enb_reg, flags);
+               value = octeon_read_csr(oct, CN6XXX_SLI_PKT_TIME_INT_ENB);
+               value |= (1 << q_no);
+               octeon_write_csr(oct, CN6XXX_SLI_PKT_TIME_INT_ENB, value);
+               value = octeon_read_csr(oct, CN6XXX_SLI_PKT_CNT_INT_ENB);
+               value |= (1 << q_no);
+               octeon_write_csr(oct, CN6XXX_SLI_PKT_CNT_INT_ENB, value);
 
-       if (cmd == POLL_EVENT_PENDING_PKTS) {
-               u32 pkt_cnt = atomic_read(&droq->pkts_pending);
+               /* don't bother flushing the enables */
 
-               return  octeon_droq_process_packets(oct, droq, pkt_cnt);
+               spin_unlock_irqrestore
+                       (&cn6xxx->lock_for_droq_int_enb_reg, flags);
        }
-
-       if (cmd == POLL_EVENT_ENABLE_INTR) {
-               u32 value;
-               unsigned long flags;
-
-               /* Enable Pkt Interrupt */
-               switch (oct->chip_id) {
-               case OCTEON_CN66XX:
-               case OCTEON_CN68XX: {
-                       struct octeon_cn6xxx *cn6xxx =
-                               (struct octeon_cn6xxx *)oct->chip;
-                       spin_lock_irqsave
-                               (&cn6xxx->lock_for_droq_int_enb_reg, flags);
-                       value =
-                               octeon_read_csr(oct,
-                                               CN6XXX_SLI_PKT_TIME_INT_ENB);
-                       value |= (1 << q_no);
-                       octeon_write_csr(oct,
-                                        CN6XXX_SLI_PKT_TIME_INT_ENB,
-                                        value);
-                       value =
-                               octeon_read_csr(oct,
-                                               CN6XXX_SLI_PKT_CNT_INT_ENB);
-                       value |= (1 << q_no);
-                       octeon_write_csr(oct,
-                                        CN6XXX_SLI_PKT_CNT_INT_ENB,
-                                        value);
-
-                       /* don't bother flushing the enables */
-
-                       spin_unlock_irqrestore
-                               (&cn6xxx->lock_for_droq_int_enb_reg, flags);
-                       return 0;
-               }
                break;
-               case OCTEON_CN23XX_PF_VID: {
-                       lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
-               }
+       case OCTEON_CN23XX_PF_VID:
+               lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
                break;
 
-               case OCTEON_CN23XX_VF_VID:
-                       lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
+       case OCTEON_CN23XX_VF_VID:
+               lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
                break;
-               }
-               return 0;
+       default:
+               dev_err(&oct->pci_dev->dev, "%s Unknown Chip\n", __func__);
+               return 1;
        }
 
-       dev_err(&oct->pci_dev->dev, "%s Unknown command: %d\n", __func__, cmd);
-       return -EINVAL;
+       return 0;
 }
 
 int octeon_register_droq_ops(struct octeon_device *oct, u32 q_no,
index 815a9f56fd595f1099c231a7b976891bfeaede13..f28f262d4ab6e91378337854622d391ca14d85e5 100644 (file)
@@ -123,11 +123,6 @@ struct oct_droq_stats {
 
 };
 
-#define POLL_EVENT_INTR_ARRIVED  1
-#define POLL_EVENT_PROCESS_PKTS  2
-#define POLL_EVENT_PENDING_PKTS  3
-#define POLL_EVENT_ENABLE_INTR   4
-
 /* The maximum number of buffers that can be dispatched from the
  * output/dma queue. Set to 64 assuming 1K buffers in DROQ and the fact that
  * max packet size from DROQ is 64K.
@@ -414,8 +409,10 @@ int octeon_droq_process_packets(struct octeon_device *oct,
                                struct octeon_droq *droq,
                                u32 budget);
 
-int octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no,
-                                int cmd, u32 arg);
+int octeon_droq_process_poll_pkts(struct octeon_device *oct,
+                                 struct octeon_droq *droq, u32 budget);
+
+int octeon_enable_irq(struct octeon_device *oct, u32 q_no);
 
 void octeon_droq_check_oom(struct octeon_droq *droq);
 
index 57af7df74ced48591744ba0261e5ee586ad48e79..28e74ee23ff85a6056c09328fb91549c094cc93d 100644 (file)
@@ -87,7 +87,7 @@ int octeon_mbox_read(struct octeon_mbox *mbox)
        }
 
        if (mbox->state & OCTEON_MBOX_STATE_REQUEST_RECEIVING) {
-               if (mbox->mbox_req.recv_len < msg.s.len) {
+               if (mbox->mbox_req.recv_len < mbox->mbox_req.msg.s.len) {
                        ret = 0;
                } else {
                        mbox->state &= ~OCTEON_MBOX_STATE_REQUEST_RECEIVING;
@@ -96,7 +96,8 @@ int octeon_mbox_read(struct octeon_mbox *mbox)
                }
        } else {
                if (mbox->state & OCTEON_MBOX_STATE_RESPONSE_RECEIVING) {
-                       if (mbox->mbox_resp.recv_len < msg.s.len) {
+                       if (mbox->mbox_resp.recv_len <
+                           mbox->mbox_resp.msg.s.len) {
                                ret = 0;
                        } else {
                                mbox->state &=
index f2d1a076a038a3bd6b6e730acfb93cc7ac3ffa4a..8782206271b631c04398b492ce289e77a8856822 100644 (file)
 #define   LIO_IFSTATE_RX_TIMESTAMP_ENABLED 0x08
 #define   LIO_IFSTATE_RESETTING                   0x10
 
+/* Context carried alongside an interface-configuration request.
+ * NOTE(review): the field roles below are inferred from names and types
+ * only; confirm against the code that waits on and completes the request.
+ */
+struct liquidio_if_cfg_context {
+       u32 octeon_id;          /* presumably the id of the issuing Octeon device */
+       wait_queue_head_t wc;   /* wait queue head for this request */
+       int cond;               /* presumably the condition waiters on wc test */
+};
+
+/* Response layout for an interface-configuration request.
+ * NOTE(review): the meaning of rh and status is not visible in this chunk;
+ * presumably a response header word and a completion status - confirm.
+ */
+struct liquidio_if_cfg_resp {
+       u64 rh;
+       struct liquidio_if_cfg_info cfg_info;   /* interface configuration data */
+       u64 status;
+};
+
 struct oct_nic_stats_resp {
        u64     rh;
        struct oct_link_stats stats;
@@ -184,6 +196,14 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs);
  */
 void liquidio_set_ethtool_ops(struct net_device *netdev);
 
+/**
+ * \brief Net device change_mtu
+ * @param netdev network device
+ * @param new_mtu requested MTU
+ */
+int liquidio_change_mtu(struct net_device *netdev, int new_mtu);
+#define LIO_CHANGE_MTU_SUCCESS 1
+#define LIO_CHANGE_MTU_FAIL    2
+
 #define SKB_ADJ_MASK  0x3F
 #define SKB_ADJ       (SKB_ADJ_MASK + 1)
 
@@ -486,4 +506,56 @@ static inline int wait_for_pending_requests(struct octeon_device *oct)
        return 0;
 }
 
+/**
+ * \brief Stop every Tx subqueue of a network device
+ * @param netdev network device whose Tx subqueues are stopped
+ */
+static inline void stop_txqs(struct net_device *netdev)
+{
+       int q = 0;
+
+       /* Walk all netdev->num_tx_queues subqueues in index order */
+       while (q < netdev->num_tx_queues) {
+               netif_stop_subqueue(netdev, q);
+               q++;
+       }
+}
+
+/**
+ * \brief Wake the Tx subqueues that are currently stopped
+ * @param netdev network device
+ *
+ * For each stopped subqueue, INCR_INSTRQUEUE_PKT_COUNT() is invoked with
+ * tx_restart and 1 for the instruction queue the subqueue maps to, and the
+ * subqueue is then woken. Subqueues that are not stopped are left alone.
+ */
+static inline void wake_txqs(struct net_device *netdev)
+{
+       struct lio *lio = GET_LIO(netdev);
+       int txq, iq_no;
+
+       for (txq = 0; txq < netdev->num_tx_queues; txq++) {
+               /* Subqueues are spread over the device's num_iqs queues */
+               iq_no = lio->linfo.txpciq[txq % lio->oct_dev->num_iqs].s.q_no;
+
+               if (!__netif_subqueue_stopped(netdev, txq))
+                       continue;
+
+               INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_no, tx_restart, 1);
+               netif_wake_subqueue(netdev, txq);
+       }
+}
+
+/**
+ * \brief Start all Tx subqueues, provided the link is up
+ * @param netdev network device
+ *
+ * Nothing is started while lio->linfo.link.s.link_up is clear.
+ */
+static inline void start_txqs(struct net_device *netdev)
+{
+       struct lio *lio = GET_LIO(netdev);
+       int q;
+
+       if (!lio->linfo.link.s.link_up)
+               return;
+
+       for (q = 0; q < netdev->num_tx_queues; q++)
+               netif_start_subqueue(netdev, q);
+}
+
+/* Reduce the skb's queue_mapping modulo lio->linfo.num_txpciq and return
+ * the resulting index.
+ */
+static inline int skb_iq(struct lio *lio, struct sk_buff *skb)
+{
+       int iq_no = skb->queue_mapping % lio->linfo.num_txpciq;
+
+       return iq_no;
+}
+
 #endif
index e07d2093b971326f25ea11af72525af9e9edb778..2766af05b89efcbc903f2b60b94b7ef21ad389f8 100644 (file)
@@ -366,6 +366,7 @@ int
 lio_process_iq_request_list(struct octeon_device *oct,
                            struct octeon_instr_queue *iq, u32 napi_budget)
 {
+       struct cavium_wq *cwq = &oct->dma_comp_wq;
        int reqtype;
        void *buf;
        u32 old = iq->flush_index;
@@ -450,6 +451,10 @@ lio_process_iq_request_list(struct octeon_device *oct,
                                                   bytes_compl);
        iq->flush_index = old;
 
+       if (atomic_read(&oct->response_list
+                       [OCTEON_ORDERED_SC_LIST].pending_req_count))
+               queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(1));
+
        return inst_count;
 }
 
index 3d691c69f74d1cf7567b2d58730dcfd656c0eefd..fe5b537005763cbed01d1cc998b26d2d02521095 100644 (file)
@@ -49,7 +49,6 @@ int octeon_setup_response_list(struct octeon_device *oct)
        INIT_DELAYED_WORK(&cwq->wk.work, oct_poll_req_completion);
        cwq->wk.ctxptr = oct;
        oct->cmd_resp_state = OCT_DRV_ONLINE;
-       queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(50));
 
        return ret;
 }
@@ -164,5 +163,8 @@ static void oct_poll_req_completion(struct work_struct *work)
        struct cavium_wq *cwq = &oct->dma_comp_wq;
 
        lio_process_ordered_list(oct, 0);
-       queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(50));
+
+       if (atomic_read(&oct->response_list
+                       [OCTEON_ORDERED_SC_LIST].pending_req_count))
+               queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(1));
 }
index a89721fad633ec75a3ed46a7fc22d8988cbdada4..080918af773cd1285accf3a08c95d8bd8391166b 100644 (file)
@@ -681,18 +681,18 @@ int t3_seeprom_wp(struct adapter *adapter, int enable)
        return t3_seeprom_write(adapter, EEPROM_STAT_ADDR, enable ? 0xc : 0);
 }
 
-static int vpdstrtouint(char *s, int len, unsigned int base, unsigned int *val)
+static int vpdstrtouint(char *s, u8 len, unsigned int base, unsigned int *val)
 {
-       char tok[len + 1];
+       char tok[256];
 
        memcpy(tok, s, len);
        tok[len] = 0;
        return kstrtouint(strim(tok), base, val);
 }
 
-static int vpdstrtou16(char *s, int len, unsigned int base, u16 *val)
+static int vpdstrtou16(char *s, u8 len, unsigned int base, u16 *val)
 {
-       char tok[len + 1];
+       char tok[256];
 
        memcpy(tok, s, len);
        tok[len] = 0;
index 53b6a02c778e35f327af8374a6ea783e127381d1..bea6a059a8f111672e4b23e867e5f5287b53321e 100644 (file)
@@ -6,7 +6,7 @@
 obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
 
 cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \
-             cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \
+             cxgb4_uld.o srq.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \
              cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o \
              cudbg_common.o cudbg_lib.o cudbg_zlib.o
 cxgb4-$(CONFIG_CHELSIO_T4_DCB) +=  cxgb4_dcb.o
index d3fa53db61ee21b95664e7276080e7c033869605..688f95440af26c48d84d146ed98409e5e0856015 100644 (file)
@@ -390,6 +390,8 @@ struct adapter_params {
         * used by the Port
         */
        u8 mps_bg_map[MAX_NPORTS];      /* MPS Buffer Group Map */
+       bool write_w_imm_support;       /* FW supports WRITE_WITH_IMMEDIATE */
+       bool write_cmpl_support;        /* FW supports WRITE_CMPL */
 };
 
 /* State needed to monitor the forward progress of SGE Ingress DMA activities
@@ -831,6 +833,16 @@ struct vf_info {
        u16 vlan;
 };
 
+/* Bit values for hma_data.flags */
+enum {
+       HMA_DMA_MAPPED_FLAG = 1 /* hma.sgt is currently DMA mapped */
+};
+
+/* Per-adapter HMA state, filled in by adap_config_hma() and released by
+ * adap_free_hma_mem().
+ */
+struct hma_data {
+       unsigned char flags;    /* HMA_DMA_MAPPED_FLAG */
+       struct sg_table *sgt;   /* sg table holding the allocated pages */
+       dma_addr_t *phy_addr;   /* array of sg_dma_address() values, one per
+                                * mapped sg entry
+                                */
+};
+
 struct mbox_list {
        struct list_head list;
 };
@@ -907,6 +919,7 @@ struct adapter {
        struct work_struct tid_release_task;
        struct work_struct db_full_task;
        struct work_struct db_drop_task;
+       struct work_struct fatal_err_notify_task;
        bool tid_release_task_busy;
 
        /* lock for mailbox cmd list */
@@ -946,6 +959,11 @@ struct adapter {
 
        /* Ethtool Dump */
        struct ethtool_dump eth_dump;
+
+       /* HMA */
+       struct hma_data hma;
+
+       struct srq_data *srq;
 };
 
 /* Support for "sched-class" command to allow a TX Scheduling Class to be
index 2822bbff73e8645ee6d5f0ff9cb0feb08246178f..de2ba86eccfd2236faca919d8fca734e00df44a9 100644 (file)
@@ -2617,7 +2617,7 @@ int mem_open(struct inode *inode, struct file *file)
 
        file->private_data = inode->i_private;
 
-       mem = (uintptr_t)file->private_data & 0x3;
+       mem = (uintptr_t)file->private_data & 0x7;
        adap = file->private_data - mem;
 
        (void)t4_fwcache(adap, FW_PARAM_DEV_FWCACHE_FLUSH);
@@ -2630,7 +2630,7 @@ static ssize_t mem_read(struct file *file, char __user *buf, size_t count,
 {
        loff_t pos = *ppos;
        loff_t avail = file_inode(file)->i_size;
-       unsigned int mem = (uintptr_t)file->private_data & 3;
+       unsigned int mem = (uintptr_t)file->private_data & 0x7;
        struct adapter *adap = file->private_data - mem;
        __be32 *data;
        int ret;
@@ -3042,6 +3042,12 @@ int t4_setup_debugfs(struct adapter *adap)
                        add_debugfs_mem(adap, "mc", MEM_MC,
                                        EXT_MEM_SIZE_G(size));
                }
+
+               if (i & HMA_MUX_F) {
+                       size = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A);
+                       add_debugfs_mem(adap, "hma", MEM_HMA,
+                                       EXT_MEM1_SIZE_G(size));
+               }
        }
 
        de = debugfs_create_file_size("flash", S_IRUSR, adap->debugfs_root, adap,
index 7852d98bad75810d357d4155bbf129149cf88621..59d04d73c672614c4281f8865a559a0c3fb193e5 100644 (file)
@@ -597,22 +597,22 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
 
        case FW_PORT_TYPE_KR:
                SET_LMM(Backplane);
-               SET_LMM(10000baseKR_Full);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
                break;
 
        case FW_PORT_TYPE_BP_AP:
                SET_LMM(Backplane);
-               SET_LMM(10000baseR_FEC);
-               SET_LMM(10000baseKR_Full);
-               SET_LMM(1000baseKX_Full);
+               FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
                break;
 
        case FW_PORT_TYPE_BP4_AP:
                SET_LMM(Backplane);
-               SET_LMM(10000baseR_FEC);
-               SET_LMM(10000baseKR_Full);
-               SET_LMM(1000baseKX_Full);
-               SET_LMM(10000baseKX4_Full);
+               FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
                break;
 
        case FW_PORT_TYPE_FIBER_XFI:
@@ -628,7 +628,9 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
        case FW_PORT_TYPE_BP40_BA:
        case FW_PORT_TYPE_QSFP:
                SET_LMM(FIBRE);
-               SET_LMM(40000baseSR4_Full);
+               FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
+               FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
                break;
 
        case FW_PORT_TYPE_CR_QSFP:
@@ -655,12 +657,14 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
 
        case FW_PORT_TYPE_CR2_QSFP:
                SET_LMM(FIBRE);
-               SET_LMM(50000baseSR2_Full);
+               FW_CAPS_TO_LMM(SPEED_50G, 50000baseSR2_Full);
                break;
 
        case FW_PORT_TYPE_KR4_100G:
        case FW_PORT_TYPE_CR4_QSFP:
                SET_LMM(FIBRE);
+               FW_CAPS_TO_LMM(SPEED_1G,  1000baseT_Full);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseSR_Full);
                FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
                FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
                FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
index 3177b0c9bd2db38dc79b3a51e0dd478388196796..db92f1858060ec685d7b59740ada8422097f178f 100644 (file)
@@ -1335,12 +1335,6 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
                return ret;
        }
 
-       /* Clear out any old resources being used by the filter before
-        * we start constructing the new filter.
-        */
-       if (f->valid)
-               clear_filter(adapter, f);
-
        if (is_t6(adapter->params.chip) && fs->type &&
            ipv6_addr_type((const struct in6_addr *)fs->val.lip) !=
            IPV6_ADDR_ANY) {
index 7b452e85de2ad18a5613ac41687a9eaeab872f9f..57d38f8ed455108ae9e5382724805a7a9149a22a 100644 (file)
@@ -75,6 +75,7 @@
 #include "t4fw_api.h"
 #include "t4fw_version.h"
 #include "cxgb4_dcb.h"
+#include "srq.h"
 #include "cxgb4_debugfs.h"
 #include "clip_tbl.h"
 #include "l2t.h"
@@ -210,6 +211,9 @@ static void link_report(struct net_device *dev)
                case 40000:
                        s = "40Gbps";
                        break;
+               case 50000:
+                       s = "50Gbps";
+                       break;
                case 100000:
                        s = "100Gbps";
                        break;
@@ -583,6 +587,10 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
                const struct cpl_abort_rpl_rss *p = (void *)rsp;
 
                hash_del_filter_rpl(q->adap, p);
+       } else if (opcode == CPL_SRQ_TABLE_RPL) {
+               const struct cpl_srq_table_rpl *p = (void *)rsp;
+
+               do_srq_table_rpl(q->adap, p);
        } else
                dev_err(q->adap->pdev_dev,
                        "unexpected CPL %#x on FW event queue\n", opcode);
@@ -833,8 +841,6 @@ static int setup_fw_sge_queues(struct adapter *adap)
 
        err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
                               adap->msi_idx, NULL, fwevtq_handler, NULL, -1);
-       if (err)
-               t4_free_sge_resources(adap);
        return err;
 }
 
@@ -1733,10 +1739,11 @@ EXPORT_SYMBOL(cxgb4_sync_txq_pidx);
 
 int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte)
 {
-       struct adapter *adap;
-       u32 offset, memtype, memaddr;
        u32 edc0_size, edc1_size, mc0_size, mc1_size, size;
        u32 edc0_end, edc1_end, mc0_end, mc1_end;
+       u32 offset, memtype, memaddr;
+       struct adapter *adap;
+       u32 hma_size = 0;
        int ret;
 
        adap = netdev2adap(dev);
@@ -1756,6 +1763,10 @@ int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte)
        size = t4_read_reg(adap, MA_EXT_MEMORY0_BAR_A);
        mc0_size = EXT_MEM0_SIZE_G(size) << 20;
 
+       if (t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A) & HMA_MUX_F) {
+               size = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A);
+               hma_size = EXT_MEM1_SIZE_G(size) << 20;
+       }
        edc0_end = edc0_size;
        edc1_end = edc0_end + edc1_size;
        mc0_end = edc1_end + mc0_size;
@@ -1767,7 +1778,10 @@ int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte)
                memtype = MEM_EDC1;
                memaddr = offset - edc0_end;
        } else {
-               if (offset < mc0_end) {
+               if (hma_size && (offset < (edc1_end + hma_size))) {
+                       memtype = MEM_HMA;
+                       memaddr = offset - edc1_end;
+               } else if (offset < mc0_end) {
                        memtype = MEM_MC0;
                        memaddr = offset - edc1_end;
                } else if (is_t5(adap->params.chip)) {
@@ -2681,13 +2695,17 @@ static int cxgb4_mgmt_get_vf_config(struct net_device *dev,
 {
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adap = pi->adapter;
+       struct vf_info *vfinfo;
 
        if (vf >= adap->num_vfs)
                return -EINVAL;
+       vfinfo = &adap->vfinfo[vf];
+
        ivi->vf = vf;
-       ivi->max_tx_rate = adap->vfinfo[vf].tx_rate;
+       ivi->max_tx_rate = vfinfo->tx_rate;
        ivi->min_tx_rate = 0;
-       ether_addr_copy(ivi->mac, adap->vfinfo[vf].vf_mac_addr);
+       ether_addr_copy(ivi->mac, vfinfo->vf_mac_addr);
+       ivi->vlan = vfinfo->vlan;
        return 0;
 }
 
@@ -2870,11 +2888,11 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
        /* Convert from Mbps to Kbps */
        req_rate = rate << 10;
 
-       /* Max rate is 10 Gbps */
+       /* Max rate is 100 Gbps */
        if (req_rate >= SCHED_MAX_RATE_KBPS) {
                dev_err(adap->pdev_dev,
-                       "Invalid rate %u Mbps, Max rate is %u Gbps\n",
-                       rate, SCHED_MAX_RATE_KBPS);
+                       "Invalid rate %u Mbps, Max rate is %u Mbps\n",
+                       rate, SCHED_MAX_RATE_KBPS >> 10);
                return -ERANGE;
        }
 
@@ -3244,6 +3262,14 @@ static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = {
        .get_drvinfo       = cxgb4_mgmt_get_drvinfo,
 };
 
+/* Work handler for adapter->fatal_err_notify_task: recovers the adapter
+ * from the embedded work item and passes CXGB4_STATE_FATAL_ERROR to
+ * notify_ulds().
+ */
+static void notify_fatal_err(struct work_struct *work)
+{
+       struct adapter *adap = container_of(work, struct adapter,
+                                           fatal_err_notify_task);
+
+       notify_ulds(adap, CXGB4_STATE_FATAL_ERROR);
+}
+
 void t4_fatal_err(struct adapter *adap)
 {
        int port;
@@ -3268,6 +3294,7 @@ void t4_fatal_err(struct adapter *adap)
                netif_carrier_off(dev);
        }
        dev_alert(adap->pdev_dev, "encountered fatal error, adapter stopped\n");
+       queue_work(adap->workq, &adap->fatal_err_notify_task);
 }
 
 static void setup_memwin(struct adapter *adap)
@@ -3298,6 +3325,206 @@ static void setup_memwin_rdma(struct adapter *adap)
        }
 }
 
+/* HMA Definitions */
+
+/* The maximum number of addresses that can be sent in a single FW cmd */
+#define HMA_MAX_ADDR_IN_CMD    5
+
+#define HMA_PAGE_SIZE          PAGE_SIZE
+
+#define HMA_MAX_NO_FW_ADDRESS  (16 << 10)  /* FW supports 16K addresses */
+
+#define HMA_PAGE_ORDER                                 \
+       ((HMA_PAGE_SIZE < HMA_MAX_NO_FW_ADDRESS) ?      \
+       ilog2(HMA_MAX_NO_FW_ADDRESS / HMA_PAGE_SIZE) : 0)
+
+/* The minimum and maximum possible HMA sizes that can be specified in the FW
+ * configuration(in units of MB).
+ */
+#define HMA_MIN_TOTAL_SIZE     1
+#define HMA_MAX_TOTAL_SIZE                             \
+       (((HMA_PAGE_SIZE << HMA_PAGE_ORDER) *           \
+         HMA_MAX_NO_FW_ADDRESS) >> 20)
+
+/* Release everything adap_config_hma() set up: undo the DMA mapping if one
+ * is active, free the pages attached to the scatterlist, then free the
+ * address array and the sg table itself. Does nothing when no sg table is
+ * present, so it may be called repeatedly.
+ */
+static void adap_free_hma_mem(struct adapter *adapter)
+{
+       struct scatterlist *iter;
+       struct page *page;
+       int i;
+
+       if (!adapter->hma.sgt)
+               return;
+
+       if (adapter->hma.flags & HMA_DMA_MAPPED_FLAG) {
+               /* dma_unmap_sg() must be given the entry count that was
+                * passed to dma_map_sg() (orig_nents), not the count it
+                * returned, and the same direction used for the mapping.
+                */
+               dma_unmap_sg(adapter->pdev_dev, adapter->hma.sgt->sgl,
+                            adapter->hma.sgt->orig_nents, DMA_BIDIRECTIONAL);
+               adapter->hma.flags &= ~HMA_DMA_MAPPED_FLAG;
+       }
+
+       /* Entries that never received a page (partial allocation) are
+        * skipped.
+        */
+       for_each_sg(adapter->hma.sgt->sgl, iter,
+                   adapter->hma.sgt->orig_nents, i) {
+               page = sg_page(iter);
+               if (page)
+                       __free_pages(page, HMA_PAGE_ORDER);
+       }
+
+       kfree(adapter->hma.phy_addr);
+       adapter->hma.phy_addr = NULL;   /* do not leave a dangling pointer */
+       sg_free_table(adapter->hma.sgt);
+       kfree(adapter->hma.sgt);
+       adapter->hma.sgt = NULL;
+}
+
+/* Allocate host memory for the firmware's HMA region and hand its DMA
+ * addresses to the firmware.
+ *
+ * The size (in MB) is queried from the firmware. Pages of order
+ * HMA_PAGE_ORDER are allocated into a scatterlist, DMA mapped, and their
+ * DMA addresses are sent in FW_HMA_CMD mailbox commands carrying at most
+ * HMA_MAX_ADDR_IN_CMD addresses each.
+ *
+ * Returns 0 on success, and also when HMA does not apply (kdump kernel,
+ * chip older than T6, the size query fails or reports 0). Returns -EINVAL
+ * for a size outside the supported bounds, -ENOMEM when an allocation or
+ * the DMA mapping fails, or the t4_wr_mbox() error. On any failure after
+ * the sg table exists, all HMA resources are released again.
+ */
+static int adap_config_hma(struct adapter *adapter)
+{
+       struct scatterlist *sgl, *iter;
+       struct sg_table *sgt;
+       struct page *newpage;
+       unsigned int i, j, k;
+       u32 param, hma_size;
+       unsigned int ncmds;
+       size_t page_size;
+       u32 page_order;
+       int node, ret;
+
+       /* HMA is supported only for T6+ cards.
+        * Avoid initializing HMA in kdump kernels.
+        */
+       if (is_kdump_kernel() ||
+           CHELSIO_CHIP_VERSION(adapter->params.chip) < CHELSIO_T6)
+               return 0;
+
+       /* Get the HMA region size required by fw */
+       param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+                FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_HMA_SIZE));
+       ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0,
+                             1, &param, &hma_size);
+       /* An error means card has its own memory or HMA is not supported by
+        * the firmware. Return without any errors.
+        */
+       if (ret || !hma_size)
+               return 0;
+
+       if (hma_size < HMA_MIN_TOTAL_SIZE ||
+           hma_size > HMA_MAX_TOTAL_SIZE) {
+               dev_err(adapter->pdev_dev,
+                       "HMA size %uMB beyond bounds(%u-%lu)MB\n",
+                       hma_size, HMA_MIN_TOTAL_SIZE, HMA_MAX_TOTAL_SIZE);
+               return -EINVAL;
+       }
+
+       page_size = HMA_PAGE_SIZE;
+       page_order = HMA_PAGE_ORDER;
+       adapter->hma.sgt = kzalloc(sizeof(*adapter->hma.sgt), GFP_KERNEL);
+       if (unlikely(!adapter->hma.sgt)) {
+               dev_err(adapter->pdev_dev, "HMA SG table allocation failed\n");
+               return -ENOMEM;
+       }
+       sgt = adapter->hma.sgt;
+       /* FW returned value will be in MB's
+        */
+       sgt->orig_nents = (hma_size << 20) / (page_size << page_order);
+       if (sg_alloc_table(sgt, sgt->orig_nents, GFP_KERNEL)) {
+               dev_err(adapter->pdev_dev, "HMA SGL allocation failed\n");
+               kfree(adapter->hma.sgt);
+               adapter->hma.sgt = NULL;
+               return -ENOMEM;
+       }
+
+       sgl = adapter->hma.sgt->sgl;
+       node = dev_to_node(adapter->pdev_dev);
+       for_each_sg(sgl, iter, sgt->orig_nents, i) {
+               newpage = alloc_pages_node(node, __GFP_NOWARN | GFP_KERNEL,
+                                          page_order);
+               if (!newpage) {
+                       dev_err(adapter->pdev_dev,
+                               "Not enough memory for HMA page allocation\n");
+                       ret = -ENOMEM;
+                       goto free_hma;
+               }
+               sg_set_page(iter, newpage, page_size << page_order, 0);
+       }
+
+       sgt->nents = dma_map_sg(adapter->pdev_dev, sgl, sgt->orig_nents,
+                               DMA_BIDIRECTIONAL);
+       if (!sgt->nents) {
+               dev_err(adapter->pdev_dev,
+                       "Not enough memory for HMA DMA mapping\n");
+               ret = -ENOMEM;
+               goto free_hma;
+       }
+       adapter->hma.flags |= HMA_DMA_MAPPED_FLAG;
+
+       adapter->hma.phy_addr = kcalloc(sgt->nents, sizeof(dma_addr_t),
+                                       GFP_KERNEL);
+       if (unlikely(!adapter->hma.phy_addr)) {
+               /* ret is still 0 from the size query; report the failure */
+               ret = -ENOMEM;
+               goto free_hma;
+       }
+
+       /* Record the DMA address of every mapped sg entry */
+       for_each_sg(sgl, iter, sgt->nents, i)
+               adapter->hma.phy_addr[i] = sg_dma_address(iter);
+
+       ncmds = DIV_ROUND_UP(sgt->nents, HMA_MAX_ADDR_IN_CMD);
+       /* Pass on the addresses to firmware */
+       for (i = 0, k = 0; i < ncmds; i++, k += HMA_MAX_ADDR_IN_CMD) {
+               struct fw_hma_cmd hma_cmd;
+               u8 naddr = HMA_MAX_ADDR_IN_CMD;
+               u8 soc = 0, eoc = 0;
+               u8 hma_mode = 1; /* Presently we support only Page table mode */
+
+               /* soc/eoc flag the first and last command of the sequence */
+               soc = (i == 0) ? 1 : 0;
+               eoc = (i == ncmds - 1) ? 1 : 0;
+
+               /* For last cmd, set naddr corresponding to remaining
+                * addresses
+                */
+               if (i == ncmds - 1) {
+                       naddr = sgt->nents % HMA_MAX_ADDR_IN_CMD;
+                       naddr = naddr ? naddr : HMA_MAX_ADDR_IN_CMD;
+               }
+               memset(&hma_cmd, 0, sizeof(hma_cmd));
+               hma_cmd.op_pkd = htonl(FW_CMD_OP_V(FW_HMA_CMD) |
+                                      FW_CMD_REQUEST_F | FW_CMD_WRITE_F);
+               hma_cmd.retval_len16 = htonl(FW_LEN16(hma_cmd));
+
+               hma_cmd.mode_to_pcie_params =
+                       htonl(FW_HMA_CMD_MODE_V(hma_mode) |
+                             FW_HMA_CMD_SOC_V(soc) | FW_HMA_CMD_EOC_V(eoc));
+
+               /* HMA cmd size specified in MB's */
+               hma_cmd.naddr_size =
+                       htonl(FW_HMA_CMD_SIZE_V(hma_size) |
+                             FW_HMA_CMD_NADDR_V(naddr));
+
+               /* Total Page size specified in units of 4K */
+               hma_cmd.addr_size_pkd =
+                       htonl(FW_HMA_CMD_ADDR_SIZE_V
+                               ((page_size << page_order) >> 12));
+
+               /* Fill in this command's naddr addresses */
+               for (j = 0; j < naddr; j++) {
+                       hma_cmd.phy_address[j] =
+                               cpu_to_be64(adapter->hma.phy_addr[j + k]);
+               }
+               ret = t4_wr_mbox(adapter, adapter->mbox, &hma_cmd,
+                                sizeof(hma_cmd), &hma_cmd);
+               if (ret) {
+                       dev_err(adapter->pdev_dev,
+                               "HMA FW command failed with err %d\n", ret);
+                       goto free_hma;
+               }
+       }
+
+       /* Every mailbox command succeeded, so ret is 0 here */
+       dev_info(adapter->pdev_dev,
+                "Reserved %uMB host memory for HMA\n", hma_size);
+       return 0;
+
+free_hma:
+       adap_free_hma_mem(adapter);
+       return ret;
+}
+
 static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
 {
        u32 v;
@@ -3751,6 +3978,12 @@ static int adap_init0_config(struct adapter *adapter, int reset)
        if (ret < 0)
                goto bye;
 
+       /* We will proceed even if HMA init fails. */
+       ret = adap_config_hma(adapter);
+       if (ret)
+               dev_err(adapter->pdev_dev,
+                       "HMA configuration failed with error %d\n", ret);
+
        /*
         * And finally tell the firmware to initialize itself using the
         * parameters from the Configuration File.
@@ -3957,6 +4190,11 @@ static int adap_init0(struct adapter *adap)
         * effect. Otherwise, it's time to try initializing the adapter.
         */
        if (state == DEV_STATE_INIT) {
+               ret = adap_config_hma(adap);
+               if (ret)
+                       dev_err(adap->pdev_dev,
+                               "HMA configuration failed with error %d\n",
+                               ret);
                dev_info(adap->pdev_dev, "Coming up as %s: "\
                         "Adapter already initialized\n",
                         adap->flags & MASTER_PF ? "MASTER" : "SLAVE");
@@ -4236,6 +4474,20 @@ static int adap_init0(struct adapter *adap)
                adap->vres.pbl.start = val[4];
                adap->vres.pbl.size = val[5] - val[4] + 1;
 
+               params[0] = FW_PARAM_PFVF(SRQ_START);
+               params[1] = FW_PARAM_PFVF(SRQ_END);
+               ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
+                                     params, val);
+               if (!ret) {
+                       adap->vres.srq.start = val[0];
+                       adap->vres.srq.size = val[1] - val[0] + 1;
+               }
+               if (adap->vres.srq.size) {
+                       adap->srq = t4_init_srq(adap->vres.srq.size);
+                       if (!adap->srq)
+                               dev_warn(&adap->pdev->dev, "could not allocate SRQ, continuing\n");
+               }
+
                params[0] = FW_PARAM_PFVF(SQRQ_START);
                params[1] = FW_PARAM_PFVF(SQRQ_END);
                params[2] = FW_PARAM_PFVF(CQ_START);
@@ -4269,6 +4521,18 @@ static int adap_init0(struct adapter *adap)
                         "max_ordird_qp %d max_ird_adapter %d\n",
                         adap->params.max_ordird_qp,
                         adap->params.max_ird_adapter);
+
+               /* Enable write_with_immediate if FW supports it */
+               params[0] = FW_PARAM_DEV(RDMA_WRITE_WITH_IMM);
+               ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, params,
+                                     val);
+               adap->params.write_w_imm_support = (ret == 0 && val[0] != 0);
+
+               /* Enable write_cmpl if FW supports it */
+               params[0] = FW_PARAM_DEV(RI_WRITE_CMPL_WR);
+               ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, params,
+                                     val);
+               adap->params.write_cmpl_support = (ret == 0 && val[0] != 0);
                adap->num_ofld_uld += 2;
        }
        if (caps_cmd.iscsicaps) {
@@ -4346,6 +4610,7 @@ static int adap_init0(struct adapter *adap)
         * happened to HW/FW, stop issuing commands.
         */
 bye:
+       adap_free_hma_mem(adap);
        kfree(adap->sge.egr_map);
        kfree(adap->sge.ingr_map);
        kfree(adap->sge.starving_fl);
@@ -4903,6 +5168,7 @@ static void free_some_resources(struct adapter *adapter)
 
        kvfree(adapter->smt);
        kvfree(adapter->l2t);
+       kvfree(adapter->srq);
        t4_cleanup_sched(adapter);
        kvfree(adapter->tids.tid_tab);
        cxgb4_cleanup_tc_flower(adapter);
@@ -4970,7 +5236,6 @@ static void cxgb4_mgmt_setup(struct net_device *dev)
        /* Initialize the device structure. */
        dev->netdev_ops = &cxgb4_mgmt_netdev_ops;
        dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
-       dev->needs_free_netdev = true;
 }
 
 static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
@@ -5181,6 +5446,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        adapter->name = pci_name(pdev);
        adapter->mbox = func;
        adapter->pf = func;
+       adapter->params.chip = chip;
+       adapter->adap_idx = adap_idx;
        adapter->msg_enable = DFLT_MSG_ENABLE;
        adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
                                    (sizeof(struct mbox_cmd) *
@@ -5256,6 +5523,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        INIT_WORK(&adapter->tid_release_task, process_tid_release_list);
        INIT_WORK(&adapter->db_full_task, process_db_full);
        INIT_WORK(&adapter->db_drop_task, process_db_drop);
+       INIT_WORK(&adapter->fatal_err_notify_task, notify_fatal_err);
 
        err = t4_prep_adapter(adapter);
        if (err)
@@ -5473,6 +5741,13 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (err)
                goto out_free_dev;
 
+       err = setup_fw_sge_queues(adapter);
+       if (err) {
+               dev_err(adapter->pdev_dev,
+                       "FW sge queue allocation failed, err %d", err);
+               goto out_free_dev;
+       }
+
        /*
         * The card is now ready to go.  If any errors occur during device
         * registration we do not fail the whole card but rather proceed only
@@ -5521,10 +5796,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                cxgb4_ptp_init(adapter);
 
        print_adapter_info(adapter);
-       setup_fw_sge_queues(adapter);
        return 0;
 
  out_free_dev:
+       t4_free_sge_resources(adapter);
        free_some_resources(adapter);
        if (adapter->flags & USING_MSIX)
                free_msix_info(adapter);
@@ -5573,6 +5848,8 @@ static void remove_one(struct pci_dev *pdev)
                        t4_uld_clean_up(adapter);
                }
 
+               adap_free_hma_mem(adapter);
+
                disable_interrupts(adapter);
 
                for_each_port(adapter, i)
index 6b5fea4532f363f4c3b8708e777b4f0cdf38f6fd..a95cde0fadf77345d808cf65f8d9e33ba095c24e 100644 (file)
@@ -342,6 +342,7 @@ static void free_queues_uld(struct adapter *adap, unsigned int uld_type)
 {
        struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
 
+       adap->sge.uld_rxq_info[uld_type] = NULL;
        kfree(rxq_info->rspq_id);
        kfree(rxq_info->uldrxq);
        kfree(rxq_info);
@@ -665,6 +666,8 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
        lld->ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl;
        lld->nodeid = dev_to_node(adap->pdev_dev);
        lld->fr_nsmr_tpte_wr_support = adap->params.fr_nsmr_tpte_wr_support;
+       lld->write_w_imm_support = adap->params.write_w_imm_support;
+       lld->write_cmpl_support = adap->params.write_cmpl_support;
 }
 
 static void uld_attach(struct adapter *adap, unsigned int uld)
index a14e8db51cdc17c1e091eb4e18746f50193200f1..b0ca06edaa7c021f99d27fcc00bd8011df2f86e7 100644 (file)
@@ -257,7 +257,8 @@ enum cxgb4_state {
        CXGB4_STATE_UP,
        CXGB4_STATE_START_RECOVERY,
        CXGB4_STATE_DOWN,
-       CXGB4_STATE_DETACH
+       CXGB4_STATE_DETACH,
+       CXGB4_STATE_FATAL_ERROR
 };
 
 enum cxgb4_control {
@@ -283,6 +284,7 @@ struct cxgb4_virt_res {                      /* virtualized HW resources */
        struct cxgb4_range iscsi;
        struct cxgb4_range stag;
        struct cxgb4_range rq;
+       struct cxgb4_range srq;
        struct cxgb4_range pbl;
        struct cxgb4_range qp;
        struct cxgb4_range cq;
@@ -352,6 +354,8 @@ struct cxgb4_lld_info {
        void **iscsi_ppm;                    /* iscsi page pod manager */
        int nodeid;                          /* device numa node id */
        bool fr_nsmr_tpte_wr_support;        /* FW supports FR_NSMR_TPTE_WR */
+       bool write_w_imm_support;         /* FW supports WRITE_WITH_IMMEDIATE */
+       bool write_cmpl_support;             /* FW supports WRITE_CMPL WR */
 };
 
 struct cxgb4_uld_info {
index 77b2b3fd9021c7a4a4bdc7dd8e18079dc20f784d..3a49e00a38a1dcdd183c45bae8b898d63aa1d4de 100644 (file)
@@ -42,8 +42,8 @@
 
 #define FW_SCHED_CLS_NONE 0xffffffff
 
-/* Max rate that can be set to a scheduling class is 10 Gbps */
-#define SCHED_MAX_RATE_KBPS 10000000U
+/* Max rate that can be set to a scheduling class is 100 Gbps */
+#define SCHED_MAX_RATE_KBPS 100000000U
 
 enum {
        SCHED_STATE_ACTIVE,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/srq.c b/drivers/net/ethernet/chelsio/cxgb4/srq.c
new file mode 100644 (file)
index 0000000..6228a57
--- /dev/null
@@ -0,0 +1,138 @@
+/*
+ * This file is part of the Chelsio T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017-2018 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "cxgb4.h"
+#include "t4_msg.h"
+#include "srq.h"
+
+/* t4_init_srq: allocate and initialise the SRQ bookkeeping data
+ * @srq_size: value recorded in the srq_size field of the new object
+ *
+ * Returns a zeroed srq_data with its completion and mutex initialised,
+ * or NULL if the allocation fails.
+ */
+struct srq_data *t4_init_srq(int srq_size)
+{
+       struct srq_data *srq = kvzalloc(sizeof(*srq), GFP_KERNEL);
+
+       if (srq) {
+               srq->srq_size = srq_size;
+               init_completion(&srq->comp);
+               mutex_init(&srq->lock);
+       }
+
+       return srq;
+}
+
+/* cxgb4_get_srq_entry: read the SRQ table entry
+ * @dev: Pointer to the net_device
+ * @srq_idx: Index to the srq
+ * @entryp: pointer to the srq entry
+ *
+ * Sends CPL_SRQ_TABLE_REQ message for the given index.
+ * Contents will be returned in CPL_SRQ_TABLE_RPL message.
+ *
+ * Returns zero if the read is successful, else an error
+ * number will be returned:
+ *   -ENODEV    adapter is not fully initialised or has no SRQ data
+ *   -ENOMEM    the request skb could not be allocated
+ *   -ETIMEDOUT no reply was seen within SRQ_WAIT_TO
+ * Caller should not use the srq entry if the return value is non-zero.
+ */
+int cxgb4_get_srq_entry(struct net_device *dev,
+                       int srq_idx, struct srq_entry *entryp)
+{
+       struct cpl_srq_table_req *req;
+       struct adapter *adap;
+       struct sk_buff *skb;
+       struct srq_data *s;
+       int rc = -ENODEV;
+
+       adap = netdev2adap(dev);
+       s = adap->srq;
+
+       if (!(adap->flags & FULL_INIT_DONE) || !s)
+               goto out;
+
+       skb = alloc_skb(sizeof(*req), GFP_KERNEL);
+       if (!skb)
+               return -ENOMEM;
+       req = (struct cpl_srq_table_req *)
+               __skb_put(skb, sizeof(*req));
+       memset(req, 0, sizeof(*req));
+       INIT_TP_WR(req, 0);
+       OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SRQ_TABLE_REQ,
+                                             TID_TID_V(srq_idx) |
+                               TID_QID_V(adap->sge.fw_evtq.abs_id)));
+       req->idx = srq_idx;
+
+       /* Only one request may be outstanding: s->entryp and s->comp are
+        * shared by every caller.
+        */
+       mutex_lock(&s->lock);
+
+       /* A reply that shows up after an earlier request timed out leaves
+        * the completion signalled.  Re-arm it so this request waits for
+        * its own reply instead of returning immediately.
+        */
+       reinit_completion(&s->comp);
+       s->entryp = entryp;
+       t4_mgmt_tx(adap, skb);
+
+       if (wait_for_completion_timeout(&s->comp, SRQ_WAIT_TO)) {
+               rc = 0;
+               /* Only meaningful once a reply has been processed; on
+                * timeout *entryp has not been written.
+                */
+               WARN_ON_ONCE(entryp->idx != srq_idx);
+       } else {
+               /* zero from wait_for_completion_timeout() means timeout */
+               rc = -ETIMEDOUT;
+       }
+
+       mutex_unlock(&s->lock);
+out:
+       return rc;
+}
+EXPORT_SYMBOL(cxgb4_get_srq_entry);
+
+/* do_srq_table_rpl: handle a CPL_SRQ_TABLE_RPL message
+ * @adap: adapter the reply was received on
+ * @rpl: the reply message
+ *
+ * When the reply status is CPL_CONTAINS_READ_RPL the decoded fields are
+ * stored in the entry registered in adap->srq->entryp; any other status
+ * is logged and nothing is stored.  The waiter on adap->srq->comp is
+ * completed in both cases.
+ */
+void do_srq_table_rpl(struct adapter *adap,
+                     const struct cpl_srq_table_rpl *rpl)
+{
+       struct srq_data *srq = adap->srq;
+       unsigned int tbl_idx = TID_TID_G(GET_TID(rpl));
+
+       if (unlikely(rpl->status != CPL_CONTAINS_READ_RPL)) {
+               dev_err(adap->pdev_dev,
+                       "Unexpected SRQ_TABLE_RPL status %u for entry %u\n",
+                       rpl->status, tbl_idx);
+       } else {
+               struct srq_entry *dst = srq->entryp;
+               u32 qlen_qbase = be32_to_cpu(rpl->qlen_qbase);
+
+               /* Copy the reply into the caller-supplied entry */
+               dst->valid = 1;
+               dst->idx = tbl_idx;
+               dst->pdid = SRQT_PDID_G(be64_to_cpu(rpl->rsvd_pdid));
+               dst->qlen = SRQT_QLEN_G(qlen_qbase);
+               dst->qbase = SRQT_QBASE_G(qlen_qbase);
+               dst->cur_msn = be16_to_cpu(rpl->cur_msn);
+               dst->max_msn = be16_to_cpu(rpl->max_msn);
+       }
+
+       complete(&srq->comp);
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/srq.h b/drivers/net/ethernet/chelsio/cxgb4/srq.h
new file mode 100644 (file)
index 0000000..ec85cf9
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * This file is part of the Chelsio T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017-2018 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_SRQ_H
+#define __CXGB4_SRQ_H
+
+struct adapter;
+struct cpl_srq_table_rpl;
+
+#define SRQ_WAIT_TO    (HZ * 5)
+
+struct srq_entry {     /* decoded SRQ table entry filled in by do_srq_table_rpl() */
+       u8 valid;       /* set to 1 when a reply has been stored here */
+       u8 idx;         /* TID field of the reply; u8, so only its low 8 bits are kept */
+       u8 qlen;        /* SRQT_QLEN_G() of the reply's qlen_qbase */
+       u16 pdid;       /* SRQT_PDID_G() of the reply's rsvd_pdid */
+       u16 cur_msn;    /* reply cur_msn, converted to CPU byte order */
+       u16 max_msn;    /* reply max_msn, converted to CPU byte order */
+       u32 qbase;      /* SRQT_QBASE_G() of the reply's qlen_qbase */
+};
+
+struct srq_data {      /* per-adapter state for SRQ table reads (adap->srq) */
+       unsigned int srq_size;  /* value passed to t4_init_srq() */
+       struct srq_entry *entryp;       /* entry do_srq_table_rpl() fills for the current request */
+       struct completion comp; /* completed by do_srq_table_rpl(), waited on in cxgb4_get_srq_entry() */
+       struct mutex lock; /* held by cxgb4_get_srq_entry() from sending the request until the wait ends */
+};
+
+struct srq_data *t4_init_srq(int srq_size);
+int cxgb4_get_srq_entry(struct net_device *dev,
+                       int srq_idx, struct srq_entry *entryp);
+void do_srq_table_rpl(struct adapter *adap,
+                     const struct cpl_srq_table_rpl *rpl);
+#endif  /* __CXGB4_SRQ_H */
index bd41f93f73edc96b27430769517d81308769d21e..7cb3ef466cc7799eea7a9e7cb85465d40b5619e8 100644 (file)
@@ -487,7 +487,7 @@ static int t4_edc_err_read(struct adapter *adap, int idx)
  * t4_memory_rw_init - Get memory window relative offset, base, and size.
  * @adap: the adapter
  * @win: PCI-E Memory Window to use
- * @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC
+ * @mtype: memory type: MEM_EDC0, MEM_EDC1, MEM_HMA or MEM_MC
  * @mem_off: memory relative offset with respect to @mtype.
  * @mem_base: configured memory base address.
  * @mem_aperture: configured memory window aperture.
@@ -4066,8 +4066,6 @@ int t4_link_l1cfg(struct adapter *adapter, unsigned int mbox,
        unsigned int fw_mdi = FW_PORT_CAP32_MDI_V(FW_PORT_CAP32_MDI_AUTO);
        fw_port_cap32_t fw_fc, cc_fec, fw_fec, rcap;
 
-       lc->link_ok = 0;
-
        /* Convert driver coding of Pause Frame Flow Control settings into the
         * Firmware's API.
         */
@@ -6091,6 +6089,7 @@ unsigned int t4_get_tp_ch_map(struct adapter *adap, int pidx)
 
        case CHELSIO_T6:
                switch (nports) {
+               case 1:
                case 2: return 1 << pidx;
                }
                break;
@@ -8605,6 +8604,25 @@ static int t4_get_flash_params(struct adapter *adap)
                }
                break;
        }
+       case 0x9d: { /* ISSI -- Integrated Silicon Solution, Inc. */
+               /* This Density -> Size decoding table is taken from ISSI
+                * Data Sheets.
+                */
+               density = (flashid >> 16) & 0xff;
+               switch (density) {
+               case 0x16: /* 32 MB */
+                       size = 1 << 25;
+                       break;
+               case 0x17: /* 64MB */
+                       size = 1 << 26;
+                       break;
+               default:
+                       dev_err(adap->pdev_dev, "ISSI Flash Part has bad size, ID = %#x, Density code = %#x\n",
+                               flashid, density);
+                       return -EINVAL;
+               }
+               break;
+       }
        case 0xc2: { /* Macronix */
                /* This Density -> Size decoding table is taken from Macronix
                 * Data Sheets.
index d0db4427b77e43bcf2eed98f62a00672a516388f..5e8f5ca8e3ee38bce784e7b1d37f1488f77e960c 100644 (file)
@@ -52,6 +52,7 @@ enum {
        CPL_L2T_WRITE_REQ     = 0x12,
        CPL_SMT_WRITE_REQ     = 0x14,
        CPL_TID_RELEASE       = 0x1A,
+       CPL_SRQ_TABLE_REQ     = 0x1C,
        CPL_TX_DATA_ISO       = 0x1F,
 
        CPL_CLOSE_LISTSRV_RPL = 0x20,
@@ -102,6 +103,7 @@ enum {
        CPL_FW4_MSG           = 0xC0,
        CPL_FW4_PLD           = 0xC1,
        CPL_FW4_ACK           = 0xC3,
+       CPL_SRQ_TABLE_RPL     = 0xCC,
 
        CPL_RX_PHYS_DSGL      = 0xD0,
 
@@ -136,6 +138,8 @@ enum CPL_error {
        CPL_ERR_KEEPALV_NEG_ADVICE = 37,
        CPL_ERR_ABORT_FAILED       = 42,
        CPL_ERR_IWARP_FLM          = 50,
+       CPL_CONTAINS_READ_RPL      = 60,
+       CPL_CONTAINS_WRITE_RPL     = 61,
 };
 
 enum {
@@ -198,6 +202,7 @@ union opcode_tid {
 /* partitioning of TID fields that also carry a queue id */
 #define TID_TID_S    0
 #define TID_TID_M    0x3fff
+#define TID_TID_V(x) ((x) << TID_TID_S)
 #define TID_TID_G(x) (((x) >> TID_TID_S) & TID_TID_M)
 
 #define TID_QID_S    14
@@ -743,6 +748,22 @@ struct cpl_abort_req_rss {
        u8 status;
 };
 
+struct cpl_abort_req_rss6 {
+       WR_HDR;
+       union opcode_tid ot;
+       __u32 srqidx_status;
+};
+
+#define ABORT_RSS_STATUS_S    0
+#define ABORT_RSS_STATUS_M    0xff
+#define ABORT_RSS_STATUS_V(x) ((x) << ABORT_RSS_STATUS_S)
+#define ABORT_RSS_STATUS_G(x) (((x) >> ABORT_RSS_STATUS_S) & ABORT_RSS_STATUS_M)
+
+#define ABORT_RSS_SRQIDX_S    8
+#define ABORT_RSS_SRQIDX_M    0xffffff
+#define ABORT_RSS_SRQIDX_V(x) ((x) << ABORT_RSS_SRQIDX_S)
+#define ABORT_RSS_SRQIDX_G(x) (((x) >> ABORT_RSS_SRQIDX_S) & ABORT_RSS_SRQIDX_M)
+
 struct cpl_abort_req {
        WR_HDR;
        union opcode_tid ot;
@@ -758,6 +779,11 @@ struct cpl_abort_rpl_rss {
        u8 status;
 };
 
+struct cpl_abort_rpl_rss6 {
+       union opcode_tid ot;
+       __u32 srqidx_status;
+};
+
 struct cpl_abort_rpl {
        WR_HDR;
        union opcode_tid ot;
@@ -2112,4 +2138,49 @@ enum {
        X_CPL_RX_MPS_PKT_TYPE_QFC   = 1 << 2,
        X_CPL_RX_MPS_PKT_TYPE_PTP   = 1 << 3
 };
+
+struct cpl_srq_table_req {     /* request built by cxgb4_get_srq_entry() */
+       WR_HDR;
+       union opcode_tid ot;    /* presumably what OPCODE_TID() writes: CPL_SRQ_TABLE_REQ + TID/QID -- confirm */
+       __u8 status;
+       __u8 rsvd[2];
+       __u8 idx;               /* SRQ table index to read */
+       __be64 rsvd_pdid;       /* left zero by cxgb4_get_srq_entry() */
+       __be32 qlen_qbase;      /* left zero by cxgb4_get_srq_entry() */
+       __be16 cur_msn;         /* left zero by cxgb4_get_srq_entry() */
+       __be16 max_msn;         /* left zero by cxgb4_get_srq_entry() */
+};
+
+struct cpl_srq_table_rpl {     /* reply consumed by do_srq_table_rpl() */
+       union opcode_tid ot;
+       __u8 status;            /* CPL_CONTAINS_READ_RPL when the reply carries a read entry */
+       __u8 rsvd[2];
+       __u8 idx;
+       __be64 rsvd_pdid;       /* big-endian; PDID extracted with SRQT_PDID_G() */
+       __be32 qlen_qbase;      /* big-endian; split with SRQT_QLEN_G() / SRQT_QBASE_G() */
+       __be16 cur_msn;         /* big-endian */
+       __be16 max_msn;         /* big-endian */
+};
+
+/* cpl_srq_table_{req,rpl}.params fields */
+#define SRQT_QLEN_S   28
+#define SRQT_QLEN_M   0xF
+#define SRQT_QLEN_V(x) ((x) << SRQT_QLEN_S)
+#define SRQT_QLEN_G(x) (((x) >> SRQT_QLEN_S) & SRQT_QLEN_M)
+
+#define SRQT_QBASE_S    0
+#define SRQT_QBASE_M   0x3FFFFFF
+#define SRQT_QBASE_V(x) ((x) << SRQT_QBASE_S)
+#define SRQT_QBASE_G(x) (((x) >> SRQT_QBASE_S) & SRQT_QBASE_M)
+
+#define SRQT_PDID_S    0
+#define SRQT_PDID_M   0xFF
+#define SRQT_PDID_V(x) ((x) << SRQT_PDID_S)
+#define SRQT_PDID_G(x) (((x) >> SRQT_PDID_S) & SRQT_PDID_M)
+
+#define SRQT_IDX_S    0
+#define SRQT_IDX_M    0xF
+#define SRQT_IDX_V(x) ((x) << SRQT_IDX_S)
+#define SRQT_IDX_G(x) (((x) >> SRQT_IDX_S) & SRQT_IDX_M)
+
 #endif  /* __T4_MSG_H */
index 0d83b4064a78522ff3651082814a1a680b1e691a..544757f6ab3a5670af38195a021f08047ce00707 100644 (file)
@@ -101,6 +101,7 @@ enum fw_wr_opcodes {
        FW_RI_BIND_MW_WR               = 0x18,
        FW_RI_FR_NSMR_WR               = 0x19,
        FW_RI_FR_NSMR_TPTE_WR          = 0x20,
+       FW_RI_RDMA_WRITE_CMPL_WR       = 0x21,
        FW_RI_INV_LSTAG_WR             = 0x1a,
        FW_ISCSI_TX_DATA_WR            = 0x45,
        FW_PTP_TX_PKT_WR               = 0x46,
@@ -766,6 +767,7 @@ enum fw_cmd_opcodes {
        FW_DEVLOG_CMD                  = 0x25,
        FW_CLIP_CMD                    = 0x28,
        FW_PTP_CMD                     = 0x3e,
+       FW_HMA_CMD                     = 0x3f,
        FW_LASTC2E_CMD                 = 0x40,
        FW_ERROR_CMD                   = 0x80,
        FW_DEBUG_CMD                   = 0x81,
@@ -1132,6 +1134,7 @@ enum fw_memtype_cf {
        FW_MEMTYPE_CF_FLASH             = 0x4,
        FW_MEMTYPE_CF_INTERNAL          = 0x5,
        FW_MEMTYPE_CF_EXTMEM1           = 0x6,
+       FW_MEMTYPE_CF_HMA               = 0x7,
 };
 
 struct fw_caps_config_cmd {
@@ -1210,6 +1213,9 @@ enum fw_params_param_dev {
        FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR  = 0x1C,
        FW_PARAMS_PARAM_DEV_FILTER2_WR  = 0x1D,
        FW_PARAMS_PARAM_DEV_MPSBGMAP    = 0x1E,
+       FW_PARAMS_PARAM_DEV_HMA_SIZE    = 0x20,
+       FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21,
+       FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR    = 0x24,
 };
 
 /*
@@ -1241,6 +1247,8 @@ enum fw_params_param_pfvf {
        FW_PARAMS_PARAM_PFVF_SQRQ_END   = 0x16,
        FW_PARAMS_PARAM_PFVF_CQ_START   = 0x17,
        FW_PARAMS_PARAM_PFVF_CQ_END     = 0x18,
+       FW_PARAMS_PARAM_PFVF_SRQ_START  = 0x19,
+       FW_PARAMS_PARAM_PFVF_SRQ_END    = 0x1A,
        FW_PARAMS_PARAM_PFVF_SCHEDCLASS_ETH = 0x20,
        FW_PARAMS_PARAM_PFVF_VIID       = 0x24,
        FW_PARAMS_PARAM_PFVF_CPMASK     = 0x25,
@@ -3435,6 +3443,59 @@ struct fw_debug_cmd {
 #define FW_DEBUG_CMD_TYPE_G(x) \
        (((x) >> FW_DEBUG_CMD_TYPE_S) & FW_DEBUG_CMD_TYPE_M)
 
+struct fw_hma_cmd {
+       __be32 op_pkd;
+       __be32 retval_len16;
+       __be32 mode_to_pcie_params;
+       __be32 naddr_size;
+       __be32 addr_size_pkd;
+       __be32 r6;
+       __be64 phy_address[5];
+};
+
+#define FW_HMA_CMD_MODE_S      31
+#define FW_HMA_CMD_MODE_M      0x1
+#define FW_HMA_CMD_MODE_V(x)   ((x) << FW_HMA_CMD_MODE_S)
+#define FW_HMA_CMD_MODE_G(x)   \
+       (((x) >> FW_HMA_CMD_MODE_S) & FW_HMA_CMD_MODE_M)
+#define FW_HMA_CMD_MODE_F      FW_HMA_CMD_MODE_V(1U)
+
+#define FW_HMA_CMD_SOC_S       30
+#define FW_HMA_CMD_SOC_M       0x1
+#define FW_HMA_CMD_SOC_V(x)    ((x) << FW_HMA_CMD_SOC_S)
+#define FW_HMA_CMD_SOC_G(x)    (((x) >> FW_HMA_CMD_SOC_S) & FW_HMA_CMD_SOC_M)
+#define FW_HMA_CMD_SOC_F       FW_HMA_CMD_SOC_V(1U)
+
+#define FW_HMA_CMD_EOC_S       29
+#define FW_HMA_CMD_EOC_M       0x1
+#define FW_HMA_CMD_EOC_V(x)    ((x) << FW_HMA_CMD_EOC_S)
+#define FW_HMA_CMD_EOC_G(x)    (((x) >> FW_HMA_CMD_EOC_S) & FW_HMA_CMD_EOC_M)
+#define FW_HMA_CMD_EOC_F       FW_HMA_CMD_EOC_V(1U)
+
+#define FW_HMA_CMD_PCIE_PARAMS_S       0
+#define FW_HMA_CMD_PCIE_PARAMS_M       0x7ffffff
+#define FW_HMA_CMD_PCIE_PARAMS_V(x)    ((x) << FW_HMA_CMD_PCIE_PARAMS_S)
+#define FW_HMA_CMD_PCIE_PARAMS_G(x)    \
+       (((x) >> FW_HMA_CMD_PCIE_PARAMS_S) & FW_HMA_CMD_PCIE_PARAMS_M)
+
+#define FW_HMA_CMD_NADDR_S     12
+#define FW_HMA_CMD_NADDR_M     0x3f
+#define FW_HMA_CMD_NADDR_V(x)  ((x) << FW_HMA_CMD_NADDR_S)
+#define FW_HMA_CMD_NADDR_G(x)  \
+       (((x) >> FW_HMA_CMD_NADDR_S) & FW_HMA_CMD_NADDR_M)
+
+#define FW_HMA_CMD_SIZE_S      0
+#define FW_HMA_CMD_SIZE_M      0xfff
+#define FW_HMA_CMD_SIZE_V(x)   ((x) << FW_HMA_CMD_SIZE_S)
+#define FW_HMA_CMD_SIZE_G(x)   \
+       (((x) >> FW_HMA_CMD_SIZE_S) & FW_HMA_CMD_SIZE_M)
+
+#define FW_HMA_CMD_ADDR_SIZE_S         11
+#define FW_HMA_CMD_ADDR_SIZE_M         0x1fffff
+#define FW_HMA_CMD_ADDR_SIZE_V(x)      ((x) << FW_HMA_CMD_ADDR_SIZE_S)
+#define FW_HMA_CMD_ADDR_SIZE_G(x)      \
+       (((x) >> FW_HMA_CMD_ADDR_SIZE_S) & FW_HMA_CMD_ADDR_SIZE_M)
+
 enum pcie_fw_eval {
        PCIE_FW_EVAL_CRASH = 0,
 };
index b7e79e64d2ed17f96957645f98b1201aefde1103..7bd8497fd9be6bcd07dd88569156eee5fc44fd96 100644 (file)
@@ -155,8 +155,6 @@ void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
                const char *fc;
                const struct port_info *pi = netdev_priv(dev);
 
-               netif_carrier_on(dev);
-
                switch (pi->link_cfg.speed) {
                case 100:
                        s = "100Mbps";
@@ -202,7 +200,6 @@ void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
 
                netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
        } else {
-               netif_carrier_off(dev);
                netdev_info(dev, "link down\n");
        }
 }
@@ -278,6 +275,17 @@ static int link_start(struct net_device *dev)
         */
        if (ret == 0)
                ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
+
+       /* The Virtual Interfaces are connected to an internal switch on the
+        * chip which allows VIs attached to the same port to talk to each
+        * other even when the port link is down.  As a result, we generally
+        * want to always report a VI's link as being "up", provided there are
+        * no errors in enabling vi.
+        */
+
+       if (ret == 0)
+               netif_carrier_on(dev);
+
        return ret;
 }
 
@@ -1281,22 +1289,22 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
 
        case FW_PORT_TYPE_KR:
                SET_LMM(Backplane);
-               SET_LMM(10000baseKR_Full);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
                break;
 
        case FW_PORT_TYPE_BP_AP:
                SET_LMM(Backplane);
-               SET_LMM(10000baseR_FEC);
-               SET_LMM(10000baseKR_Full);
-               SET_LMM(1000baseKX_Full);
+               FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
                break;
 
        case FW_PORT_TYPE_BP4_AP:
                SET_LMM(Backplane);
-               SET_LMM(10000baseR_FEC);
-               SET_LMM(10000baseKR_Full);
-               SET_LMM(1000baseKX_Full);
-               SET_LMM(10000baseKX4_Full);
+               FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
                break;
 
        case FW_PORT_TYPE_FIBER_XFI:
@@ -1312,18 +1320,24 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
        case FW_PORT_TYPE_BP40_BA:
        case FW_PORT_TYPE_QSFP:
                SET_LMM(FIBRE);
-               SET_LMM(40000baseSR4_Full);
+               FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
+               FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
                break;
 
        case FW_PORT_TYPE_CR_QSFP:
        case FW_PORT_TYPE_SFP28:
                SET_LMM(FIBRE);
-               SET_LMM(25000baseCR_Full);
+               FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
+               FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
                break;
 
        case FW_PORT_TYPE_KR_SFP28:
                SET_LMM(Backplane);
-               SET_LMM(25000baseKR_Full);
+               FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
+               FW_CAPS_TO_LMM(SPEED_25G, 25000baseKR_Full);
                break;
 
        case FW_PORT_TYPE_KR_XLAUI:
@@ -1335,13 +1349,18 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
 
        case FW_PORT_TYPE_CR2_QSFP:
                SET_LMM(FIBRE);
-               SET_LMM(50000baseSR2_Full);
+               FW_CAPS_TO_LMM(SPEED_50G, 50000baseSR2_Full);
                break;
 
        case FW_PORT_TYPE_KR4_100G:
        case FW_PORT_TYPE_CR4_QSFP:
                SET_LMM(FIBRE);
-               SET_LMM(100000baseCR4_Full);
+               FW_CAPS_TO_LMM(SPEED_1G,  1000baseT_Full);
+               FW_CAPS_TO_LMM(SPEED_10G, 10000baseSR_Full);
+               FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
+               FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
+               FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
+               FW_CAPS_TO_LMM(SPEED_100G, 100000baseCR4_Full);
                break;
 
        default:
index 977d4c2c759d6f47c138064cedebf708f8396c05..3f8fe8fd79cc65beb17e334a0403b01006f0bd08 100644 (file)
   local_irq_{dis,en}able()
 */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 static const char version[] =
 "cs89x0.c:v1.02 11/26/96 Russell Nelson <nelson@crynwr.com>\n";
 
-/* ======================= configure the driver here ======================= */
-
-/* use 0 for production, 1 for verification, >2 for debug */
-#ifndef NET_DEBUG
-#define NET_DEBUG 0
-#endif
-
-/* ======================= end of configuration ======================= */
-
-
-/* Always include 'config.h' first in case the user wants to turn on
-   or override something. */
 #include <linux/module.h>
 
 /*
@@ -93,6 +83,7 @@ static const char version[] =
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/netdevice.h>
+#include <linux/platform_device.h>
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <linux/delay.h>
@@ -105,24 +96,22 @@ static const char version[] =
 
 #include "cs89x0.h"
 
-static unsigned int net_debug = NET_DEBUG;
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "debug message level");
 
 /* Information that need to be kept for each board. */
 struct net_local {
+       int msg_enable;
        int chip_type;          /* one of: CS8900, CS8920, CS8920M */
        char chip_revision;     /* revision letter of the chip ('A'...) */
        int send_cmd;           /* the propercommand used to send a packet. */
        int rx_mode;
        int curr_rx_cfg;
         int send_underrun;      /* keep track of how many underruns in a row we get */
-       struct sk_buff *skb;
 };
 
 /* Index to functions, as function prototypes. */
-
-#if 0
-extern void reset_chip(struct net_device *dev);
-#endif
 static int net_open(struct net_device *dev);
 static int net_send_packet(struct sk_buff *skb, struct net_device *dev);
 static irqreturn_t net_interrupt(int irq, void *dev_id);
@@ -132,10 +121,6 @@ static int net_close(struct net_device *dev);
 static struct net_device_stats *net_get_stats(struct net_device *dev);
 static int set_mac_address(struct net_device *dev, void *addr);
 
-
-/* Example routines you must write ;->. */
-#define tx_done(dev) 1
-
 /* For reading/writing registers ISA-style */
 static inline int
 readreg_io(struct net_device *dev, int portno)
@@ -176,12 +161,10 @@ static const struct net_device_ops mac89x0_netdev_ops = {
 
 /* Probe for the CS8900 card in slot E.  We won't bother looking
    anywhere else until we have a really good reason to do so. */
-struct net_device * __init mac89x0_probe(int unit)
+static int mac89x0_device_probe(struct platform_device *pdev)
 {
        struct net_device *dev;
-       static int once_is_enough;
        struct net_local *lp;
-       static unsigned version_printed;
        int i, slot;
        unsigned rev_type = 0;
        unsigned long ioaddr;
@@ -189,21 +172,9 @@ struct net_device * __init mac89x0_probe(int unit)
        int err = -ENODEV;
        struct nubus_rsrc *fres;
 
-       if (!MACH_IS_MAC)
-               return ERR_PTR(-ENODEV);
-
        dev = alloc_etherdev(sizeof(struct net_local));
        if (!dev)
-               return ERR_PTR(-ENOMEM);
-
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       }
-
-       if (once_is_enough)
-               goto out;
-       once_is_enough = 1;
+               return -ENOMEM;
 
        /* We might have to parameterize this later */
        slot = 0xE;
@@ -230,9 +201,13 @@ struct net_device * __init mac89x0_probe(int unit)
        if (sig != swab16(CHIP_EISA_ID_SIG))
                goto out;
 
+       SET_NETDEV_DEV(dev, &pdev->dev);
+
        /* Initialize the net_device structure. */
        lp = netdev_priv(dev);
 
+       lp->msg_enable = netif_msg_init(debug, 0);
+
        /* Fill in the 'dev' fields. */
        dev->base_addr = ioaddr;
        dev->mem_start = (unsigned long)
@@ -255,19 +230,16 @@ struct net_device * __init mac89x0_probe(int unit)
        if (lp->chip_type != CS8900 && lp->chip_revision >= 'C')
                lp->send_cmd = TX_NOW;
 
-       if (net_debug && version_printed++ == 0)
-               printk(version);
+       netif_dbg(lp, drv, dev, "%s", version);
 
-       printk(KERN_INFO "%s: cs89%c0%s rev %c found at %#8lx",
-              dev->name,
-              lp->chip_type==CS8900?'0':'2',
-              lp->chip_type==CS8920M?"M":"",
-              lp->chip_revision,
-              dev->base_addr);
+       pr_info("cs89%c0%s rev %c found at %#8lx\n",
+               lp->chip_type == CS8900 ? '0' : '2',
+               lp->chip_type == CS8920M ? "M" : "",
+               lp->chip_revision, dev->base_addr);
 
        /* Try to read the MAC address */
        if ((readreg(dev, PP_SelfST) & (EEPROM_PRESENT | EEPROM_OK)) == 0) {
-               printk("\nmac89x0: No EEPROM, giving up now.\n");
+               pr_info("No EEPROM, giving up now.\n");
                goto out1;
         } else {
                 for (i = 0; i < ETH_ALEN; i += 2) {
@@ -282,39 +254,23 @@ struct net_device * __init mac89x0_probe(int unit)
 
        /* print the IRQ and ethernet address. */
 
-       printk(" IRQ %d ADDR %pM\n", dev->irq, dev->dev_addr);
+       pr_info("MAC %pM, IRQ %d\n", dev->dev_addr, dev->irq);
 
        dev->netdev_ops         = &mac89x0_netdev_ops;
 
        err = register_netdev(dev);
        if (err)
                goto out1;
-       return NULL;
+
+       platform_set_drvdata(pdev, dev);
+       return 0;
 out1:
        nubus_writew(0, dev->base_addr + ADD_PORT);
 out:
        free_netdev(dev);
-       return ERR_PTR(err);
+       return err;
 }
 
-#if 0
-/* This is useful for something, but I don't know what yet. */
-void __init reset_chip(struct net_device *dev)
-{
-       int reset_start_time;
-
-       writereg(dev, PP_SelfCTL, readreg(dev, PP_SelfCTL) | POWER_ON_RESET);
-
-       /* wait 30 ms */
-       msleep_interruptible(30);
-
-       /* Wait until the chip is reset */
-       reset_start_time = jiffies;
-       while( (readreg(dev, PP_SelfST) & INIT_DONE) == 0 && jiffies - reset_start_time < 2)
-               ;
-}
-#endif
-
 /* Open/initialize the board.  This is called (in the current kernel)
    sometime after booting when the 'ifconfig' program is run.
 
@@ -374,11 +330,9 @@ net_send_packet(struct sk_buff *skb, struct net_device *dev)
        struct net_local *lp = netdev_priv(dev);
        unsigned long flags;
 
-       if (net_debug > 3)
-               printk("%s: sent %d byte packet of type %x\n",
-                      dev->name, skb->len,
-                      (skb->data[ETH_ALEN+ETH_ALEN] << 8)
-                      | skb->data[ETH_ALEN+ETH_ALEN+1]);
+       netif_dbg(lp, tx_queued, dev, "sent %d byte packet of type %x\n",
+                 skb->len, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+                 skb->data[ETH_ALEN + ETH_ALEN + 1]);
 
        /* keep the upload from being interrupted, since we
           ask the chip to start transmitting before the
@@ -416,11 +370,6 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
        struct net_local *lp;
        int ioaddr, status;
 
-       if (dev == NULL) {
-               printk ("net_interrupt(): irq %d for unknown device.\n", irq);
-               return IRQ_NONE;
-       }
-
        ioaddr = dev->base_addr;
        lp = netdev_priv(dev);
 
@@ -432,7 +381,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
            faster than you can read them off, you're screwed.  Hasta la
            vista, baby!  */
        while ((status = swab16(nubus_readw(dev->base_addr + ISQ_PORT)))) {
-               if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status);
+               netif_dbg(lp, intr, dev, "status=%04x\n", status);
                switch(status & ISQ_EVENT_MASK) {
                case ISQ_RECEIVER_EVENT:
                        /* Got a packet(s). */
@@ -462,7 +411,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
                                netif_wake_queue(dev);
                        }
                        if (status & TX_UNDERRUN) {
-                               if (net_debug > 0) printk("%s: transmit underrun\n", dev->name);
+                               netif_dbg(lp, tx_err, dev, "transmit underrun\n");
                                 lp->send_underrun++;
                                 if (lp->send_underrun == 3) lp->send_cmd = TX_AFTER_381;
                                 else if (lp->send_underrun == 6) lp->send_cmd = TX_AFTER_ALL;
@@ -483,6 +432,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
 static void
 net_rx(struct net_device *dev)
 {
+       struct net_local *lp = netdev_priv(dev);
        struct sk_buff *skb;
        int status, length;
 
@@ -506,7 +456,6 @@ net_rx(struct net_device *dev)
        /* Malloc up new buffer. */
        skb = alloc_skb(length, GFP_ATOMIC);
        if (skb == NULL) {
-               printk("%s: Memory squeeze, dropping packet.\n", dev->name);
                dev->stats.rx_dropped++;
                return;
        }
@@ -515,10 +464,9 @@ net_rx(struct net_device *dev)
        skb_copy_to_linear_data(skb, (void *)(dev->mem_start + PP_RxFrame),
                                length);
 
-       if (net_debug > 3)printk("%s: received %d byte packet of type %x\n",
-                                 dev->name, length,
-                                 (skb->data[ETH_ALEN+ETH_ALEN] << 8)
-                                | skb->data[ETH_ALEN+ETH_ALEN+1]);
+       netif_dbg(lp, rx_status, dev, "received %d byte packet of type %x\n",
+                 length, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+                 skb->data[ETH_ALEN + ETH_ALEN + 1]);
 
         skb->protocol=eth_type_trans(skb,dev);
        netif_rx(skb);
@@ -594,7 +542,7 @@ static int set_mac_address(struct net_device *dev, void *addr)
                return -EADDRNOTAVAIL;
 
        memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN);
-       printk("%s: Setting MAC address to %pM\n", dev->name, dev->dev_addr);
+       netdev_info(dev, "Setting MAC address to %pM\n", dev->dev_addr);
 
        /* set the Ethernet address */
        for (i=0; i < ETH_ALEN/2; i++)
@@ -603,32 +551,24 @@ static int set_mac_address(struct net_device *dev, void *addr)
        return 0;
 }
 
-#ifdef MODULE
-
-static struct net_device *dev_cs89x0;
-static int debug;
-
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "CS89[02]0 debug level (0-5)");
 MODULE_LICENSE("GPL");
 
-int __init
-init_module(void)
+static int mac89x0_device_remove(struct platform_device *pdev)
 {
-       net_debug = debug;
-        dev_cs89x0 = mac89x0_probe(-1);
-       if (IS_ERR(dev_cs89x0)) {
-                printk(KERN_WARNING "mac89x0.c: No card found\n");
-               return PTR_ERR(dev_cs89x0);
-       }
+       struct net_device *dev = platform_get_drvdata(pdev);
+
+       unregister_netdev(dev);
+       nubus_writew(0, dev->base_addr + ADD_PORT);
+       free_netdev(dev);
        return 0;
 }
 
-void
-cleanup_module(void)
-{
-       unregister_netdev(dev_cs89x0);
-       nubus_writew(0, dev_cs89x0->base_addr + ADD_PORT);
-       free_netdev(dev_cs89x0);
-}
-#endif /* MODULE */
+static struct platform_driver mac89x0_platform_driver = {
+       .probe = mac89x0_device_probe,
+       .remove = mac89x0_device_remove,
+       .driver = {
+               .name = "mac89x0",
+       },
+};
+
+module_platform_driver(mac89x0_platform_driver);
index 9b218f0e5a4cddc285ab89843065de2c0eab9d04..0dd64acd2a3fb4fb6fe41c822c01f9235df40992 100644 (file)
@@ -33,7 +33,7 @@
 
 #define DRV_NAME               "enic"
 #define DRV_DESCRIPTION                "Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION            "2.3.0.45"
+#define DRV_VERSION            "2.3.0.53"
 #define DRV_COPYRIGHT          "Copyright 2008-2013 Cisco Systems, Inc"
 
 #define ENIC_BARS_MAX          6
@@ -140,6 +140,7 @@ struct enic_rfs_flw_tbl {
 struct vxlan_offload {
        u16 vxlan_udp_port_number;
        u8 patch_level;
+       u8 flags;
 };
 
 /* Per-instance private data structure */
index efb9333c7cf80179560915f0e251f05252f1baa5..869006c2002d3be453f2443fd69cb6d07fbee4b2 100644 (file)
@@ -474,6 +474,39 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd)
        return 0;
 }
 
+static int enic_get_rx_flow_hash(struct enic *enic, struct ethtool_rxnfc *cmd)
+{
+       cmd->data = 0;
+
+       switch (cmd->flow_type) {
+       case TCP_V6_FLOW:
+       case TCP_V4_FLOW:
+               cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+               /* Fall through */
+       case UDP_V6_FLOW:
+       case UDP_V4_FLOW:
+               if (vnic_dev_capable_udp_rss(enic->vdev))
+                       cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+               /* Fall through */
+       case SCTP_V4_FLOW:
+       case AH_ESP_V4_FLOW:
+       case AH_V4_FLOW:
+       case ESP_V4_FLOW:
+       case SCTP_V6_FLOW:
+       case AH_ESP_V6_FLOW:
+       case AH_V6_FLOW:
+       case ESP_V6_FLOW:
+       case IPV4_FLOW:
+       case IPV6_FLOW:
+               cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int enic_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
                          u32 *rule_locs)
 {
@@ -500,6 +533,9 @@ static int enic_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
                ret = enic_grxclsrule(enic, cmd);
                spin_unlock_bh(&enic->rfs_h.lock);
                break;
+       case ETHTOOL_GRXFH:
+               ret = enic_get_rx_flow_hash(enic, cmd);
+               break;
        default:
                ret = -EOPNOTSUPP;
                break;
index f202ba72a8116af610216d756596747a27741104..81684acf52afa576bda05d9c535a7890c5ee8deb 100644 (file)
@@ -191,8 +191,16 @@ static void enic_udp_tunnel_add(struct net_device *netdev,
                goto error;
        }
 
-       if (ti->sa_family != AF_INET) {
-               netdev_info(netdev, "vxlan: only IPv4 offload supported");
+       switch (ti->sa_family) {
+       case AF_INET6:
+               if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6)) {
+                       netdev_info(netdev, "vxlan: only IPv4 offload supported");
+                       goto error;
+               }
+               /* Fall through */
+       case AF_INET:
+               break;
+       default:
                goto error;
        }
 
@@ -204,6 +212,11 @@ static void enic_udp_tunnel_add(struct net_device *netdev,
 
                goto error;
        }
+       if ((vnic_dev_get_res_count(enic->vdev, RES_TYPE_WQ) != 1) &&
+           !(enic->vxlan.flags & ENIC_VXLAN_MULTI_WQ)) {
+               netdev_info(netdev, "vxlan: vxlan offload with multi wq not supported on this adapter");
+               goto error;
+       }
 
        err = vnic_dev_overlay_offload_cfg(enic->vdev,
                                           OVERLAY_CFG_VXLAN_PORT_UPDATE,
@@ -238,9 +251,8 @@ static void enic_udp_tunnel_del(struct net_device *netdev,
 
        spin_lock_bh(&enic->devcmd_lock);
 
-       if ((ti->sa_family != AF_INET) ||
-           ((ntohs(ti->port) != enic->vxlan.vxlan_udp_port_number)) ||
-           (ti->type != UDP_TUNNEL_TYPE_VXLAN)) {
+       if ((ntohs(ti->port) != enic->vxlan.vxlan_udp_port_number) ||
+           ti->type != UDP_TUNNEL_TYPE_VXLAN) {
                netdev_info(netdev, "udp_tnl: port:%d, sa_family: %d, type: %d not offloaded",
                            ntohs(ti->port), ti->sa_family, ti->type);
                goto unlock;
@@ -271,22 +283,37 @@ static netdev_features_t enic_features_check(struct sk_buff *skb,
        struct enic *enic = netdev_priv(dev);
        struct udphdr *udph;
        u16 port = 0;
-       u16 proto;
+       u8 proto;
 
        if (!skb->encapsulation)
                return features;
 
        features = vxlan_features_check(skb, features);
 
-       /* hardware only supports IPv4 vxlan tunnel */
-       if (vlan_get_protocol(skb) != htons(ETH_P_IP))
+       switch (vlan_get_protocol(skb)) {
+       case htons(ETH_P_IPV6):
+               if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6))
+                       goto out;
+               proto = ipv6_hdr(skb)->nexthdr;
+               break;
+       case htons(ETH_P_IP):
+               proto = ip_hdr(skb)->protocol;
+               break;
+       default:
                goto out;
+       }
 
-       /* hardware does not support offload of ipv6 inner pkt */
-       if (eth->h_proto != ntohs(ETH_P_IP))
+       switch (eth->h_proto) {
+       case ntohs(ETH_P_IPV6):
+               if (!(enic->vxlan.flags & ENIC_VXLAN_INNER_IPV6))
+                       goto out;
+               /* Fall through */
+       case ntohs(ETH_P_IP):
+               break;
+       default:
                goto out;
+       }
 
-       proto = ip_hdr(skb)->protocol;
 
        if (proto == IPPROTO_UDP) {
                udph = udp_hdr(skb);
@@ -635,12 +662,25 @@ static int enic_queue_wq_skb_csum_l4(struct enic *enic, struct vnic_wq *wq,
 
 static void enic_preload_tcp_csum_encap(struct sk_buff *skb)
 {
-       if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
+       const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);
+
+       switch (eth->h_proto) {
+       case ntohs(ETH_P_IP):
                inner_ip_hdr(skb)->check = 0;
                inner_tcp_hdr(skb)->check =
                        ~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr,
                                           inner_ip_hdr(skb)->daddr, 0,
                                           IPPROTO_TCP, 0);
+               break;
+       case ntohs(ETH_P_IPV6):
+               inner_tcp_hdr(skb)->check =
+                       ~csum_ipv6_magic(&inner_ipv6_hdr(skb)->saddr,
+                                        &inner_ipv6_hdr(skb)->daddr, 0,
+                                        IPPROTO_TCP, 0);
+               break;
+       default:
+               WARN_ONCE(1, "Non ipv4/ipv6 inner pkt for encap offload");
+               break;
        }
 }
 
@@ -1898,6 +1938,8 @@ static int enic_open(struct net_device *netdev)
        }
 
        for (i = 0; i < enic->rq_count; i++) {
+               /* enable rq before updating rq desc */
+               vnic_rq_enable(&enic->rq[i]);
                vnic_rq_fill(&enic->rq[i], enic_rq_alloc_buf);
                /* Need at least one buffer on ring to get going */
                if (vnic_rq_desc_used(&enic->rq[i]) == 0) {
@@ -1909,8 +1951,6 @@ static int enic_open(struct net_device *netdev)
 
        for (i = 0; i < enic->wq_count; i++)
                vnic_wq_enable(&enic->wq[i]);
-       for (i = 0; i < enic->rq_count; i++)
-               vnic_rq_enable(&enic->rq[i]);
 
        if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
                enic_dev_add_station_addr(enic);
@@ -1936,8 +1976,12 @@ static int enic_open(struct net_device *netdev)
        return 0;
 
 err_out_free_rq:
-       for (i = 0; i < enic->rq_count; i++)
+       for (i = 0; i < enic->rq_count; i++) {
+               err = vnic_rq_disable(&enic->rq[i]);
+               if (err)
+                       return err;
                vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
+       }
        enic_dev_notify_unset(enic);
 err_out_free_intr:
        enic_unset_affinity_hint(enic);
@@ -2151,9 +2195,10 @@ static int enic_dev_wait(struct vnic_dev *vdev,
 static int enic_dev_open(struct enic *enic)
 {
        int err;
+       u32 flags = CMD_OPENF_IG_DESCCACHE;
 
        err = enic_dev_wait(enic->vdev, vnic_dev_open,
-               vnic_dev_open_done, 0);
+               vnic_dev_open_done, flags);
        if (err)
                dev_err(enic_get_dev(enic), "vNIC device open failed, err %d\n",
                        err);
@@ -2275,7 +2320,7 @@ static int enic_set_rss_nic_cfg(struct enic *enic)
 {
        struct device *dev = enic_get_dev(enic);
        const u8 rss_default_cpu = 0;
-       const u8 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 |
+       u8 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 |
                NIC_CFG_RSS_HASH_TYPE_TCP_IPV4 |
                NIC_CFG_RSS_HASH_TYPE_IPV6 |
                NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
@@ -2283,6 +2328,8 @@ static int enic_set_rss_nic_cfg(struct enic *enic)
        const u8 rss_base_cpu = 0;
        u8 rss_enable = ENIC_SETTING(enic, RSS) && (enic->rq_count > 1);
 
+       if (vnic_dev_capable_udp_rss(enic->vdev))
+               rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP;
        if (rss_enable) {
                if (!enic_set_rsskey(enic)) {
                        if (enic_set_rsscpu(enic, rss_hash_bits)) {
@@ -2901,9 +2948,11 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                netdev->hw_features |= NETIF_F_RXCSUM;
        if (ENIC_SETTING(enic, VXLAN)) {
                u64 patch_level;
+               u64 a1 = 0;
 
                netdev->hw_enc_features |= NETIF_F_RXCSUM               |
                                           NETIF_F_TSO                  |
+                                          NETIF_F_TSO6                 |
                                           NETIF_F_TSO_ECN              |
                                           NETIF_F_GSO_UDP_TUNNEL       |
                                           NETIF_F_HW_CSUM              |
@@ -2922,9 +2971,10 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                 */
                err = vnic_dev_get_supported_feature_ver(enic->vdev,
                                                         VIC_FEATURE_VXLAN,
-                                                        &patch_level);
+                                                        &patch_level, &a1);
                if (err)
                        patch_level = 0;
+               enic->vxlan.flags = (u8)a1;
                /* mask bits that are supported by driver
                 */
                patch_level &= BIT_ULL(0) | BIT_ULL(2);
index 39bad67422dd48ae460dc2081b30ccbd64caaeee..76cdd4c9d11f3e64c33102d3e1b145f45c57762a 100644 (file)
@@ -1269,16 +1269,32 @@ int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
 }
 
 int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature,
-                                      u64 *supported_versions)
+                                      u64 *supported_versions, u64 *a1)
 {
        u64 a0 = feature;
        int wait = 1000;
-       u64 a1 = 0;
        int ret;
 
-       ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, &a1, wait);
+       ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, a1, wait);
        if (!ret)
                *supported_versions = a0;
 
        return ret;
 }
+
+bool vnic_dev_capable_udp_rss(struct vnic_dev *vdev)
+{
+       u64 a0 = CMD_NIC_CFG, a1 = 0;
+       u64 rss_hash_type;
+       int wait = 1000;
+       int err;
+
+       err = vnic_dev_cmd(vdev, CMD_CAPABILITY, &a0, &a1, wait);
+       if (err || !a0)
+               return false;
+
+       rss_hash_type = (a1 >> NIC_CFG_RSS_HASH_TYPE_SHIFT) &
+                       NIC_CFG_RSS_HASH_TYPE_MASK_FIELD;
+
+       return (rss_hash_type & NIC_CFG_RSS_HASH_TYPE_UDP);
+}
index 9d43d6bb9907ec323d9c1520720447bb7731c808..59d4cc8fbb85b21f4ce37e39a64066112db68844 100644 (file)
@@ -183,6 +183,7 @@ int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev, u8 overlay, u8 config);
 int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
                                 u16 vxlan_udp_port_number);
 int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature,
-                                      u64 *supported_versions);
+                                      u64 *supported_versions, u64 *a1);
+bool vnic_dev_capable_udp_rss(struct vnic_dev *vdev);
 
 #endif /* _VNIC_DEV_H_ */
index d83880b0d46852d51ddaec76f59fbed19cefba0c..41de4ba622a1605db7d4d35e1c5f6f105f5db1aa 100644 (file)
@@ -439,6 +439,7 @@ enum vnic_devcmd_cmd {
 
 /* flags for CMD_OPEN */
 #define CMD_OPENF_OPROM                0x1     /* open coming from option rom */
+#define CMD_OPENF_IG_DESCCACHE 0x2     /* Do not flush IG DESC cache */
 
 /* flags for CMD_INIT */
 #define CMD_INITF_DEFAULT_MAC  0x1     /* init with default mac addr */
@@ -697,6 +698,10 @@ enum overlay_ofld_cmd {
 
 #define OVERLAY_CFG_VXLAN_PORT_UPDATE  0
 
+#define ENIC_VXLAN_INNER_IPV6          BIT(0)
+#define ENIC_VXLAN_OUTER_IPV6          BIT(1)
+#define ENIC_VXLAN_MULTI_WQ            BIT(2)
+
 /* Use this enum to get the supported versions for each of these features
  * If you need to use the devcmd_get_supported_feature_version(), add
  * the new feature into this enum and install function handler in devcmd.c
index 995a50dd4c99ca926769e8caeb1a5fbd51c22d9b..5a93db0d7afcaee53e6f5cd47a9c00d1e6672845 100644 (file)
@@ -47,6 +47,7 @@
 #define NIC_CFG_RSS_HASH_TYPE_TCP_IPV6         (1 << 4)
 #define NIC_CFG_RSS_HASH_TYPE_IPV6_EX          (1 << 5)
 #define NIC_CFG_RSS_HASH_TYPE_TCP_IPV6_EX      (1 << 6)
+#define NIC_CFG_RSS_HASH_TYPE_UDP              (1 << 7)
 
 static inline void vnic_set_nic_cfg(u32 *nic_cfg,
        u8 rss_default_cpu, u8 rss_hash_type,
index 5eb999af2c40004fc028ecd67901c492916480c5..bd3f6e4d134138424ec5d627e6ce1937a7529079 100644 (file)
@@ -540,6 +540,7 @@ static int gmac_setup_txqs(struct net_device *netdev)
 
        if (port->txq_dma_base & ~DMA_Q_BASE_MASK) {
                dev_warn(geth->dev, "TX queue base it not aligned\n");
+               kfree(skb_tab);
                return -ENOMEM;
        }
 
index 1a49297224ed956a6dc3f4fbf9460d476fadbe5c..ff92ab1daeb80cec7779e1f00faa5f0174fcf727 100644 (file)
@@ -19,7 +19,7 @@
 #include "be.h"
 #include "be_cmds.h"
 
-char *be_misconfig_evt_port_state[] = {
+const char * const be_misconfig_evt_port_state[] = {
        "Physical Link is functional",
        "Optics faulted/incorrectly installed/not installed - Reseat optics. If issue not resolved, replace.",
        "Optics of two types installed – Remove one optic or install matching pair of optics.",
index 09da2d82c2f0dc1f86074a73f89942fb06c771c2..e8b43cf44b6f0991f3eab067c930b4a1abc35f54 100644 (file)
@@ -201,7 +201,7 @@ enum {
                         phy_state == BE_PHY_UNQUALIFIED ||     \
                         phy_state == BE_PHY_UNCERTIFIED)
 
-extern  char *be_misconfig_evt_port_state[];
+extern const  char * const be_misconfig_evt_port_state[];
 
 /* async event indicating misconfigured port */
 struct be_async_event_misconfig_port {
index a998c36c5e610b95f210ec7120d72e0fb499ff24..fd43f98ddbe74bedec6c53c3467e622b7480acc8 100644 (file)
@@ -454,6 +454,16 @@ static void dpaa_set_rx_mode(struct net_device *net_dev)
                                  err);
        }
 
+       if (!!(net_dev->flags & IFF_ALLMULTI) != priv->mac_dev->allmulti) {
+               priv->mac_dev->allmulti = !priv->mac_dev->allmulti;
+               err = priv->mac_dev->set_allmulti(priv->mac_dev->fman_mac,
+                                                 priv->mac_dev->allmulti);
+               if (err < 0)
+                       netif_err(priv, drv, net_dev,
+                                 "mac_dev->set_allmulti() = %d\n",
+                                 err);
+       }
+
        err = priv->mac_dev->set_multi(net_dev, priv->mac_dev);
        if (err < 0)
                netif_err(priv, drv, net_dev, "mac_dev->set_multi() = %d\n",
@@ -1916,8 +1926,10 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
                goto csum_failed;
        }
 
+       /* SGT[0] is used by the linear part */
        sgt = (struct qm_sg_entry *)(sgt_buf + priv->tx_headroom);
-       qm_sg_entry_set_len(&sgt[0], skb_headlen(skb));
+       frag_len = skb_headlen(skb);
+       qm_sg_entry_set_len(&sgt[0], frag_len);
        sgt[0].bpid = FSL_DPAA_BPID_INV;
        sgt[0].offset = 0;
        addr = dma_map_single(dev, skb->data,
@@ -1930,9 +1942,9 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
        qm_sg_entry_set64(&sgt[0], addr);
 
        /* populate the rest of SGT entries */
-       frag = &skb_shinfo(skb)->frags[0];
-       frag_len = frag->size;
-       for (i = 1; i <= nr_frags; i++, frag++) {
+       for (i = 0; i < nr_frags; i++) {
+               frag = &skb_shinfo(skb)->frags[i];
+               frag_len = frag->size;
                WARN_ON(!skb_frag_page(frag));
                addr = skb_frag_dma_map(dev, frag, 0,
                                        frag_len, dma_dir);
@@ -1942,15 +1954,16 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
                        goto sg_map_failed;
                }
 
-               qm_sg_entry_set_len(&sgt[i], frag_len);
-               sgt[i].bpid = FSL_DPAA_BPID_INV;
-               sgt[i].offset = 0;
+               qm_sg_entry_set_len(&sgt[i + 1], frag_len);
+               sgt[i + 1].bpid = FSL_DPAA_BPID_INV;
+               sgt[i + 1].offset = 0;
 
                /* keep the offset in the address */
-               qm_sg_entry_set64(&sgt[i], addr);
-               frag_len = frag->size;
+               qm_sg_entry_set64(&sgt[i + 1], addr);
        }
-       qm_sg_entry_set_f(&sgt[i - 1], frag_len);
+
+       /* Set the final bit in the last used entry of the SGT */
+       qm_sg_entry_set_f(&sgt[nr_frags], frag_len);
 
        qm_fd_set_sg(fd, priv->tx_headroom, skb->len);
 
@@ -2008,7 +2021,6 @@ static inline int dpaa_xmit(struct dpaa_priv *priv,
        }
 
        if (unlikely(err < 0)) {
-               percpu_stats->tx_errors++;
                percpu_stats->tx_fifo_errors++;
                return err;
        }
@@ -2052,19 +2064,23 @@ static int dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
        /* MAX_SKB_FRAGS is equal or larger than our dpaa_SGT_MAX_ENTRIES;
         * make sure we don't feed FMan with more fragments than it supports.
         */
-       if (nonlinear &&
-           likely(skb_shinfo(skb)->nr_frags < DPAA_SGT_MAX_ENTRIES)) {
-               /* Just create a S/G fd based on the skb */
-               err = skb_to_sg_fd(priv, skb, &fd);
-               percpu_priv->tx_frag_skbuffs++;
-       } else {
+       if (unlikely(nonlinear &&
+                    (skb_shinfo(skb)->nr_frags >= DPAA_SGT_MAX_ENTRIES))) {
                /* If the egress skb contains more fragments than we support
                 * we have no choice but to linearize it ourselves.
                 */
-               if (unlikely(nonlinear) && __skb_linearize(skb))
+               if (__skb_linearize(skb))
                        goto enomem;
 
-               /* Finally, create a contig FD from this skb */
+               nonlinear = skb_is_nonlinear(skb);
+       }
+
+       if (nonlinear) {
+               /* Just create a S/G fd based on the skb */
+               err = skb_to_sg_fd(priv, skb, &fd);
+               percpu_priv->tx_frag_skbuffs++;
+       } else {
+               /* Create a contig FD from this skb */
                err = skb_to_contig_fd(priv, skb, &fd, &offset);
        }
        if (unlikely(err < 0))
@@ -2201,14 +2217,8 @@ static enum qman_cb_dqrr_result rx_error_dqrr(struct qman_portal *portal,
        if (dpaa_eth_napi_schedule(percpu_priv, portal))
                return qman_cb_dqrr_stop;
 
-       if (dpaa_eth_refill_bpools(priv))
-               /* Unable to refill the buffer pool due to insufficient
-                * system memory. Just release the frame back into the pool,
-                * otherwise we'll soon end up with an empty buffer pool.
-                */
-               dpaa_fd_release(net_dev, &dq->fd);
-       else
-               dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
+       dpaa_eth_refill_bpools(priv);
+       dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
 
        return qman_cb_dqrr_consume;
 }
@@ -2278,7 +2288,6 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
        vaddr = phys_to_virt(addr);
        prefetch(vaddr + qm_fd_get_offset(fd));
 
-       fd_format = qm_fd_get_format(fd);
        /* The only FD types that we may receive are contig and S/G */
        WARN_ON((fd_format != qm_fd_contig) && (fd_format != qm_fd_sg));
 
@@ -2311,8 +2320,10 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
 
        skb_len = skb->len;
 
-       if (unlikely(netif_receive_skb(skb) == NET_RX_DROP))
+       if (unlikely(netif_receive_skb(skb) == NET_RX_DROP)) {
+               percpu_stats->rx_dropped++;
                return qman_cb_dqrr_consume;
+       }
 
        percpu_stats->rx_packets++;
        percpu_stats->rx_bytes += skb_len;
@@ -2860,7 +2871,7 @@ static int dpaa_remove(struct platform_device *pdev)
        struct device *dev;
        int err;
 
-       dev = &pdev->dev;
+       dev = pdev->dev.parent;
        net_dev = dev_get_drvdata(dev);
 
        priv = netdev_priv(net_dev);
index 85306d1b2acf5b3dbfb64a6d5b2dae46818accec..2f933b6b2f4e79b9e1359d8a9b57ba6af4acbfb9 100644 (file)
@@ -344,7 +344,7 @@ static void dpaa_get_ethtool_stats(struct net_device *net_dev,
 
        /* gather congestion related counters */
        cg_num    = 0;
-       cg_status = 0;
+       cg_status = false;
        cg_time   = jiffies_to_msecs(priv->cgr_data.congested_jiffies);
        if (qman_query_cgr_congested(&priv->cgr_data.cgr, &cg_status) == 0) {
                cg_num    = priv->cgr_data.cgr_congested_count;
index 7a7f3a42b2aa1ee12e467d9295b8422a835bdd12..d4604bc8eb5b04742534100c4c285065bda2021e 100644 (file)
@@ -3600,6 +3600,8 @@ fec_drv_remove(struct platform_device *pdev)
        fec_enet_mii_remove(fep);
        if (fep->reg_phy)
                regulator_disable(fep->reg_phy);
+       pm_runtime_put(&pdev->dev);
+       pm_runtime_disable(&pdev->dev);
        if (of_phy_is_fixed_link(np))
                of_phy_deregister_fixed_link(np);
        of_node_put(fep->phy_node);
index ea43b497414986c55d07ce9b175082202f951044..57b1e2b47c0a9c68a8bfeb18e166804c3fe0cacd 100644 (file)
@@ -1100,7 +1100,7 @@ int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
        set_bucket(dtsec->regs, bucket, true);
 
        /* Create element to be added to the driver hash table */
-       hash_entry = kmalloc(sizeof(*hash_entry), GFP_KERNEL);
+       hash_entry = kmalloc(sizeof(*hash_entry), GFP_ATOMIC);
        if (!hash_entry)
                return -ENOMEM;
        hash_entry->addr = addr;
@@ -1117,6 +1117,25 @@ int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
        return 0;
 }
 
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable)
+{
+       u32 tmp;
+       struct dtsec_regs __iomem *regs = dtsec->regs;
+
+       if (!is_init_done(dtsec->dtsec_drv_param))
+               return -EINVAL;
+
+       tmp = ioread32be(&regs->rctrl);
+       if (enable)
+               tmp |= RCTRL_MPROM;
+       else
+               tmp &= ~RCTRL_MPROM;
+
+       iowrite32be(tmp, &regs->rctrl);
+
+       return 0;
+}
+
 int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
 {
        struct dtsec_regs __iomem *regs = dtsec->regs;
index c4467c072058644b97699e1d61e654510ed610d4..1a689adf5a22744a12d81f8ef1f535f1f9d420f4 100644 (file)
@@ -55,5 +55,6 @@ int dtsec_set_exception(struct fman_mac *dtsec,
 int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
 int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
 int dtsec_get_version(struct fman_mac *dtsec, u32 *mac_version);
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable);
 
 #endif /* __DTSEC_H */
index c0296880feba7f1afa505d4f9c08003e496de429..446a97b792e3dea467f96c4fed3dfee1cd7840f4 100644 (file)
@@ -350,6 +350,7 @@ struct fman_mac {
        struct fman_rev_info fm_rev_info;
        bool basex_if;
        struct phy_device *pcsphy;
+       bool allmulti_enabled;
 };
 
 static void add_addr_in_paddr(struct memac_regs __iomem *regs, u8 *adr,
@@ -940,6 +941,29 @@ int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
        return 0;
 }
 
+int memac_set_allmulti(struct fman_mac *memac, bool enable)
+{
+       u32 entry;
+       struct memac_regs __iomem *regs = memac->regs;
+
+       if (!is_init_done(memac->memac_drv_param))
+               return -EINVAL;
+
+       if (enable) {
+               for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+                       iowrite32be(entry | HASH_CTRL_MCAST_EN,
+                                   &regs->hashtable_ctrl);
+       } else {
+               for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+                       iowrite32be(entry & ~HASH_CTRL_MCAST_EN,
+                                   &regs->hashtable_ctrl);
+       }
+
+       memac->allmulti_enabled = enable;
+
+       return 0;
+}
+
 int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
 {
        struct memac_regs __iomem *regs = memac->regs;
@@ -963,8 +987,12 @@ int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
                        break;
                }
        }
-       if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
-               iowrite32be(hash & ~HASH_CTRL_MCAST_EN, &regs->hashtable_ctrl);
+
+       if (!memac->allmulti_enabled) {
+               if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
+                       iowrite32be(hash & ~HASH_CTRL_MCAST_EN,
+                                   &regs->hashtable_ctrl);
+       }
 
        return 0;
 }
index c4a66469a9074daab469f304751acc06df8f9839..b5a50338ed9ae21dd824129295ec868454c1da1a 100644 (file)
@@ -57,5 +57,6 @@ int memac_set_exception(struct fman_mac *memac,
                        enum fman_mac_exceptions exception, bool enable);
 int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
 int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
+int memac_set_allmulti(struct fman_mac *memac, bool enable);
 
 #endif /* __MEMAC_H */
index 4b0f3a50b2939aa31fc6274235e263f1277461fb..284735d4ebe9bbd452fd902c6723a6deb99cbd69 100644 (file)
@@ -217,6 +217,7 @@ struct fman_mac {
        struct tgec_cfg *cfg;
        void *fm;
        struct fman_rev_info fm_rev_info;
+       bool allmulti_enabled;
 };
 
 static void set_mac_address(struct tgec_regs __iomem *regs, u8 *adr)
@@ -564,6 +565,29 @@ int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
        return 0;
 }
 
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable)
+{
+       u32 entry;
+       struct tgec_regs __iomem *regs = tgec->regs;
+
+       if (!is_init_done(tgec->cfg))
+               return -EINVAL;
+
+       if (enable) {
+               for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+                       iowrite32be(entry | TGEC_HASH_MCAST_EN,
+                                   &regs->hashtable_ctrl);
+       } else {
+               for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+                       iowrite32be(entry & ~TGEC_HASH_MCAST_EN,
+                                   &regs->hashtable_ctrl);
+       }
+
+       tgec->allmulti_enabled = enable;
+
+       return 0;
+}
+
 int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
 {
        struct tgec_regs __iomem *regs = tgec->regs;
@@ -591,9 +615,12 @@ int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
                        break;
                }
        }
-       if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
-               iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
-                           &regs->hashtable_ctrl);
+
+       if (!tgec->allmulti_enabled) {
+               if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
+                       iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
+                                   &regs->hashtable_ctrl);
+       }
 
        return 0;
 }
index 514bba9f47ce65df203861b92e23b414a5e5e140..cbbd3b422a98b43a50a0c5a1e33705051f9b02dd 100644 (file)
@@ -51,5 +51,6 @@ int tgec_set_exception(struct fman_mac *tgec,
 int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
 int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
 int tgec_get_version(struct fman_mac *tgec, u32 *mac_version);
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable);
 
 #endif /* __TGEC_H */
index 88c0a0636b44562fa68f59c9912e242739d63874..4829dcd9e0771f2bcea84f8b2148f8fe11a30500 100644 (file)
@@ -470,6 +470,7 @@ static void setup_dtsec(struct mac_device *mac_dev)
        mac_dev->set_tx_pause           = dtsec_set_tx_pause_frames;
        mac_dev->set_rx_pause           = dtsec_accept_rx_pause_frames;
        mac_dev->set_exception          = dtsec_set_exception;
+       mac_dev->set_allmulti           = dtsec_set_allmulti;
        mac_dev->set_multi              = set_multi;
        mac_dev->start                  = start;
        mac_dev->stop                   = stop;
@@ -488,6 +489,7 @@ static void setup_tgec(struct mac_device *mac_dev)
        mac_dev->set_tx_pause           = tgec_set_tx_pause_frames;
        mac_dev->set_rx_pause           = tgec_accept_rx_pause_frames;
        mac_dev->set_exception          = tgec_set_exception;
+       mac_dev->set_allmulti           = tgec_set_allmulti;
        mac_dev->set_multi              = set_multi;
        mac_dev->start                  = start;
        mac_dev->stop                   = stop;
@@ -506,6 +508,7 @@ static void setup_memac(struct mac_device *mac_dev)
        mac_dev->set_tx_pause           = memac_set_tx_pause_frames;
        mac_dev->set_rx_pause           = memac_accept_rx_pause_frames;
        mac_dev->set_exception          = memac_set_exception;
+       mac_dev->set_allmulti           = memac_set_allmulti;
        mac_dev->set_multi              = set_multi;
        mac_dev->start                  = start;
        mac_dev->stop                   = stop;
index eefb3357e304a9c6eeeadd89065094c2a882287d..b520cec120ee0af8fc571f8820079cffad8e4371 100644 (file)
@@ -59,6 +59,7 @@ struct mac_device {
        bool rx_pause_active;
        bool tx_pause_active;
        bool promisc;
+       bool allmulti;
 
        int (*init)(struct mac_device *mac_dev);
        int (*start)(struct mac_device *mac_dev);
@@ -66,6 +67,7 @@ struct mac_device {
        void (*adjust_link)(struct mac_device *mac_dev);
        int (*set_promisc)(struct fman_mac *mac_dev, bool enable);
        int (*change_addr)(struct fman_mac *mac_dev, enet_addr_t *enet_addr);
+       int (*set_allmulti)(struct fman_mac *mac_dev, bool enable);
        int (*set_multi)(struct net_device *net_dev,
                         struct mac_device *mac_dev);
        int (*set_rx_pause)(struct fman_mac *mac_dev, bool en);
index f5c87bd35fa1ae52e854ea1bc75e4e9d80a8b4dc..f27f9bae1a4ac02811590636f55d74dcb0568225 100644 (file)
@@ -3063,9 +3063,6 @@ static void gfar_process_frame(struct net_device *ndev, struct sk_buff *skb)
        if (ndev->features & NETIF_F_RXCSUM)
                gfar_rx_checksum(skb, fcb);
 
-       /* Tell the skb what kind of packet this is */
-       skb->protocol = eth_type_trans(skb, ndev);
-
        /* There's need to check for NETIF_F_HW_VLAN_CTAG_RX here.
         * Even if vlan rx accel is disabled, on some chips
         * RXFCB_VLN is pseudo randomly set.
@@ -3136,13 +3133,15 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit)
                        continue;
                }
 
+               gfar_process_frame(ndev, skb);
+
                /* Increment the number of packets */
                total_pkts++;
                total_bytes += skb->len;
 
                skb_record_rx_queue(skb, rx_queue->qindex);
 
-               gfar_process_frame(ndev, skb);
+               skb->protocol = eth_type_trans(skb, ndev);
 
                /* Send the packet up the stack */
                napi_gro_receive(&rx_queue->grp->napi_rx, skb);
index 86944bc3b273fd97232a60e68f25046e68042882..74bd260ca02a887869a507f8746dfc928522d4be 100644 (file)
@@ -666,7 +666,7 @@ static void hns_gmac_get_strings(u32 stringset, u8 *data)
 
 static int hns_gmac_get_sset_count(int stringset)
 {
-       if (stringset == ETH_SS_STATS || stringset == ETH_SS_PRIV_FLAGS)
+       if (stringset == ETH_SS_STATS)
                return ARRAY_SIZE(g_gmac_stats_string);
 
        return 0;
index b62816c1574eb840f74a334b904f9fd993733116..93e71e27401b4da815e899753dc7be1a83ff3f14 100644 (file)
@@ -422,7 +422,7 @@ void hns_ppe_update_stats(struct hns_ppe_cb *ppe_cb)
 
 int hns_ppe_get_sset_count(int stringset)
 {
-       if (stringset == ETH_SS_STATS || stringset == ETH_SS_PRIV_FLAGS)
+       if (stringset == ETH_SS_STATS)
                return ETH_PPE_STATIC_NUM;
        return 0;
 }
index 6f3570cfb501604bea3f22d73374dd8dc28d756f..e2e28532e4dc2d03cf15330c621f8fb49469e382 100644 (file)
@@ -876,7 +876,7 @@ void hns_rcb_get_stats(struct hnae_queue *queue, u64 *data)
  */
 int hns_rcb_get_ring_sset_count(int stringset)
 {
-       if (stringset == ETH_SS_STATS || stringset == ETH_SS_PRIV_FLAGS)
+       if (stringset == ETH_SS_STATS)
                return HNS_RING_STATIC_REG_NUM;
 
        return 0;
index 7ea7f8a4aa2a9456f2d71cceccae9eff2b83421a..2e14a3ae1d8be0f9841a5c53f456c4d2e4f4d270 100644 (file)
@@ -993,8 +993,10 @@ int hns_get_sset_count(struct net_device *netdev, int stringset)
                        cnt--;
 
                return cnt;
-       } else {
+       } else if (stringset == ETH_SS_STATS) {
                return (HNS_NET_STATS_CNT + ops->get_sset_count(h, stringset));
+       } else {
+               return -EOPNOTSUPP;
        }
 }
 
index 3e9203ea42a692b23f633793485a5030a69c2972..519e2bd6aa60ece71f5404ab35f13261fc3297a1 100644 (file)
@@ -11,6 +11,7 @@
 
 enum HCLGE_MBX_OPCODE {
        HCLGE_MBX_RESET = 0x01,         /* (VF -> PF) assert reset */
+       HCLGE_MBX_ASSERTING_RESET,      /* (PF -> VF) PF is asserting reset*/
        HCLGE_MBX_SET_UNICAST,          /* (VF -> PF) set UC addr */
        HCLGE_MBX_SET_MULTICAST,        /* (VF -> PF) set MC addr */
        HCLGE_MBX_SET_VLAN,             /* (VF -> PF) set VLAN */
@@ -57,6 +58,8 @@ enum hclge_mbx_vlan_cfg_subcode {
 
 #define HCLGE_MBX_MAX_MSG_SIZE 16
 #define HCLGE_MBX_MAX_RESP_DATA_SIZE   8
+#define HCLGE_MBX_RING_MAP_BASIC_MSG_NUM       3
+#define HCLGE_MBX_RING_NODE_VARIABLE_NUM       3
 
 struct hclgevf_mbx_resp_status {
        struct mutex mbx_mutex; /* protects against contending sync cmd resp */
@@ -83,6 +86,21 @@ struct hclge_mbx_pf_to_vf_cmd {
        u16 msg[8];
 };
 
+/* Used by the VF to store the asynchronous responses received from the PF.
+ * msg_q provides HCLGE_MBX_MAX_ARQ_MSG_NUM slots, each holding
+ * HCLGE_MBX_MAX_ARQ_MSG_SIZE u16 words.
+ */
+struct hclgevf_mbx_arq_ring {
+#define HCLGE_MBX_MAX_ARQ_MSG_SIZE     8
+#define HCLGE_MBX_MAX_ARQ_MSG_NUM      1024
+       struct hclgevf_dev *hdev;
+       /* NOTE(review): head/tail look like the consumer/producer slot
+        * indices into msg_q, and count the number of queued messages --
+        * confirm against the users of this ring.
+        */
+       u32 head;
+       u32 tail;
+       u32 count;
+       u16 msg_q[HCLGE_MBX_MAX_ARQ_MSG_NUM][HCLGE_MBX_MAX_ARQ_MSG_SIZE];
+};
+
 #define hclge_mbx_ring_ptr_move_crq(crq) \
        (crq->next_to_use = (crq->next_to_use + 1) % crq->desc_num)
+/* Advance the ARQ tail/head index by one message slot. The index wraps at
+ * the number of slots in msg_q (HCLGE_MBX_MAX_ARQ_MSG_NUM), not at the size
+ * of a single message, so the whole ring is usable.
+ */
+#define hclge_mbx_tail_ptr_move_arq(arq) \
+       ((arq).tail = ((arq).tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM)
+#define hclge_mbx_head_ptr_move_arq(arq) \
+       ((arq).head = ((arq).head + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM)
 #endif
index fd06bc78c58ec90b9619205e698f2972c6433cf7..37ec1b3286c6d38796b36b480f8a4f670d64f3cc 100644 (file)
@@ -118,6 +118,8 @@ enum hnae3_reset_notify_type {
 };
 
 enum hnae3_reset_type {
+       HNAE3_VF_RESET,
+       HNAE3_VF_FULL_RESET,
        HNAE3_FUNC_RESET,
        HNAE3_CORE_RESET,
        HNAE3_GLOBAL_RESET,
@@ -265,6 +267,8 @@ struct hnae3_ae_dev {
  *   Get tc size of handle
  * get_vector()
  *   Get vector number and vector information
+ * put_vector()
+ *   Put the vector in hdev
  * map_ring_to_vector()
  *   Map rings to vector
  * unmap_ring_from_vector()
@@ -336,7 +340,8 @@ struct hnae3_ae_ops {
                                   u32 *tx_usecs_high, u32 *rx_usecs_high);
 
        void (*get_mac_addr)(struct hnae3_handle *handle, u8 *p);
-       int (*set_mac_addr)(struct hnae3_handle *handle, void *p);
+       int (*set_mac_addr)(struct hnae3_handle *handle, void *p,
+                           bool is_first);
        int (*add_uc_addr)(struct hnae3_handle *handle,
                           const unsigned char *addr);
        int (*rm_uc_addr)(struct hnae3_handle *handle,
@@ -375,6 +380,7 @@ struct hnae3_ae_ops {
 
        int (*get_vector)(struct hnae3_handle *handle, u16 vector_num,
                          struct hnae3_vector_info *vector_info);
+       int (*put_vector)(struct hnae3_handle *handle, int vector_num);
        int (*map_ring_to_vector)(struct hnae3_handle *handle,
                                  int vector_num,
                                  struct hnae3_ring_chain_node *vr_chain);
@@ -396,8 +402,7 @@ struct hnae3_ae_ops {
        int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid,
                                  u16 vlan, u8 qos, __be16 proto);
        int (*enable_hw_strip_rxvtag)(struct hnae3_handle *handle, bool enable);
-       void (*reset_event)(struct hnae3_handle *handle,
-                           enum hnae3_reset_type reset);
+       void (*reset_event)(struct hnae3_handle *handle);
        void (*get_channels)(struct hnae3_handle *handle,
                             struct ethtool_channels *ch);
        void (*get_tqps_and_rss_info)(struct hnae3_handle *h,
@@ -407,6 +412,10 @@ struct hnae3_ae_ops {
                                 u32 *flowctrl_adv);
        int (*set_led_id)(struct hnae3_handle *handle,
                          enum ethtool_phys_id_state status);
+       void (*get_link_mode)(struct hnae3_handle *handle,
+                             unsigned long *supported,
+                             unsigned long *advertising);
+       void (*get_port_type)(struct hnae3_handle *handle, u8 *port_type);
 };
 
 struct hnae3_dcb_ops {
@@ -487,6 +496,9 @@ struct hnae3_handle {
        struct hnae3_ae_algo *ae_algo;  /* the class who provides this handle */
        u64 flags; /* Indicate the capabilities for this handle*/
 
+       unsigned long last_reset_time;
+       enum hnae3_reset_type reset_level;
+
        union {
                struct net_device *netdev; /* first member */
                struct hnae3_knic_private_info kinfo;
index 601b6295d3f82db1b63b2f88689dc8c7161cb333..40a3eb70629e14eaaa9d41b064a7c229c530ca0e 100644 (file)
@@ -168,8 +168,8 @@ void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector,
         * GL and RL(Rate Limiter) are 2 ways to acheive interrupt coalescing
         */
 
-       if (rl_reg > 0 && !tqp_vector->tx_group.gl_adapt_enable &&
-           !tqp_vector->rx_group.gl_adapt_enable)
+       if (rl_reg > 0 && !tqp_vector->tx_group.coal.gl_adapt_enable &&
+           !tqp_vector->rx_group.coal.gl_adapt_enable)
                /* According to the hardware, the range of rl_reg is
                 * 0-59 and the unit is 4.
                 */
@@ -205,23 +205,30 @@ static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector,
         */
 
        /* Default: enable interrupt coalescing self-adaptive and GL */
-       tqp_vector->tx_group.gl_adapt_enable = 1;
-       tqp_vector->rx_group.gl_adapt_enable = 1;
+       tqp_vector->tx_group.coal.gl_adapt_enable = 1;
+       tqp_vector->rx_group.coal.gl_adapt_enable = 1;
 
-       tqp_vector->tx_group.int_gl = HNS3_INT_GL_50K;
-       tqp_vector->rx_group.int_gl = HNS3_INT_GL_50K;
-
-       hns3_set_vector_coalesce_tx_gl(tqp_vector,
-                                      tqp_vector->tx_group.int_gl);
-       hns3_set_vector_coalesce_rx_gl(tqp_vector,
-                                      tqp_vector->rx_group.int_gl);
+       tqp_vector->tx_group.coal.int_gl = HNS3_INT_GL_50K;
+       tqp_vector->rx_group.coal.int_gl = HNS3_INT_GL_50K;
 
        /* Default: disable RL */
        h->kinfo.int_rl_setting = 0;
-       hns3_set_vector_coalesce_rl(tqp_vector, h->kinfo.int_rl_setting);
 
-       tqp_vector->rx_group.flow_level = HNS3_FLOW_LOW;
-       tqp_vector->tx_group.flow_level = HNS3_FLOW_LOW;
+       tqp_vector->int_adapt_down = HNS3_INT_ADAPT_DOWN_START;
+       tqp_vector->rx_group.coal.flow_level = HNS3_FLOW_LOW;
+       tqp_vector->tx_group.coal.flow_level = HNS3_FLOW_LOW;
+}
+
+/* Hand the coalescing values already stored for @tqp_vector (TX GL, then
+ * RX GL) and the handle's RL setting to the hns3_set_vector_coalesce_*
+ * helpers, in that order.
+ */
+static void hns3_vector_gl_rl_init_hw(struct hns3_enet_tqp_vector *tqp_vector,
+                                     struct hns3_nic_priv *priv)
+{
+       hns3_set_vector_coalesce_tx_gl(tqp_vector,
+                                      tqp_vector->tx_group.coal.int_gl);
+
+       hns3_set_vector_coalesce_rx_gl(tqp_vector,
+                                      tqp_vector->rx_group.coal.int_gl);
+
+       hns3_set_vector_coalesce_rl(tqp_vector,
+                                   priv->ae_handle->kinfo.int_rl_setting);
+}
 
 static int hns3_nic_set_real_num_queue(struct net_device *netdev)
@@ -249,6 +256,16 @@ static int hns3_nic_set_real_num_queue(struct net_device *netdev)
        return 0;
 }
 
+/* Return the smaller of (num_tc * max RSS size) and (free TQPs + TQPs the
+ * handle already owns), both taken as u16 values. The free-TQP count and
+ * the max RSS size come from the get_tqps_and_rss_info() op.
+ */
+static u16 hns3_get_max_available_channels(struct hnae3_handle *h)
+{
+       u16 unused_tqps, rss_limit;
+       u16 tc_limit, tqp_limit;
+
+       h->ae_algo->ops->get_tqps_and_rss_info(h, &unused_tqps, &rss_limit);
+
+       tc_limit = h->kinfo.num_tc * rss_limit;
+       tqp_limit = unused_tqps + h->kinfo.num_tqps;
+
+       return tc_limit < tqp_limit ? tc_limit : tqp_limit;
+}
+
 static int hns3_nic_net_up(struct net_device *netdev)
 {
        struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -303,7 +320,7 @@ static int hns3_nic_net_open(struct net_device *netdev)
                return ret;
        }
 
-       priv->last_reset_time = jiffies;
+       priv->ae_handle->last_reset_time = jiffies;
        return 0;
 }
 
@@ -1104,7 +1121,7 @@ static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p)
        if (!mac_addr || !is_valid_ether_addr((const u8 *)mac_addr->sa_data))
                return -EADDRNOTAVAIL;
 
-       ret = h->ae_algo->ops->set_mac_addr(h, mac_addr->sa_data);
+       ret = h->ae_algo->ops->set_mac_addr(h, mac_addr->sa_data, false);
        if (ret) {
                netdev_err(netdev, "set_mac_address fail, ret=%d!\n", ret);
                return ret;
@@ -1388,11 +1405,15 @@ static int hns3_vlan_rx_add_vid(struct net_device *netdev,
                                __be16 proto, u16 vid)
 {
        struct hnae3_handle *h = hns3_get_handle(netdev);
+       struct hns3_nic_priv *priv = netdev_priv(netdev);
        int ret = -EIO;
 
        if (h->ae_algo->ops->set_vlan_filter)
                ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, false);
 
+       if (!ret)
+               set_bit(vid, priv->active_vlans);
+
        return ret;
 }
 
@@ -1400,14 +1421,32 @@ static int hns3_vlan_rx_kill_vid(struct net_device *netdev,
                                 __be16 proto, u16 vid)
 {
        struct hnae3_handle *h = hns3_get_handle(netdev);
+       struct hns3_nic_priv *priv = netdev_priv(netdev);
        int ret = -EIO;
 
        if (h->ae_algo->ops->set_vlan_filter)
                ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, true);
 
+       if (!ret)
+               clear_bit(vid, priv->active_vlans);
+
        return ret;
 }
 
+/* Re-add every VLAN id whose bit is set in priv->active_vlans by calling
+ * hns3_vlan_rx_add_vid() with the 802.1Q protocol. A failure is only
+ * logged; the remaining ids are still processed.
+ */
+static void hns3_restore_vlan(struct net_device *netdev)
+{
+       struct hns3_nic_priv *priv = netdev_priv(netdev);
+       const __be16 proto = htons(ETH_P_8021Q);
+       u16 vid;
+
+       for_each_set_bit(vid, priv->active_vlans, VLAN_N_VID) {
+               int err = hns3_vlan_rx_add_vid(netdev, proto, vid);
+
+               if (!err)
+                       continue;
+
+               netdev_warn(netdev, "Restore vlan: %d filter, ret:%d\n",
+                           vid, err);
+       }
+}
+
 static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
                                u8 qos, __be16 vlan_proto)
 {
@@ -1504,7 +1543,6 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
 static void hns3_nic_net_timeout(struct net_device *ndev)
 {
        struct hns3_nic_priv *priv = netdev_priv(ndev);
-       unsigned long last_reset_time = priv->last_reset_time;
        struct hnae3_handle *h = priv->ae_handle;
 
        if (!hns3_get_tx_timeo_queue_info(ndev))
@@ -1512,24 +1550,12 @@ static void hns3_nic_net_timeout(struct net_device *ndev)
 
        priv->tx_timeout_count++;
 
-       /* This timeout is far away enough from last timeout,
-        * if timeout again,set the reset type to PF reset
-        */
-       if (time_after(jiffies, (last_reset_time + 20 * HZ)))
-               priv->reset_level = HNAE3_FUNC_RESET;
-
-       /* Don't do any new action before the next timeout */
-       else if (time_before(jiffies, (last_reset_time + ndev->watchdog_timeo)))
+       if (time_before(jiffies, (h->last_reset_time + ndev->watchdog_timeo)))
                return;
 
-       priv->last_reset_time = jiffies;
-
+       /* request the reset */
        if (h->ae_algo->ops->reset_event)
-               h->ae_algo->ops->reset_event(h, priv->reset_level);
-
-       priv->reset_level++;
-       if (priv->reset_level > HNAE3_GLOBAL_RESET)
-               priv->reset_level = HNAE3_GLOBAL_RESET;
+               h->ae_algo->ops->reset_event(h);
 }
 
 static const struct net_device_ops hns3_nic_netdev_ops = {
@@ -2064,15 +2090,13 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
        desc = &ring->desc[ring->next_to_clean];
        size = le16_to_cpu(desc->rx.size);
 
-       if (twobufs) {
-               truesize = hnae_buf_size(ring);
-       } else {
-               truesize = ALIGN(size, L1_CACHE_BYTES);
+       truesize = hnae_buf_size(ring);
+
+       if (!twobufs)
                last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
-       }
 
        skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
-                       size - pull_len, truesize - pull_len);
+                       size - pull_len, truesize);
 
         /* Avoid re-using remote pages,flag default unreuse */
        if (unlikely(page_to_nid(desc_cb->priv) != numa_node_id()))
@@ -2369,20 +2393,20 @@ int hns3_clean_rx_ring(
 
 static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
 {
-#define HNS3_RX_ULTRA_PACKET_RATE 40000
+       struct hns3_enet_tqp_vector *tqp_vector =
+                                       ring_group->ring->tqp_vector;
        enum hns3_flow_level_range new_flow_level;
-       struct hns3_enet_tqp_vector *tqp_vector;
-       int packets_per_secs;
-       int bytes_per_usecs;
+       int packets_per_msecs;
+       int bytes_per_msecs;
+       u32 time_passed_ms;
        u16 new_int_gl;
-       int usecs;
 
-       if (!ring_group->int_gl)
+       if (!ring_group->coal.int_gl || !tqp_vector->last_jiffies)
                return false;
 
        if (ring_group->total_packets == 0) {
-               ring_group->int_gl = HNS3_INT_GL_50K;
-               ring_group->flow_level = HNS3_FLOW_LOW;
+               ring_group->coal.int_gl = HNS3_INT_GL_50K;
+               ring_group->coal.flow_level = HNS3_FLOW_LOW;
                return true;
        }
 
@@ -2392,35 +2416,46 @@ static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
         * 20-1249MB/s high      (18000 ints/s)
         * > 40000pps  ultra     (8000 ints/s)
         */
-       new_flow_level = ring_group->flow_level;
-       new_int_gl = ring_group->int_gl;
-       tqp_vector = ring_group->ring->tqp_vector;
-       usecs = (ring_group->int_gl << 1);
-       bytes_per_usecs = ring_group->total_bytes / usecs;
-       /* 1000000 microseconds */
-       packets_per_secs = ring_group->total_packets * 1000000 / usecs;
+       new_flow_level = ring_group->coal.flow_level;
+       new_int_gl = ring_group->coal.int_gl;
+       time_passed_ms =
+               jiffies_to_msecs(jiffies - tqp_vector->last_jiffies);
+
+       if (!time_passed_ms)
+               return false;
+
+       do_div(ring_group->total_packets, time_passed_ms);
+       packets_per_msecs = ring_group->total_packets;
+
+       do_div(ring_group->total_bytes, time_passed_ms);
+       bytes_per_msecs = ring_group->total_bytes;
+
+#define HNS3_RX_LOW_BYTE_RATE 10000
+#define HNS3_RX_MID_BYTE_RATE 20000
 
        switch (new_flow_level) {
        case HNS3_FLOW_LOW:
-               if (bytes_per_usecs > 10)
+               if (bytes_per_msecs > HNS3_RX_LOW_BYTE_RATE)
                        new_flow_level = HNS3_FLOW_MID;
                break;
        case HNS3_FLOW_MID:
-               if (bytes_per_usecs > 20)
+               if (bytes_per_msecs > HNS3_RX_MID_BYTE_RATE)
                        new_flow_level = HNS3_FLOW_HIGH;
-               else if (bytes_per_usecs <= 10)
+               else if (bytes_per_msecs <= HNS3_RX_LOW_BYTE_RATE)
                        new_flow_level = HNS3_FLOW_LOW;
                break;
        case HNS3_FLOW_HIGH:
        case HNS3_FLOW_ULTRA:
        default:
-               if (bytes_per_usecs <= 20)
+               if (bytes_per_msecs <= HNS3_RX_MID_BYTE_RATE)
                        new_flow_level = HNS3_FLOW_MID;
                break;
        }
 
-       if ((packets_per_secs > HNS3_RX_ULTRA_PACKET_RATE) &&
-           (&tqp_vector->rx_group == ring_group))
+#define HNS3_RX_ULTRA_PACKET_RATE 40
+
+       if (packets_per_msecs > HNS3_RX_ULTRA_PACKET_RATE &&
+           &tqp_vector->rx_group == ring_group)
                new_flow_level = HNS3_FLOW_ULTRA;
 
        switch (new_flow_level) {
@@ -2442,9 +2477,9 @@ static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
 
        ring_group->total_bytes = 0;
        ring_group->total_packets = 0;
-       ring_group->flow_level = new_flow_level;
-       if (new_int_gl != ring_group->int_gl) {
-               ring_group->int_gl = new_int_gl;
+       ring_group->coal.flow_level = new_flow_level;
+       if (new_int_gl != ring_group->coal.int_gl) {
+               ring_group->coal.int_gl = new_int_gl;
                return true;
        }
        return false;
@@ -2456,19 +2491,27 @@ static void hns3_update_new_int_gl(struct hns3_enet_tqp_vector *tqp_vector)
        struct hns3_enet_ring_group *tx_group = &tqp_vector->tx_group;
        bool rx_update, tx_update;
 
-       if (rx_group->gl_adapt_enable) {
+       if (tqp_vector->int_adapt_down > 0) {
+               tqp_vector->int_adapt_down--;
+               return;
+       }
+
+       if (rx_group->coal.gl_adapt_enable) {
                rx_update = hns3_get_new_int_gl(rx_group);
                if (rx_update)
                        hns3_set_vector_coalesce_rx_gl(tqp_vector,
-                                                      rx_group->int_gl);
+                                                      rx_group->coal.int_gl);
        }
 
-       if (tx_group->gl_adapt_enable) {
+       if (tx_group->coal.gl_adapt_enable) {
                tx_update = hns3_get_new_int_gl(&tqp_vector->tx_group);
                if (tx_update)
                        hns3_set_vector_coalesce_tx_gl(tqp_vector,
-                                                      tx_group->int_gl);
+                                                      tx_group->coal.int_gl);
        }
+
+       tqp_vector->last_jiffies = jiffies;
+       tqp_vector->int_adapt_down = HNS3_INT_ADAPT_DOWN_START;
 }
 
 static int hns3_nic_common_poll(struct napi_struct *napi, int budget)
@@ -2615,32 +2658,18 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
        struct hnae3_ring_chain_node vector_ring_chain;
        struct hnae3_handle *h = priv->ae_handle;
        struct hns3_enet_tqp_vector *tqp_vector;
-       struct hnae3_vector_info *vector;
-       struct pci_dev *pdev = h->pdev;
-       u16 tqp_num = h->kinfo.num_tqps;
-       u16 vector_num;
        int ret = 0;
        u16 i;
 
-       /* RSS size, cpu online and vector_num should be the same */
-       /* Should consider 2p/4p later */
-       vector_num = min_t(u16, num_online_cpus(), tqp_num);
-       vector = devm_kcalloc(&pdev->dev, vector_num, sizeof(*vector),
-                             GFP_KERNEL);
-       if (!vector)
-               return -ENOMEM;
-
-       vector_num = h->ae_algo->ops->get_vector(h, vector_num, vector);
-
-       priv->vector_num = vector_num;
-       priv->tqp_vector = (struct hns3_enet_tqp_vector *)
-               devm_kcalloc(&pdev->dev, vector_num, sizeof(*priv->tqp_vector),
-                            GFP_KERNEL);
-       if (!priv->tqp_vector)
-               return -ENOMEM;
+       for (i = 0; i < priv->vector_num; i++) {
+               tqp_vector = &priv->tqp_vector[i];
+               hns3_vector_gl_rl_init_hw(tqp_vector, priv);
+               tqp_vector->num_tqps = 0;
+       }
 
-       for (i = 0; i < tqp_num; i++) {
-               u16 vector_i = i % vector_num;
+       for (i = 0; i < h->kinfo.num_tqps; i++) {
+               u16 vector_i = i % priv->vector_num;
+               u16 tqp_num = h->kinfo.num_tqps;
 
                tqp_vector = &priv->tqp_vector[vector_i];
 
@@ -2650,52 +2679,94 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
                hns3_add_ring_to_group(&tqp_vector->rx_group,
                                       priv->ring_data[i + tqp_num].ring);
 
-               tqp_vector->idx = vector_i;
-               tqp_vector->mask_addr = vector[vector_i].io_addr;
-               tqp_vector->vector_irq = vector[vector_i].vector;
-               tqp_vector->num_tqps++;
-
                priv->ring_data[i].ring->tqp_vector = tqp_vector;
                priv->ring_data[i + tqp_num].ring->tqp_vector = tqp_vector;
+               tqp_vector->num_tqps++;
        }
 
-       for (i = 0; i < vector_num; i++) {
+       for (i = 0; i < priv->vector_num; i++) {
                tqp_vector = &priv->tqp_vector[i];
 
                tqp_vector->rx_group.total_bytes = 0;
                tqp_vector->rx_group.total_packets = 0;
                tqp_vector->tx_group.total_bytes = 0;
                tqp_vector->tx_group.total_packets = 0;
-               hns3_vector_gl_rl_init(tqp_vector, priv);
                tqp_vector->handle = h;
 
                ret = hns3_get_vector_ring_chain(tqp_vector,
                                                 &vector_ring_chain);
                if (ret)
-                       goto out;
+                       return ret;
 
                ret = h->ae_algo->ops->map_ring_to_vector(h,
                        tqp_vector->vector_irq, &vector_ring_chain);
-               if (ret)
-                       goto out;
 
                hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
 
+               if (ret)
+                       return ret;
+
                netif_napi_add(priv->netdev, &tqp_vector->napi,
                               hns3_nic_common_poll, NAPI_POLL_WEIGHT);
        }
 
+       return 0;
+}
+
+/* Obtain interrupt vectors for the handle and allocate priv->tqp_vector.
+ *
+ * Requests min(online CPUs, number of TQPs) vectors through the get_vector()
+ * op, stores the returned count in priv->vector_num, allocates one
+ * hns3_enet_tqp_vector per vector and fills in its index, mask address and
+ * irq before running hns3_vector_gl_rl_init() on it.
+ *
+ * Returns 0 on success or -ENOMEM when either allocation fails.
+ */
+static int hns3_nic_alloc_vector_data(struct hns3_nic_priv *priv)
+{
+       struct hnae3_handle *h = priv->ae_handle;
+       struct hns3_enet_tqp_vector *tqp_vector;
+       struct hnae3_vector_info *vector;
+       struct pci_dev *pdev = h->pdev;
+       u16 tqp_num = h->kinfo.num_tqps;
+       u16 vector_num;
+       int ret = 0;
+       u16 i;
+
+       /* RSS size, cpu online and vector_num should be the same */
+       /* Should consider 2p/4p later */
+       vector_num = min_t(u16, num_online_cpus(), tqp_num);
+       /* Temporary array that receives the vector info; freed at "out" */
+       vector = devm_kcalloc(&pdev->dev, vector_num, sizeof(*vector),
+                             GFP_KERNEL);
+       if (!vector)
+               return -ENOMEM;
+
+       /* NOTE(review): the op's return value is stored in a u16, so a
+        * negative error code (if the op can return one) would not be
+        * detected here -- confirm against the get_vector() implementations.
+        */
+       vector_num = h->ae_algo->ops->get_vector(h, vector_num, vector);
+
+       priv->vector_num = vector_num;
+       priv->tqp_vector = (struct hns3_enet_tqp_vector *)
+               devm_kcalloc(&pdev->dev, vector_num, sizeof(*priv->tqp_vector),
+                            GFP_KERNEL);
+       if (!priv->tqp_vector) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       for (i = 0; i < priv->vector_num; i++) {
+               tqp_vector = &priv->tqp_vector[i];
+               tqp_vector->idx = i;
+               tqp_vector->mask_addr = vector[i].io_addr;
+               tqp_vector->vector_irq = vector[i].vector;
+               hns3_vector_gl_rl_init(tqp_vector, priv);
+       }
+
 out:
        devm_kfree(&pdev->dev, vector);
        return ret;
 }
 
+/* Reset @group to the empty state: zero ring count and no ring pointer. */
+static void hns3_clear_ring_group(struct hns3_enet_ring_group *group)
+{
+       group->count = 0;
+       group->ring = NULL;
+}
+
 static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
 {
        struct hnae3_ring_chain_node vector_ring_chain;
        struct hnae3_handle *h = priv->ae_handle;
        struct hns3_enet_tqp_vector *tqp_vector;
-       struct pci_dev *pdev = h->pdev;
        int i, ret;
 
        for (i = 0; i < priv->vector_num; i++) {
@@ -2711,6 +2782,10 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
                if (ret)
                        return ret;
 
+               ret = h->ae_algo->ops->put_vector(h, tqp_vector->vector_irq);
+               if (ret)
+                       return ret;
+
                hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
 
                if (priv->tqp_vector[i].irq_init_flag == HNS3_VECTOR_INITED) {
@@ -2722,12 +2797,30 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
                }
 
                priv->ring_data[i].ring->irq_init_flag = HNS3_VECTOR_NOT_INITED;
-
+               hns3_clear_ring_group(&tqp_vector->rx_group);
+               hns3_clear_ring_group(&tqp_vector->tx_group);
                netif_napi_del(&priv->tqp_vector[i].napi);
        }
 
-       devm_kfree(&pdev->dev, priv->tqp_vector);
+       return 0;
+}
 
+/* Give every vector in priv->tqp_vector back through the put_vector() op and
+ * then free the priv->tqp_vector array.
+ *
+ * Returns 0 on success. If put_vector() fails for a vector, its error code
+ * is returned immediately; the remaining vectors are not put and the array
+ * is not freed by this function.
+ */
+static int hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv)
+{
+       struct hnae3_handle *h = priv->ae_handle;
+       struct pci_dev *pdev = h->pdev;
+       int i, ret;
+
+       for (i = 0; i < priv->vector_num; i++) {
+               struct hns3_enet_tqp_vector *tqp_vector;
+
+               tqp_vector = &priv->tqp_vector[i];
+               ret = h->ae_algo->ops->put_vector(h, tqp_vector->vector_irq);
+               if (ret)
+                       return ret;
+       }
+
+       devm_kfree(&pdev->dev, priv->tqp_vector);
        return 0;
 }
 
@@ -2957,13 +3050,8 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
                        h->ae_algo->ops->reset_queue(h, i);
 
                hns3_fini_ring(priv->ring_data[i].ring);
-               devm_kfree(priv->dev, priv->ring_data[i].ring);
                hns3_fini_ring(priv->ring_data[i + h->kinfo.num_tqps].ring);
-               devm_kfree(priv->dev,
-                          priv->ring_data[i + h->kinfo.num_tqps].ring);
        }
-       devm_kfree(priv->dev, priv->ring_data);
-
        return 0;
 }
 
@@ -2987,7 +3075,7 @@ static void hns3_init_mac_addr(struct net_device *netdev)
        }
 
        if (h->ae_algo->ops->set_mac_addr)
-               h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr);
+               h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr, true);
 
 }
 
@@ -3013,7 +3101,7 @@ static int hns3_client_init(struct hnae3_handle *handle)
        int ret;
 
        netdev = alloc_etherdev_mq(sizeof(struct hns3_nic_priv),
-                                  handle->kinfo.num_tqps);
+                                  hns3_get_max_available_channels(handle));
        if (!netdev)
                return -ENOMEM;
 
@@ -3021,8 +3109,8 @@ static int hns3_client_init(struct hnae3_handle *handle)
        priv->dev = &pdev->dev;
        priv->netdev = netdev;
        priv->ae_handle = handle;
-       priv->last_reset_time = jiffies;
-       priv->reset_level = HNAE3_FUNC_RESET;
+       priv->ae_handle->reset_level = HNAE3_NONE_RESET;
+       priv->ae_handle->last_reset_time = jiffies;
        priv->tx_timeout_count = 0;
 
        handle->kinfo.netdev = netdev;
@@ -3048,6 +3136,12 @@ static int hns3_client_init(struct hnae3_handle *handle)
                goto out_get_ring_cfg;
        }
 
+       ret = hns3_nic_alloc_vector_data(priv);
+       if (ret) {
+               ret = -ENOMEM;
+               goto out_alloc_vector_data;
+       }
+
        ret = hns3_nic_init_vector_data(priv);
        if (ret) {
                ret = -ENOMEM;
@@ -3076,8 +3170,10 @@ static int hns3_client_init(struct hnae3_handle *handle)
 out_reg_netdev_fail:
 out_init_ring_data:
        (void)hns3_nic_uninit_vector_data(priv);
-       priv->ring_data = NULL;
 out_init_vector_data:
+       hns3_nic_dealloc_vector_data(priv);
+out_alloc_vector_data:
+       priv->ring_data = NULL;
 out_get_ring_cfg:
        priv->ae_handle = NULL;
        free_netdev(netdev);
@@ -3097,10 +3193,16 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
        if (ret)
                netdev_err(netdev, "uninit vector error\n");
 
+       ret = hns3_nic_dealloc_vector_data(priv);
+       if (ret)
+               netdev_err(netdev, "dealloc vector error\n");
+
        ret = hns3_uninit_all_ring(priv);
        if (ret)
                netdev_err(netdev, "uninit ring error\n");
 
+       hns3_put_ring_config(priv);
+
        priv->ring_data = NULL;
 
        free_netdev(netdev);
@@ -3240,7 +3342,6 @@ static int hns3_reset_notify_down_enet(struct hnae3_handle *handle)
 static int hns3_reset_notify_up_enet(struct hnae3_handle *handle)
 {
        struct hnae3_knic_private_info *kinfo = &handle->kinfo;
-       struct hns3_nic_priv *priv = netdev_priv(kinfo->netdev);
        int ret = 0;
 
        if (netif_running(kinfo->netdev)) {
@@ -3250,8 +3351,7 @@ static int hns3_reset_notify_up_enet(struct hnae3_handle *handle)
                                   "hns net up fail, ret=%d!\n", ret);
                        return ret;
                }
-
-               priv->last_reset_time = jiffies;
+               handle->last_reset_time = jiffies;
        }
 
        return ret;
@@ -3263,11 +3363,14 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
        struct hns3_nic_priv *priv = netdev_priv(netdev);
        int ret;
 
-       priv->reset_level = 1;
        hns3_init_mac_addr(netdev);
        hns3_nic_set_rx_mode(netdev);
        hns3_recover_hw_addr(netdev);
 
+       /* Hardware table is only cleared when pf resets */
+       if (!(handle->flags & HNAE3_SUPPORT_VF))
+               hns3_restore_vlan(netdev);
+
        /* Carrier off reporting is important to ethtool even BEFORE open */
        netif_carrier_off(netdev);
 
@@ -3306,6 +3409,8 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
        if (ret)
                netdev_err(netdev, "uninit ring error\n");
 
+       hns3_put_ring_config(priv);
+
        priv->ring_data = NULL;
 
        return ret;
@@ -3336,18 +3441,24 @@ static int hns3_reset_notify(struct hnae3_handle *handle,
        return ret;
 }
 
-static u16 hns3_get_max_available_channels(struct net_device *netdev)
+static void hns3_restore_coal(struct hns3_nic_priv *priv,
+                             struct hns3_enet_coalesce *tx,
+                             struct hns3_enet_coalesce *rx)
 {
-       struct hnae3_handle *h = hns3_get_handle(netdev);
-       u16 free_tqps, max_rss_size, max_tqps;
-
-       h->ae_algo->ops->get_tqps_and_rss_info(h, &free_tqps, &max_rss_size);
-       max_tqps = h->kinfo.num_tc * max_rss_size;
+       u16 vector_num = priv->vector_num;
+       int i;
 
-       return min_t(u16, max_tqps, (free_tqps + h->kinfo.num_tqps));
+       for (i = 0; i < vector_num; i++) {
+               memcpy(&priv->tqp_vector[i].tx_group.coal, tx,
+                      sizeof(struct hns3_enet_coalesce));
+               memcpy(&priv->tqp_vector[i].rx_group.coal, rx,
+                      sizeof(struct hns3_enet_coalesce));
+       }
 }
 
-static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num)
+static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num,
+                              struct hns3_enet_coalesce *tx,
+                              struct hns3_enet_coalesce *rx)
 {
        struct hns3_nic_priv *priv = netdev_priv(netdev);
        struct hnae3_handle *h = hns3_get_handle(netdev);
@@ -3361,6 +3472,12 @@ static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num)
        if (ret)
                return ret;
 
+       ret = hns3_nic_alloc_vector_data(priv);
+       if (ret)
+               goto err_alloc_vector;
+
+       hns3_restore_coal(priv, tx, rx);
+
        ret = hns3_nic_init_vector_data(priv);
        if (ret)
                goto err_uninit_vector;
@@ -3375,6 +3492,8 @@ static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num)
        hns3_put_ring_config(priv);
 err_uninit_vector:
        hns3_nic_uninit_vector_data(priv);
+err_alloc_vector:
+       hns3_nic_dealloc_vector_data(priv);
        return ret;
 }
 
@@ -3389,6 +3508,7 @@ int hns3_set_channels(struct net_device *netdev,
        struct hns3_nic_priv *priv = netdev_priv(netdev);
        struct hnae3_handle *h = hns3_get_handle(netdev);
        struct hnae3_knic_private_info *kinfo = &h->kinfo;
+       struct hns3_enet_coalesce tx_coal, rx_coal;
        bool if_running = netif_running(netdev);
        u32 new_tqp_num = ch->combined_count;
        u16 org_tqp_num;
@@ -3397,12 +3517,12 @@ int hns3_set_channels(struct net_device *netdev,
        if (ch->rx_count || ch->tx_count)
                return -EINVAL;
 
-       if (new_tqp_num > hns3_get_max_available_channels(netdev) ||
+       if (new_tqp_num > hns3_get_max_available_channels(h) ||
            new_tqp_num < kinfo->num_tc) {
                dev_err(&netdev->dev,
                        "Change tqps fail, the tqp range is from %d to %d",
                        kinfo->num_tc,
-                       hns3_get_max_available_channels(netdev));
+                       hns3_get_max_available_channels(h));
                return -EINVAL;
        }
 
@@ -3411,7 +3531,7 @@ int hns3_set_channels(struct net_device *netdev,
                return 0;
 
        if (if_running)
-               dev_close(netdev);
+               hns3_nic_net_stop(netdev);
 
        hns3_clear_all_ring(h);
 
@@ -3422,12 +3542,26 @@ int hns3_set_channels(struct net_device *netdev,
                goto open_netdev;
        }
 
+       /* Changing the tqp num may also change the vector num,
+        * ethtool only supports setting and querying one coal
+        * configuration for now, so save the vector 0's coal
+        * configuration here in order to restore it.
+        */
+       memcpy(&tx_coal, &priv->tqp_vector[0].tx_group.coal,
+              sizeof(struct hns3_enet_coalesce));
+       memcpy(&rx_coal, &priv->tqp_vector[0].rx_group.coal,
+              sizeof(struct hns3_enet_coalesce));
+
+       hns3_nic_dealloc_vector_data(priv);
+
        hns3_uninit_all_ring(priv);
+       hns3_put_ring_config(priv);
 
        org_tqp_num = h->kinfo.num_tqps;
-       ret = hns3_modify_tqp_num(netdev, new_tqp_num);
+       ret = hns3_modify_tqp_num(netdev, new_tqp_num, &tx_coal, &rx_coal);
        if (ret) {
-               ret = hns3_modify_tqp_num(netdev, org_tqp_num);
+               ret = hns3_modify_tqp_num(netdev, org_tqp_num,
+                                         &tx_coal, &rx_coal);
                if (ret) {
                        /* If revert to old tqp failed, fatal error occurred */
                        dev_err(&netdev->dev,
@@ -3440,7 +3574,7 @@ int hns3_set_channels(struct net_device *netdev,
 
 open_netdev:
        if (if_running)
-               dev_open(netdev);
+               hns3_nic_net_open(netdev);
 
        return ret;
 }
index 213f501b30bb82397eda7da638e0860a900b5026..9e4cfbbf8dcd97b1b52619b66e4ba45741a8b4ab 100644 (file)
@@ -10,6 +10,8 @@
 #ifndef __HNS3_ENET_H
 #define __HNS3_ENET_H
 
+#include <linux/if_vlan.h>
+
 #include "hnae3.h"
 
 extern const char hns3_driver_version[];
@@ -460,15 +462,21 @@ enum hns3_link_mode_bits {
 #define HNS3_INT_RL_MAX                        0x00EC
 #define HNS3_INT_RL_ENABLE_MASK                0x40
 
+#define HNS3_INT_ADAPT_DOWN_START      100
+
+struct hns3_enet_coalesce {
+       u16 int_gl;
+       u8 gl_adapt_enable;
+       enum hns3_flow_level_range flow_level;
+};
+
 struct hns3_enet_ring_group {
        /* array of pointers to rings */
        struct hns3_enet_ring *ring;
        u64 total_bytes;        /* total bytes processed this group */
        u64 total_packets;      /* total packets processed this group */
        u16 count;
-       enum hns3_flow_level_range flow_level;
-       u16 int_gl;
-       u8 gl_adapt_enable;
+       struct hns3_enet_coalesce coal;
 };
 
 struct hns3_enet_tqp_vector {
@@ -491,6 +499,7 @@ struct hns3_enet_tqp_vector {
 
        /* when 0 should adjust interrupt coalesce parameter */
        u8 int_adapt_down;
+       unsigned long last_jiffies;
 } ____cacheline_internodealigned_in_smp;
 
 enum hns3_udp_tnl_type {
@@ -523,8 +532,6 @@ struct hns3_nic_priv {
        /* The most recently read link state */
        int link;
        u64 tx_timeout_count;
-       enum hnae3_reset_type reset_level;
-       unsigned long last_reset_time;
 
        unsigned long state;
 
@@ -535,6 +542,7 @@ struct hns3_nic_priv {
        struct notifier_block notifier_block;
        /* Vxlan/Geneve information */
        struct hns3_udp_tunnel udp_tnl[HNS3_UDP_TNL_MAX];
+       unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
 };
 
 union l3_hdr_info {
index b034c7f24eda69adcbb29a5a842c64276e2c89a5..9d07116a4426a351ad598c6c52c2a19a189bfdbc 100644 (file)
@@ -74,19 +74,6 @@ struct hns3_link_mode_mapping {
        u32 ethtool_link_mode;
 };
 
-static const struct hns3_link_mode_mapping hns3_lm_map[] = {
-       {HNS3_LM_FIBRE_BIT, ETHTOOL_LINK_MODE_FIBRE_BIT},
-       {HNS3_LM_AUTONEG_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT},
-       {HNS3_LM_TP_BIT, ETHTOOL_LINK_MODE_TP_BIT},
-       {HNS3_LM_PAUSE_BIT, ETHTOOL_LINK_MODE_Pause_BIT},
-       {HNS3_LM_BACKPLANE_BIT, ETHTOOL_LINK_MODE_Backplane_BIT},
-       {HNS3_LM_10BASET_HALF_BIT, ETHTOOL_LINK_MODE_10baseT_Half_BIT},
-       {HNS3_LM_10BASET_FULL_BIT, ETHTOOL_LINK_MODE_10baseT_Full_BIT},
-       {HNS3_LM_100BASET_HALF_BIT, ETHTOOL_LINK_MODE_100baseT_Half_BIT},
-       {HNS3_LM_100BASET_FULL_BIT, ETHTOOL_LINK_MODE_100baseT_Full_BIT},
-       {HNS3_LM_1000BASET_FULL_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT},
-};
-
 static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop)
 {
        struct hnae3_handle *h = hns3_get_handle(ndev);
@@ -309,6 +296,9 @@ static void hns3_self_test(struct net_device *ndev,
        struct hnae3_handle *h = priv->ae_handle;
        int st_param[HNS3_SELF_TEST_TPYE_NUM][2];
        bool if_running = netif_running(ndev);
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+       bool dis_vlan_filter;
+#endif
        int test_index = 0;
        u32 i;
 
@@ -323,6 +313,14 @@ static void hns3_self_test(struct net_device *ndev,
        if (if_running)
                dev_close(ndev);
 
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+       /* Disable the vlan filter because selftest does not support it */
+       dis_vlan_filter = (ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
+                               h->ae_algo->ops->enable_vlan_filter;
+       if (dis_vlan_filter)
+               h->ae_algo->ops->enable_vlan_filter(h, false);
+#endif
+
        set_bit(HNS3_NIC_STATE_TESTING, &priv->state);
 
        for (i = 0; i < HNS3_SELF_TEST_TPYE_NUM; i++) {
@@ -345,28 +343,15 @@ static void hns3_self_test(struct net_device *ndev,
 
        clear_bit(HNS3_NIC_STATE_TESTING, &priv->state);
 
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+       if (dis_vlan_filter)
+               h->ae_algo->ops->enable_vlan_filter(h, true);
+#endif
+
        if (if_running)
                dev_open(ndev);
 }
 
-static void hns3_driv_to_eth_caps(u32 caps, struct ethtool_link_ksettings *cmd,
-                                 bool is_advertised)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(hns3_lm_map); i++) {
-               if (!(caps & hns3_lm_map[i].hns3_link_mode))
-                       continue;
-
-               if (is_advertised)
-                       __set_bit(hns3_lm_map[i].ethtool_link_mode,
-                                 cmd->link_modes.advertising);
-               else
-                       __set_bit(hns3_lm_map[i].ethtool_link_mode,
-                                 cmd->link_modes.supported);
-       }
-}
-
 static int hns3_get_sset_count(struct net_device *netdev, int stringset)
 {
        struct hnae3_handle *h = hns3_get_handle(netdev);
@@ -578,18 +563,19 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
 {
        struct hnae3_handle *h = hns3_get_handle(netdev);
        u32 flowctrl_adv = 0;
-       u32 supported_caps;
-       u32 advertised_caps;
-       u8 media_type = HNAE3_MEDIA_TYPE_UNKNOWN;
        u8 link_stat;
 
        if (!h->ae_algo || !h->ae_algo->ops)
                return -EOPNOTSUPP;
 
        /* 1.auto_neg & speed & duplex from cmd */
-       if (netdev->phydev)
+       if (netdev->phydev) {
                phy_ethtool_ksettings_get(netdev->phydev, cmd);
-       else if (h->ae_algo->ops->get_ksettings_an_result)
+
+               return 0;
+       }
+
+       if (h->ae_algo->ops->get_ksettings_an_result)
                h->ae_algo->ops->get_ksettings_an_result(h,
                                                         &cmd->base.autoneg,
                                                         &cmd->base.speed,
@@ -603,62 +589,16 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
                cmd->base.duplex = DUPLEX_UNKNOWN;
        }
 
-       /* 2.media_type get from bios parameter block */
-       if (h->ae_algo->ops->get_media_type) {
-               h->ae_algo->ops->get_media_type(h, &media_type);
-
-               switch (media_type) {
-               case HNAE3_MEDIA_TYPE_FIBER:
-                       cmd->base.port = PORT_FIBRE;
-                       supported_caps = HNS3_LM_FIBRE_BIT |
-                                        HNS3_LM_AUTONEG_BIT |
-                                        HNS3_LM_PAUSE_BIT |
-                                        HNS3_LM_1000BASET_FULL_BIT;
-
-                       advertised_caps = supported_caps;
-                       break;
-               case HNAE3_MEDIA_TYPE_COPPER:
-                       cmd->base.port = PORT_TP;
-                       supported_caps = HNS3_LM_TP_BIT |
-                                        HNS3_LM_AUTONEG_BIT |
-                                        HNS3_LM_PAUSE_BIT |
-                                        HNS3_LM_1000BASET_FULL_BIT |
-                                        HNS3_LM_100BASET_FULL_BIT |
-                                        HNS3_LM_100BASET_HALF_BIT |
-                                        HNS3_LM_10BASET_FULL_BIT |
-                                        HNS3_LM_10BASET_HALF_BIT;
-                       advertised_caps = supported_caps;
-                       break;
-               case HNAE3_MEDIA_TYPE_BACKPLANE:
-                       cmd->base.port = PORT_NONE;
-                       supported_caps = HNS3_LM_BACKPLANE_BIT |
-                                        HNS3_LM_PAUSE_BIT |
-                                        HNS3_LM_AUTONEG_BIT |
-                                        HNS3_LM_1000BASET_FULL_BIT |
-                                        HNS3_LM_100BASET_FULL_BIT |
-                                        HNS3_LM_100BASET_HALF_BIT |
-                                        HNS3_LM_10BASET_FULL_BIT |
-                                        HNS3_LM_10BASET_HALF_BIT;
-
-                       advertised_caps = supported_caps;
-                       break;
-               case HNAE3_MEDIA_TYPE_UNKNOWN:
-               default:
-                       cmd->base.port = PORT_OTHER;
-                       supported_caps = 0;
-                       advertised_caps = 0;
-                       break;
-               }
-
-               if (!cmd->base.autoneg)
-                       advertised_caps &= ~HNS3_LM_AUTONEG_BIT;
+       /* 2.get link mode and port type*/
+       if (h->ae_algo->ops->get_link_mode)
+               h->ae_algo->ops->get_link_mode(h,
+                                              cmd->link_modes.supported,
+                                              cmd->link_modes.advertising);
 
-               advertised_caps &= ~HNS3_LM_PAUSE_BIT;
-
-               /* now, map driver link modes to ethtool link modes */
-               hns3_driv_to_eth_caps(supported_caps, cmd, false);
-               hns3_driv_to_eth_caps(advertised_caps, cmd, true);
-       }
+       cmd->base.port = PORT_NONE;
+       if (h->ae_algo->ops->get_port_type)
+               h->ae_algo->ops->get_port_type(h,
+                                              &cmd->base.port);
 
        /* 3.mdix_ctrl&mdix get from phy reg */
        if (h->ae_algo->ops->get_mdix_mode)
@@ -905,11 +845,13 @@ static int hns3_get_coalesce_per_queue(struct net_device *netdev, u32 queue,
        tx_vector = priv->ring_data[queue].ring->tqp_vector;
        rx_vector = priv->ring_data[queue_num + queue].ring->tqp_vector;
 
-       cmd->use_adaptive_tx_coalesce = tx_vector->tx_group.gl_adapt_enable;
-       cmd->use_adaptive_rx_coalesce = rx_vector->rx_group.gl_adapt_enable;
+       cmd->use_adaptive_tx_coalesce =
+                       tx_vector->tx_group.coal.gl_adapt_enable;
+       cmd->use_adaptive_rx_coalesce =
+                       rx_vector->rx_group.coal.gl_adapt_enable;
 
-       cmd->tx_coalesce_usecs = tx_vector->tx_group.int_gl;
-       cmd->rx_coalesce_usecs = rx_vector->rx_group.int_gl;
+       cmd->tx_coalesce_usecs = tx_vector->tx_group.coal.int_gl;
+       cmd->rx_coalesce_usecs = rx_vector->rx_group.coal.int_gl;
 
        cmd->tx_coalesce_usecs_high = h->kinfo.int_rl_setting;
        cmd->rx_coalesce_usecs_high = h->kinfo.int_rl_setting;
@@ -1029,14 +971,18 @@ static void hns3_set_coalesce_per_queue(struct net_device *netdev,
        tx_vector = priv->ring_data[queue].ring->tqp_vector;
        rx_vector = priv->ring_data[queue_num + queue].ring->tqp_vector;
 
-       tx_vector->tx_group.gl_adapt_enable = cmd->use_adaptive_tx_coalesce;
-       rx_vector->rx_group.gl_adapt_enable = cmd->use_adaptive_rx_coalesce;
+       tx_vector->tx_group.coal.gl_adapt_enable =
+                               cmd->use_adaptive_tx_coalesce;
+       rx_vector->rx_group.coal.gl_adapt_enable =
+                               cmd->use_adaptive_rx_coalesce;
 
-       tx_vector->tx_group.int_gl = cmd->tx_coalesce_usecs;
-       rx_vector->rx_group.int_gl = cmd->rx_coalesce_usecs;
+       tx_vector->tx_group.coal.int_gl = cmd->tx_coalesce_usecs;
+       rx_vector->rx_group.coal.int_gl = cmd->rx_coalesce_usecs;
 
-       hns3_set_vector_coalesce_tx_gl(tx_vector, tx_vector->tx_group.int_gl);
-       hns3_set_vector_coalesce_rx_gl(rx_vector, rx_vector->rx_group.int_gl);
+       hns3_set_vector_coalesce_tx_gl(tx_vector,
+                                      tx_vector->tx_group.coal.int_gl);
+       hns3_set_vector_coalesce_rx_gl(rx_vector,
+                                      rx_vector->rx_group.coal.int_gl);
 
        hns3_set_vector_coalesce_rl(tx_vector, h->kinfo.int_rl_setting);
        hns3_set_vector_coalesce_rl(rx_vector, h->kinfo.int_rl_setting);
@@ -1111,6 +1057,7 @@ static const struct ethtool_ops hns3vf_ethtool_ops = {
        .get_channels = hns3_get_channels,
        .get_coalesce = hns3_get_coalesce,
        .set_coalesce = hns3_set_coalesce,
+       .get_link = hns3_get_link,
 };
 
 static const struct ethtool_ops hns3_ethtool_ops = {
index 3fd10a6bec5358cb1e44f1454ab7aa29257947a4..ee3cbac6dfaa88408b70712bf2437357f219632d 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/types.h>
 #include <linux/io.h>
 
-#define HCLGE_CMDQ_TX_TIMEOUT          1000
+#define HCLGE_CMDQ_TX_TIMEOUT          30000
 
 struct hclge_dev;
 struct hclge_desc {
@@ -414,6 +414,8 @@ struct hclge_pf_res_cmd {
 #define HCLGE_CFG_DEFAULT_SPEED_M      GENMASK(23, 16)
 #define HCLGE_CFG_RSS_SIZE_S   24
 #define HCLGE_CFG_RSS_SIZE_M   GENMASK(31, 24)
+#define HCLGE_CFG_SPEED_ABILITY_S      0
+#define HCLGE_CFG_SPEED_ABILITY_M      GENMASK(7, 0)
 
 struct hclge_cfg_param_cmd {
        __le32 offset;
index 5018d66331337647515dd15cc35d422d1f242780..955f0e3d5c954e61b96962fe09170a9175c9bfb1 100644 (file)
@@ -144,6 +144,8 @@ static int hclge_map_update(struct hnae3_handle *h)
        if (ret)
                return ret;
 
+       hclge_rss_indir_init_cfg(hdev);
+
        return hclge_rss_init_hw(hdev);
 }
 
@@ -203,9 +205,11 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
 
 static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
 {
+       u64 requests[HNAE3_MAX_TC], indications[HNAE3_MAX_TC];
        struct hclge_vport *vport = hclge_get_vport(h);
        struct hclge_dev *hdev = vport->back;
        u8 i, j, pfc_map, *prio_tc;
+       int ret;
 
        memset(pfc, 0, sizeof(*pfc));
        pfc->pfc_cap = hdev->pfc_max;
@@ -220,6 +224,18 @@ static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
                }
        }
 
+       ret = hclge_pfc_tx_stats_get(hdev, requests);
+       if (ret)
+               return ret;
+
+       ret = hclge_pfc_rx_stats_get(hdev, indications);
+       if (ret)
+               return ret;
+
+       for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+               pfc->requests[i] = requests[i];
+               pfc->indications[i] = indications[i];
+       }
        return 0;
 }
 
index 32bc6f68e2974762d9eb6f2597d976a59f833322..bede4117bad9dff1100bdc232393d94df8abf103 100644 (file)
@@ -55,6 +55,8 @@ static const struct pci_device_id ae_algo_pci_tbl[] = {
        {0, }
 };
 
+MODULE_DEVICE_TABLE(pci, ae_algo_pci_tbl);
+
 static const char hns3_nic_test_strs[][ETH_GSTRING_LEN] = {
        "Mac    Loopback test",
        "Serdes Loopback test",
@@ -1024,6 +1026,45 @@ static int hclge_parse_speed(int speed_cmd, int *speed)
        return 0;
 }
 
+static void hclge_parse_fiber_link_mode(struct hclge_dev *hdev,
+                                       u8 speed_ability)
+{
+       unsigned long *supported = hdev->hw.mac.supported;
+
+       if (speed_ability & HCLGE_SUPPORT_1G_BIT)
+               set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+                       supported);
+
+       if (speed_ability & HCLGE_SUPPORT_10G_BIT)
+               set_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
+                       supported);
+
+       if (speed_ability & HCLGE_SUPPORT_25G_BIT)
+               set_bit(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
+                       supported);
+
+       if (speed_ability & HCLGE_SUPPORT_50G_BIT)
+               set_bit(ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT,
+                       supported);
+
+       if (speed_ability & HCLGE_SUPPORT_100G_BIT)
+               set_bit(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT,
+                       supported);
+
+       set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported);
+       set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported);
+}
+
+static void hclge_parse_link_mode(struct hclge_dev *hdev, u8 speed_ability)
+{
+       u8 media_type = hdev->hw.mac.media_type;
+
+       if (media_type != HNAE3_MEDIA_TYPE_FIBER)
+               return;
+
+       hclge_parse_fiber_link_mode(hdev, speed_ability);
+}
+
 static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
 {
        struct hclge_cfg_param_cmd *req;
@@ -1072,6 +1113,10 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
 
        req = (struct hclge_cfg_param_cmd *)desc[1].data;
        cfg->numa_node_map = __le32_to_cpu(req->param[0]);
+
+       cfg->speed_ability = hnae_get_field(__le32_to_cpu(req->param[1]),
+                                           HCLGE_CFG_SPEED_ABILITY_M,
+                                           HCLGE_CFG_SPEED_ABILITY_S);
 }
 
 /* hclge_get_cfg: query the static parameter from flash
@@ -1160,6 +1205,8 @@ static int hclge_configure(struct hclge_dev *hdev)
                return ret;
        }
 
+       hclge_parse_link_mode(hdev, cfg.speed_ability);
+
        if ((hdev->tc_max > HNAE3_MAX_TC) ||
            (hdev->tc_max < 1)) {
                dev_warn(&hdev->pdev->dev, "TC num = %d.\n",
@@ -2702,7 +2749,7 @@ static int hclge_reset_wait(struct hclge_dev *hdev)
        return 0;
 }
 
-static int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id)
+int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id)
 {
        struct hclge_desc desc;
        struct hclge_reset_cmd *req = (struct hclge_reset_cmd *)desc.data;
@@ -2798,27 +2845,31 @@ static void hclge_reset(struct hclge_dev *hdev)
        hclge_notify_client(hdev, HNAE3_UP_CLIENT);
 }
 
-static void hclge_reset_event(struct hnae3_handle *handle,
-                             enum hnae3_reset_type reset)
+static void hclge_reset_event(struct hnae3_handle *handle)
 {
        struct hclge_vport *vport = hclge_get_vport(handle);
        struct hclge_dev *hdev = vport->back;
 
-       dev_info(&hdev->pdev->dev,
-                "Receive reset event , reset_type is %d", reset);
+       /* check if this is a new reset request and we are not here just because
+        * last reset attempt did not succeed and watchdog hit us again. We will
+        * know this if last reset request did not occur very recently (watchdog
+        * timer = 5*HZ, let us check after sufficiently large time, say 4*5*HZ)
+        * In case of new request we reset the "reset level" to PF reset.
+        */
+       if (time_after(jiffies, (handle->last_reset_time + 4 * 5 * HZ)))
+               handle->reset_level = HNAE3_FUNC_RESET;
 
-       switch (reset) {
-       case HNAE3_FUNC_RESET:
-       case HNAE3_CORE_RESET:
-       case HNAE3_GLOBAL_RESET:
-               /* request reset & schedule reset task */
-               set_bit(reset, &hdev->reset_request);
-               hclge_reset_task_schedule(hdev);
-               break;
-       default:
-               dev_warn(&hdev->pdev->dev, "Unsupported reset event:%d", reset);
-               break;
-       }
+       dev_info(&hdev->pdev->dev, "received reset event , reset type is %d",
+                handle->reset_level);
+
+       /* request reset & schedule reset task */
+       set_bit(handle->reset_level, &hdev->reset_request);
+       hclge_reset_task_schedule(hdev);
+
+       if (handle->reset_level < HNAE3_GLOBAL_RESET)
+               handle->reset_level++;
+
+       handle->last_reset_time = jiffies;
 }
 
 static void hclge_reset_subtask(struct hclge_dev *hdev)
@@ -2969,6 +3020,24 @@ static int hclge_get_vector_index(struct hclge_dev *hdev, int vector)
        return -EINVAL;
 }
 
+static int hclge_put_vector(struct hnae3_handle *handle, int vector)
+{
+       struct hclge_vport *vport = hclge_get_vport(handle);
+       struct hclge_dev *hdev = vport->back;
+       int vector_id;
+
+       vector_id = hclge_get_vector_index(hdev, vector);
+       if (vector_id < 0) {
+               dev_err(&hdev->pdev->dev,
+                       "Get vector index fail. vector_id =%d\n", vector_id);
+               return vector_id;
+       }
+
+       hclge_free_vector(hdev, vector_id);
+
+       return 0;
+}
+
 static u32 hclge_get_rss_key_size(struct hnae3_handle *handle)
 {
        return HCLGE_RSS_KEY_SIZE;
@@ -2979,31 +3048,6 @@ static u32 hclge_get_rss_indir_size(struct hnae3_handle *handle)
        return HCLGE_RSS_IND_TBL_SIZE;
 }
 
-static int hclge_get_rss_algo(struct hclge_dev *hdev)
-{
-       struct hclge_rss_config_cmd *req;
-       struct hclge_desc desc;
-       int rss_hash_algo;
-       int ret;
-
-       hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_GENERIC_CONFIG, true);
-
-       ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret) {
-               dev_err(&hdev->pdev->dev,
-                       "Get link status error, status =%d\n", ret);
-               return ret;
-       }
-
-       req = (struct hclge_rss_config_cmd *)desc.data;
-       rss_hash_algo = (req->hash_config & HCLGE_RSS_HASH_ALGO_MASK);
-
-       if (rss_hash_algo == HCLGE_RSS_HASH_ALGO_TOEPLITZ)
-               return ETH_RSS_HASH_TOP;
-
-       return -EINVAL;
-}
-
 static int hclge_set_rss_algo_key(struct hclge_dev *hdev,
                                  const u8 hfunc, const u8 *key)
 {
@@ -3042,7 +3086,7 @@ static int hclge_set_rss_algo_key(struct hclge_dev *hdev,
        return 0;
 }
 
-static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u32 *indir)
+static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u8 *indir)
 {
        struct hclge_rss_indirection_table_cmd *req;
        struct hclge_desc desc;
@@ -3116,14 +3160,16 @@ static int hclge_set_rss_input_tuple(struct hclge_dev *hdev)
        hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, false);
 
        req = (struct hclge_rss_input_tuple_cmd *)desc.data;
-       req->ipv4_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
-       req->ipv4_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
-       req->ipv4_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP;
-       req->ipv4_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
-       req->ipv6_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
-       req->ipv6_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
-       req->ipv6_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP;
-       req->ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+
+       /* Get the tuple cfg from pf */
+       req->ipv4_tcp_en = hdev->vport[0].rss_tuple_sets.ipv4_tcp_en;
+       req->ipv4_udp_en = hdev->vport[0].rss_tuple_sets.ipv4_udp_en;
+       req->ipv4_sctp_en = hdev->vport[0].rss_tuple_sets.ipv4_sctp_en;
+       req->ipv4_fragment_en = hdev->vport[0].rss_tuple_sets.ipv4_fragment_en;
+       req->ipv6_tcp_en = hdev->vport[0].rss_tuple_sets.ipv6_tcp_en;
+       req->ipv6_udp_en = hdev->vport[0].rss_tuple_sets.ipv6_udp_en;
+       req->ipv6_sctp_en = hdev->vport[0].rss_tuple_sets.ipv6_sctp_en;
+       req->ipv6_fragment_en = hdev->vport[0].rss_tuple_sets.ipv6_fragment_en;
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
        if (ret) {
                dev_err(&hdev->pdev->dev,
@@ -3138,12 +3184,11 @@ static int hclge_get_rss(struct hnae3_handle *handle, u32 *indir,
                         u8 *key, u8 *hfunc)
 {
        struct hclge_vport *vport = hclge_get_vport(handle);
-       struct hclge_dev *hdev = vport->back;
        int i;
 
        /* Get hash algorithm */
        if (hfunc)
-               *hfunc = hclge_get_rss_algo(hdev);
+               *hfunc = vport->rss_algo;
 
        /* Get the RSS Key required by the user */
        if (key)
@@ -3167,8 +3212,6 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
 
        /* Set the RSS Hash Key if specififed by the user */
        if (key) {
-               /* Update the shadow RSS key with user specified qids */
-               memcpy(vport->rss_hash_key, key, HCLGE_RSS_KEY_SIZE);
 
                if (hfunc == ETH_RSS_HASH_TOP ||
                    hfunc == ETH_RSS_HASH_NO_CHANGE)
@@ -3178,6 +3221,10 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
                ret = hclge_set_rss_algo_key(hdev, hash_algo, key);
                if (ret)
                        return ret;
+
+               /* Update the shadow RSS key with user specified qids */
+               memcpy(vport->rss_hash_key, key, HCLGE_RSS_KEY_SIZE);
+               vport->rss_algo = hash_algo;
        }
 
        /* Update the shadow RSS table with user specified qids */
@@ -3185,8 +3232,7 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
                vport->rss_indirection_tbl[i] = indir[i];
 
        /* Update the hardware */
-       ret = hclge_set_rss_indir_table(hdev, indir);
-       return ret;
+       return hclge_set_rss_indir_table(hdev, vport->rss_indirection_tbl);
 }
 
 static u8 hclge_get_rss_hash_bits(struct ethtool_rxnfc *nfc)
@@ -3229,15 +3275,16 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle,
                return -EINVAL;
 
        req = (struct hclge_rss_input_tuple_cmd *)desc.data;
-       hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true);
-       ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret) {
-               dev_err(&hdev->pdev->dev,
-                       "Read rss tuple fail, status = %d\n", ret);
-               return ret;
-       }
+       hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, false);
 
-       hclge_cmd_reuse_desc(&desc, false);
+       req->ipv4_tcp_en = vport->rss_tuple_sets.ipv4_tcp_en;
+       req->ipv4_udp_en = vport->rss_tuple_sets.ipv4_udp_en;
+       req->ipv4_sctp_en = vport->rss_tuple_sets.ipv4_sctp_en;
+       req->ipv4_fragment_en = vport->rss_tuple_sets.ipv4_fragment_en;
+       req->ipv6_tcp_en = vport->rss_tuple_sets.ipv6_tcp_en;
+       req->ipv6_udp_en = vport->rss_tuple_sets.ipv6_udp_en;
+       req->ipv6_sctp_en = vport->rss_tuple_sets.ipv6_sctp_en;
+       req->ipv6_fragment_en = vport->rss_tuple_sets.ipv6_fragment_en;
 
        tuple_sets = hclge_get_rss_hash_bits(nfc);
        switch (nfc->flow_type) {
@@ -3274,52 +3321,49 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle,
        }
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret)
+       if (ret) {
                dev_err(&hdev->pdev->dev,
                        "Set rss tuple fail, status = %d\n", ret);
+               return ret;
+       }
 
-       return ret;
+       vport->rss_tuple_sets.ipv4_tcp_en = req->ipv4_tcp_en;
+       vport->rss_tuple_sets.ipv4_udp_en = req->ipv4_udp_en;
+       vport->rss_tuple_sets.ipv4_sctp_en = req->ipv4_sctp_en;
+       vport->rss_tuple_sets.ipv4_fragment_en = req->ipv4_fragment_en;
+       vport->rss_tuple_sets.ipv6_tcp_en = req->ipv6_tcp_en;
+       vport->rss_tuple_sets.ipv6_udp_en = req->ipv6_udp_en;
+       vport->rss_tuple_sets.ipv6_sctp_en = req->ipv6_sctp_en;
+       vport->rss_tuple_sets.ipv6_fragment_en = req->ipv6_fragment_en;
+       return 0;
 }
 
 static int hclge_get_rss_tuple(struct hnae3_handle *handle,
                               struct ethtool_rxnfc *nfc)
 {
        struct hclge_vport *vport = hclge_get_vport(handle);
-       struct hclge_dev *hdev = vport->back;
-       struct hclge_rss_input_tuple_cmd *req;
-       struct hclge_desc desc;
        u8 tuple_sets;
-       int ret;
 
        nfc->data = 0;
 
-       req = (struct hclge_rss_input_tuple_cmd *)desc.data;
-       hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true);
-       ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret) {
-               dev_err(&hdev->pdev->dev,
-                       "Read rss tuple fail, status = %d\n", ret);
-               return ret;
-       }
-
        switch (nfc->flow_type) {
        case TCP_V4_FLOW:
-               tuple_sets = req->ipv4_tcp_en;
+               tuple_sets = vport->rss_tuple_sets.ipv4_tcp_en;
                break;
        case UDP_V4_FLOW:
-               tuple_sets = req->ipv4_udp_en;
+               tuple_sets = vport->rss_tuple_sets.ipv4_udp_en;
                break;
        case TCP_V6_FLOW:
-               tuple_sets = req->ipv6_tcp_en;
+               tuple_sets = vport->rss_tuple_sets.ipv6_tcp_en;
                break;
        case UDP_V6_FLOW:
-               tuple_sets = req->ipv6_udp_en;
+               tuple_sets = vport->rss_tuple_sets.ipv6_udp_en;
                break;
        case SCTP_V4_FLOW:
-               tuple_sets = req->ipv4_sctp_en;
+               tuple_sets = vport->rss_tuple_sets.ipv4_sctp_en;
                break;
        case SCTP_V6_FLOW:
-               tuple_sets = req->ipv6_sctp_en;
+               tuple_sets = vport->rss_tuple_sets.ipv6_sctp_en;
                break;
        case IPV4_FLOW:
        case IPV6_FLOW:
@@ -3354,50 +3398,28 @@ static int hclge_get_tc_size(struct hnae3_handle *handle)
 
 int hclge_rss_init_hw(struct hclge_dev *hdev)
 {
-       const  u8 hfunc = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
        struct hclge_vport *vport = hdev->vport;
+       u8 *rss_indir = vport[0].rss_indirection_tbl;
+       u16 rss_size = vport[0].alloc_rss_size;
+       u8 *key = vport[0].rss_hash_key;
+       u8 hfunc = vport[0].rss_algo;
        u16 tc_offset[HCLGE_MAX_TC_NUM];
-       u8 rss_key[HCLGE_RSS_KEY_SIZE];
        u16 tc_valid[HCLGE_MAX_TC_NUM];
        u16 tc_size[HCLGE_MAX_TC_NUM];
-       u32 *rss_indir = NULL;
-       u16 rss_size = 0, roundup_size;
-       const u8 *key;
-       int i, ret, j;
-
-       rss_indir = kcalloc(HCLGE_RSS_IND_TBL_SIZE, sizeof(u32), GFP_KERNEL);
-       if (!rss_indir)
-               return -ENOMEM;
-
-       /* Get default RSS key */
-       netdev_rss_key_fill(rss_key, HCLGE_RSS_KEY_SIZE);
-
-       /* Initialize RSS indirect table for each vport */
-       for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
-               for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++) {
-                       vport[j].rss_indirection_tbl[i] =
-                               i % vport[j].alloc_rss_size;
-
-                       /* vport 0 is for PF */
-                       if (j != 0)
-                               continue;
+       u16 roundup_size;
+       int i, ret;
 
-                       rss_size = vport[j].alloc_rss_size;
-                       rss_indir[i] = vport[j].rss_indirection_tbl[i];
-               }
-       }
        ret = hclge_set_rss_indir_table(hdev, rss_indir);
        if (ret)
-               goto err;
+               return ret;
 
-       key = rss_key;
        ret = hclge_set_rss_algo_key(hdev, hfunc, key);
        if (ret)
-               goto err;
+               return ret;
 
        ret = hclge_set_rss_input_tuple(hdev);
        if (ret)
-               goto err;
+               return ret;
 
        /* Each TC have the same queue size, and tc_size set to hardware is
         * the log2 of roundup power of two of rss_size, the acutal queue
@@ -3407,8 +3429,7 @@ int hclge_rss_init_hw(struct hclge_dev *hdev)
                dev_err(&hdev->pdev->dev,
                        "Configure rss tc size failed, invalid TC_SIZE = %d\n",
                        rss_size);
-               ret = -EINVAL;
-               goto err;
+               return -EINVAL;
        }
 
        roundup_size = roundup_pow_of_two(rss_size);
@@ -3425,12 +3446,50 @@ int hclge_rss_init_hw(struct hclge_dev *hdev)
                tc_offset[i] = rss_size * i;
        }
 
-       ret = hclge_set_rss_tc_mode(hdev, tc_valid, tc_size, tc_offset);
+       return hclge_set_rss_tc_mode(hdev, tc_valid, tc_size, tc_offset);
+}
 
-err:
-       kfree(rss_indir);
+void hclge_rss_indir_init_cfg(struct hclge_dev *hdev)
+{
+       struct hclge_vport *vport = hdev->vport;
+       int i, j;
 
-       return ret;
+       for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
+               for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
+                       vport[j].rss_indirection_tbl[i] =
+                               i % vport[j].alloc_rss_size;
+       }
+}
+
+static void hclge_rss_init_cfg(struct hclge_dev *hdev)
+{
+       struct hclge_vport *vport = hdev->vport;
+       int i;
+
+       netdev_rss_key_fill(vport->rss_hash_key, HCLGE_RSS_KEY_SIZE);
+
+       for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
+               vport[i].rss_tuple_sets.ipv4_tcp_en =
+                       HCLGE_RSS_INPUT_TUPLE_OTHER;
+               vport[i].rss_tuple_sets.ipv4_udp_en =
+                       HCLGE_RSS_INPUT_TUPLE_OTHER;
+               vport[i].rss_tuple_sets.ipv4_sctp_en =
+                       HCLGE_RSS_INPUT_TUPLE_SCTP;
+               vport[i].rss_tuple_sets.ipv4_fragment_en =
+                       HCLGE_RSS_INPUT_TUPLE_OTHER;
+               vport[i].rss_tuple_sets.ipv6_tcp_en =
+                       HCLGE_RSS_INPUT_TUPLE_OTHER;
+               vport[i].rss_tuple_sets.ipv6_udp_en =
+                       HCLGE_RSS_INPUT_TUPLE_OTHER;
+               vport[i].rss_tuple_sets.ipv6_sctp_en =
+                       HCLGE_RSS_INPUT_TUPLE_SCTP;
+               vport[i].rss_tuple_sets.ipv6_fragment_en =
+                       HCLGE_RSS_INPUT_TUPLE_OTHER;
+
+               vport[i].rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
+       }
+
+       hclge_rss_indir_init_cfg(hdev);
 }
 
 int hclge_bind_ring_with_vector(struct hclge_vport *vport,
@@ -3533,18 +3592,13 @@ static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle,
        }
 
        ret = hclge_bind_ring_with_vector(vport, vector_id, false, ring_chain);
-       if (ret) {
+       if (ret)
                dev_err(&handle->pdev->dev,
                        "Unmap ring from vector fail. vectorid=%d, ret =%d\n",
                        vector_id,
                        ret);
-               return ret;
-       }
-
-       /* Free this MSIX or MSI vector */
-       hclge_free_vector(hdev, vector_id);
 
-       return 0;
+       return ret;
 }
 
 int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
@@ -3717,20 +3771,11 @@ static int hclge_ae_start(struct hnae3_handle *handle)
 {
        struct hclge_vport *vport = hclge_get_vport(handle);
        struct hclge_dev *hdev = vport->back;
-       int i, queue_id, ret;
+       int i, ret;
 
-       for (i = 0; i < vport->alloc_tqps; i++) {
-               /* todo clear interrupt */
-               /* ring enable */
-               queue_id = hclge_get_queue_id(handle->kinfo.tqp[i]);
-               if (queue_id < 0) {
-                       dev_warn(&hdev->pdev->dev,
-                                "Get invalid queue id, ignore it\n");
-                       continue;
-               }
+       for (i = 0; i < vport->alloc_tqps; i++)
+               hclge_tqp_enable(hdev, i, 0, true);
 
-               hclge_tqp_enable(hdev, queue_id, 0, true);
-       }
        /* mac enable */
        hclge_cfg_mac_mode(hdev, true);
        clear_bit(HCLGE_STATE_DOWN, &hdev->state);
@@ -3750,19 +3795,11 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
 {
        struct hclge_vport *vport = hclge_get_vport(handle);
        struct hclge_dev *hdev = vport->back;
-       int i, queue_id;
+       int i;
 
-       for (i = 0; i < vport->alloc_tqps; i++) {
-               /* Ring disable */
-               queue_id = hclge_get_queue_id(handle->kinfo.tqp[i]);
-               if (queue_id < 0) {
-                       dev_warn(&hdev->pdev->dev,
-                                "Get invalid queue id, ignore it\n");
-                       continue;
-               }
+       for (i = 0; i < vport->alloc_tqps; i++)
+               hclge_tqp_enable(hdev, i, 0, false);
 
-               hclge_tqp_enable(hdev, queue_id, 0, false);
-       }
        /* Mac disable */
        hclge_cfg_mac_mode(hdev, false);
 
@@ -3770,6 +3807,9 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
 
        /* reset tqp stats */
        hclge_reset_tqp_stats(handle);
+       del_timer_sync(&hdev->service_timer);
+       cancel_work_sync(&hdev->service_task);
+       hclge_update_link_status(hdev);
 }
 
 static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
@@ -3790,11 +3830,11 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
                if ((!resp_code) || (resp_code == 1)) {
                        return_status = 0;
                } else if (resp_code == 2) {
-                       return_status = -EIO;
+                       return_status = -ENOSPC;
                        dev_err(&hdev->pdev->dev,
                                "add mac addr failed for uc_overflow.\n");
                } else if (resp_code == 3) {
-                       return_status = -EIO;
+                       return_status = -ENOSPC;
                        dev_err(&hdev->pdev->dev,
                                "add mac addr failed for mc_overflow.\n");
                } else {
@@ -3806,7 +3846,7 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
                if (!resp_code) {
                        return_status = 0;
                } else if (resp_code == 1) {
-                       return_status = -EIO;
+                       return_status = -ENOENT;
                        dev_dbg(&hdev->pdev->dev,
                                "remove mac addr failed for miss.\n");
                } else {
@@ -3818,7 +3858,7 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
                if (!resp_code) {
                        return_status = 0;
                } else if (resp_code == 1) {
-                       return_status = -EIO;
+                       return_status = -ENOENT;
                        dev_dbg(&hdev->pdev->dev,
                                "lookup mac addr failed for miss.\n");
                } else {
@@ -3827,7 +3867,7 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
                                resp_code);
                }
        } else {
-               return_status = -EIO;
+               return_status = -EINVAL;
                dev_err(&hdev->pdev->dev,
                        "unknown opcode for get_mac_vlan_cmd_status,opcode=%d.\n",
                        op);
@@ -4118,8 +4158,9 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
 {
        struct hclge_dev *hdev = vport->back;
        struct hclge_mac_vlan_tbl_entry_cmd req;
-       enum hclge_cmd_status status;
+       struct hclge_desc desc;
        u16 egress_port = 0;
+       int ret;
 
        /* mac addr check */
        if (is_zero_ether_addr(addr) ||
@@ -4151,9 +4192,23 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
 
        hclge_prepare_mac_addr(&req, addr);
 
-       status = hclge_add_mac_vlan_tbl(vport, &req, NULL);
+       /* Lookup the mac address in the mac_vlan table, and add
+        * it if the entry is inexistent. Repeated unicast entry
+        * is not allowed in the mac vlan table.
+        */
+       ret = hclge_lookup_mac_vlan_tbl(vport, &req, &desc, false);
+       if (ret == -ENOENT)
+               return hclge_add_mac_vlan_tbl(vport, &req, NULL);
+
+       /* check if we just hit the duplicate */
+       if (!ret)
+               ret = -EINVAL;
 
-       return status;
+       dev_err(&hdev->pdev->dev,
+               "PF failed to add unicast entry(%pM) in the MAC table\n",
+               addr);
+
+       return ret;
 }
 
 static int hclge_rm_uc_addr(struct hnae3_handle *handle,
@@ -4169,7 +4224,7 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport,
 {
        struct hclge_dev *hdev = vport->back;
        struct hclge_mac_vlan_tbl_entry_cmd req;
-       enum hclge_cmd_status status;
+       int ret;
 
        /* mac addr check */
        if (is_zero_ether_addr(addr) ||
@@ -4185,9 +4240,9 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport,
        hnae_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
        hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
        hclge_prepare_mac_addr(&req, addr);
-       status = hclge_remove_mac_vlan_tbl(vport, &req);
+       ret = hclge_remove_mac_vlan_tbl(vport, &req);
 
-       return status;
+       return ret;
 }
 
 static int hclge_add_mc_addr(struct hnae3_handle *handle,
@@ -4392,7 +4447,8 @@ static void hclge_get_mac_addr(struct hnae3_handle *handle, u8 *p)
        ether_addr_copy(p, hdev->hw.mac.mac_addr);
 }
 
-static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p)
+static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p,
+                             bool is_first)
 {
        const unsigned char *new_addr = (const unsigned char *)p;
        struct hclge_vport *vport = hclge_get_vport(handle);
@@ -4409,11 +4465,9 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p)
                return -EINVAL;
        }
 
-       ret = hclge_rm_uc_addr(handle, hdev->hw.mac.mac_addr);
-       if (ret)
+       if (!is_first && hclge_rm_uc_addr(handle, hdev->hw.mac.mac_addr))
                dev_warn(&hdev->pdev->dev,
-                        "remove old uc mac address fail, ret =%d.\n",
-                        ret);
+                        "remove old uc mac address fail.\n");
 
        ret = hclge_add_uc_addr(handle, new_addr);
        if (ret) {
@@ -4421,17 +4475,15 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p)
                        "add uc mac address fail, ret =%d.\n",
                        ret);
 
-               ret = hclge_add_uc_addr(handle, hdev->hw.mac.mac_addr);
-               if (ret) {
+               if (!is_first &&
+                   hclge_add_uc_addr(handle, hdev->hw.mac.mac_addr))
                        dev_err(&hdev->pdev->dev,
-                               "restore uc mac address fail, ret =%d.\n",
-                               ret);
-               }
+                               "restore uc mac address fail.\n");
 
                return -EIO;
        }
 
-       ret = hclge_mac_pause_addr_cfg(hdev, new_addr);
+       ret = hclge_pause_addr_cfg(hdev, new_addr);
        if (ret) {
                dev_err(&hdev->pdev->dev,
                        "configure mac pause address fail, ret =%d.\n",
@@ -4771,11 +4823,9 @@ static int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
        return hclge_set_vlan_rx_offload_cfg(vport);
 }
 
-static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
+static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mtu)
 {
-       struct hclge_vport *vport = hclge_get_vport(handle);
        struct hclge_config_max_frm_size_cmd *req;
-       struct hclge_dev *hdev = vport->back;
        struct hclge_desc desc;
        int max_frm_size;
        int ret;
@@ -4804,6 +4854,27 @@ static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
        return 0;
 }
 
+static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
+{
+       struct hclge_vport *vport = hclge_get_vport(handle);
+       struct hclge_dev *hdev = vport->back;
+       int ret;
+
+       ret = hclge_set_mac_mtu(hdev, new_mtu);
+       if (ret) {
+               dev_err(&hdev->pdev->dev,
+                       "Change mtu fail, ret =%d\n", ret);
+               return ret;
+       }
+
+       ret = hclge_buffer_alloc(hdev);
+       if (ret)
+               dev_err(&hdev->pdev->dev,
+                       "Allocate buffer fail, ret =%d\n", ret);
+
+       return ret;
+}
+
 static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id,
                                    bool enable)
 {
@@ -4848,21 +4919,36 @@ static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id)
        return hnae_get_bit(req->ready_to_reset, HCLGE_TQP_RESET_B);
 }
 
+static u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle,
+                                         u16 queue_id)
+{
+       struct hnae3_queue *queue;
+       struct hclge_tqp *tqp;
+
+       queue = handle->kinfo.tqp[queue_id];
+       tqp = container_of(queue, struct hclge_tqp, q);
+
+       return tqp->index;
+}
+
 void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 {
        struct hclge_vport *vport = hclge_get_vport(handle);
        struct hclge_dev *hdev = vport->back;
        int reset_try_times = 0;
        int reset_status;
+       u16 queue_gid;
        int ret;
 
+       queue_gid = hclge_covert_handle_qid_global(handle, queue_id);
+
        ret = hclge_tqp_enable(hdev, queue_id, 0, false);
        if (ret) {
                dev_warn(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret);
                return;
        }
 
-       ret = hclge_send_reset_tqp_cmd(hdev, queue_id, true);
+       ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
        if (ret) {
                dev_warn(&hdev->pdev->dev,
                         "Send reset tqp cmd fail, ret = %d\n", ret);
@@ -4873,7 +4959,7 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
        while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
                /* Wait for tqp hw reset */
                msleep(20);
-               reset_status = hclge_get_reset_status(hdev, queue_id);
+               reset_status = hclge_get_reset_status(hdev, queue_gid);
                if (reset_status)
                        break;
        }
@@ -4883,7 +4969,7 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
                return;
        }
 
-       ret = hclge_send_reset_tqp_cmd(hdev, queue_id, false);
+       ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false);
        if (ret) {
                dev_warn(&hdev->pdev->dev,
                         "Deassert the soft reset fail, ret = %d\n", ret);
@@ -4891,6 +4977,43 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
        }
 }
 
+void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id)
+{
+       struct hclge_dev *hdev = vport->back;
+       int reset_try_times = 0;
+       int reset_status;
+       u16 queue_gid;
+       int ret;
+
+       queue_gid = hclge_covert_handle_qid_global(&vport->nic, queue_id);
+
+       ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
+       if (ret) {
+               dev_warn(&hdev->pdev->dev,
+                        "Send reset tqp cmd fail, ret = %d\n", ret);
+               return;
+       }
+
+       reset_try_times = 0;
+       while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
+               /* Wait for tqp hw reset */
+               msleep(20);
+               reset_status = hclge_get_reset_status(hdev, queue_gid);
+               if (reset_status)
+                       break;
+       }
+
+       if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) {
+               dev_warn(&hdev->pdev->dev, "Reset TQP fail\n");
+               return;
+       }
+
+       ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false);
+       if (ret)
+               dev_warn(&hdev->pdev->dev,
+                        "Deassert the soft reset fail, ret = %d\n", ret);
+}
+
 static u32 hclge_get_fw_version(struct hnae3_handle *handle)
 {
        struct hclge_vport *vport = hclge_get_vport(handle);
@@ -5376,11 +5499,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
                dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret);
                return ret;
        }
-       ret = hclge_buffer_alloc(hdev);
-       if (ret) {
-               dev_err(&pdev->dev, "Buffer allocate fail, ret =%d\n", ret);
-               return  ret;
-       }
 
        ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX);
        if (ret) {
@@ -5400,6 +5518,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
                return ret;
        }
 
+       hclge_rss_init_cfg(hdev);
        ret = hclge_rss_init_hw(hdev);
        if (ret) {
                dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
@@ -5486,12 +5605,6 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
                return ret;
        }
 
-       ret = hclge_buffer_alloc(hdev);
-       if (ret) {
-               dev_err(&pdev->dev, "Buffer allocate fail, ret =%d\n", ret);
-               return ret;
-       }
-
        ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX);
        if (ret) {
                dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret);
@@ -5504,9 +5617,9 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
                return ret;
        }
 
-       ret = hclge_tm_schd_init(hdev);
+       ret = hclge_tm_init_hw(hdev);
        if (ret) {
-               dev_err(&pdev->dev, "tm schd init fail, ret =%d\n", ret);
+               dev_err(&pdev->dev, "tm init hw fail, ret =%d\n", ret);
                return ret;
        }
 
@@ -5997,6 +6110,42 @@ static int hclge_update_led_status(struct hclge_dev *hdev)
                                        HCLGE_LED_NO_CHANGE);
 }
 
+static void hclge_get_link_mode(struct hnae3_handle *handle,
+                               unsigned long *supported,
+                               unsigned long *advertising)
+{
+       unsigned int size = BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS);
+       struct hclge_vport *vport = hclge_get_vport(handle);
+       struct hclge_dev *hdev = vport->back;
+       unsigned int idx = 0;
+
+       for (; idx < size; idx++) {
+               supported[idx] = hdev->hw.mac.supported[idx];
+               advertising[idx] = hdev->hw.mac.advertising[idx];
+       }
+}
+
+static void hclge_get_port_type(struct hnae3_handle *handle,
+                               u8 *port_type)
+{
+       struct hclge_vport *vport = hclge_get_vport(handle);
+       struct hclge_dev *hdev = vport->back;
+       u8 media_type = hdev->hw.mac.media_type;
+
+       switch (media_type) {
+       case HNAE3_MEDIA_TYPE_FIBER:
+               *port_type = PORT_FIBRE;
+               break;
+       case HNAE3_MEDIA_TYPE_COPPER:
+               *port_type = PORT_TP;
+               break;
+       case HNAE3_MEDIA_TYPE_UNKNOWN:
+       default:
+               *port_type = PORT_OTHER;
+               break;
+       }
+}
+
 static const struct hnae3_ae_ops hclge_ops = {
        .init_ae_dev = hclge_init_ae_dev,
        .uninit_ae_dev = hclge_uninit_ae_dev,
@@ -6005,6 +6154,7 @@ static const struct hnae3_ae_ops hclge_ops = {
        .map_ring_to_vector = hclge_map_ring_to_vector,
        .unmap_ring_from_vector = hclge_unmap_ring_frm_vector,
        .get_vector = hclge_get_vector,
+       .put_vector = hclge_put_vector,
        .set_promisc_mode = hclge_set_promisc_mode,
        .set_loopback = hclge_set_loopback,
        .start = hclge_ae_start,
@@ -6051,6 +6201,8 @@ static const struct hnae3_ae_ops hclge_ops = {
        .get_regs_len = hclge_get_regs_len,
        .get_regs = hclge_get_regs,
        .set_led_id = hclge_set_led_id,
+       .get_link_mode = hclge_get_link_mode,
+       .get_port_type = hclge_get_port_type,
 };
 
 static struct hnae3_ae_algo ae_algo = {
index d99a76a9557cdfb90cdf988546389477e1374a48..0f4157e7128215467e22b04654046182d8387425 100644 (file)
 #define HCLGE_MAC_MIN_FRAME            64
 #define HCLGE_MAC_MAX_FRAME            9728
 
+#define HCLGE_SUPPORT_1G_BIT           BIT(0)
+#define HCLGE_SUPPORT_10G_BIT          BIT(1)
+#define HCLGE_SUPPORT_25G_BIT          BIT(2)
+#define HCLGE_SUPPORT_50G_BIT          BIT(3)
+#define HCLGE_SUPPORT_100G_BIT         BIT(4)
+
 enum HCLGE_DEV_STATE {
        HCLGE_STATE_REINITING,
        HCLGE_STATE_DOWN,
@@ -170,6 +176,8 @@ struct hclge_mac {
        struct phy_device *phydev;
        struct mii_bus *mdio_bus;
        phy_interface_t phy_if;
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising);
 };
 
 struct hclge_hw {
@@ -236,6 +244,7 @@ struct hclge_cfg {
        u8 mac_addr[ETH_ALEN];
        u8 default_speed;
        u32 numa_node_map;
+       u8 speed_ability;
 };
 
 struct hclge_tm_info {
@@ -573,12 +582,27 @@ struct hclge_rx_vtag_cfg {
        bool vlan2_vlan_prionly;/* Outer VLAN Tag up to descriptor Enable */
 };
 
+struct hclge_rss_tuple_cfg {
+       u8 ipv4_tcp_en;
+       u8 ipv4_udp_en;
+       u8 ipv4_sctp_en;
+       u8 ipv4_fragment_en;
+       u8 ipv6_tcp_en;
+       u8 ipv6_udp_en;
+       u8 ipv6_sctp_en;
+       u8 ipv6_fragment_en;
+};
+
 struct hclge_vport {
        u16 alloc_tqps; /* Allocated Tx/Rx queues */
 
        u8  rss_hash_key[HCLGE_RSS_KEY_SIZE]; /* User configured hash keys */
        /* User configured lookup table entries */
        u8  rss_indirection_tbl[HCLGE_RSS_IND_TBL_SIZE];
+       int rss_algo;           /* User configured hash algorithm */
+       /* User configured rss tuple sets */
+       struct hclge_rss_tuple_cfg rss_tuple_sets;
+
        u16 alloc_rss_size;
 
        u16 qs_offset;
@@ -627,8 +651,11 @@ int hclge_set_vf_vlan_common(struct hclge_dev *vport, int vfid,
 
 int hclge_buffer_alloc(struct hclge_dev *hdev);
 int hclge_rss_init_hw(struct hclge_dev *hdev);
+void hclge_rss_indir_init_cfg(struct hclge_dev *hdev);
 
 void hclge_mbx_handler(struct hclge_dev *hdev);
 void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
+void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id);
 int hclge_cfg_flowctrl(struct hclge_dev *hdev);
+int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id);
 #endif
index f38fc5ce9f5120f2897a9d5b707a75cb7d166d3d..a6f7ffa9c25975df834dae0e777d369efc29ccd8 100644 (file)
@@ -79,6 +79,18 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len,
        return status;
 }
 
+static int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport)
+{
+       u8 msg_data[2];
+       u8 dest_vfid;
+
+       dest_vfid = (u8)vport->vport_id;
+
+       /* send this requested info to VF */
+       return hclge_send_mbx_msg(vport, msg_data, sizeof(u8),
+                                 HCLGE_MBX_ASSERTING_RESET, dest_vfid);
+}
+
 static void hclge_free_vector_ring_chain(struct hnae3_ring_chain_node *head)
 {
        struct hnae3_ring_chain_node *chain_tmp, *chain;
@@ -105,14 +117,17 @@ static int hclge_get_ring_chain_from_mbx(
                        struct hnae3_ring_chain_node *ring_chain,
                        struct hclge_vport *vport)
 {
-#define HCLGE_RING_NODE_VARIABLE_NUM           3
-#define HCLGE_RING_MAP_MBX_BASIC_MSG_NUM       3
        struct hnae3_ring_chain_node *cur_chain, *new_chain;
        int ring_num;
        int i;
 
        ring_num = req->msg[2];
 
+       if (ring_num > ((HCLGE_MBX_VF_MSG_DATA_NUM -
+               HCLGE_MBX_RING_MAP_BASIC_MSG_NUM) /
+               HCLGE_MBX_RING_NODE_VARIABLE_NUM))
+               return -ENOMEM;
+
        hnae_set_bit(ring_chain->flag, HNAE3_RING_TYPE_B, req->msg[3]);
        ring_chain->tqp_index =
                        hclge_get_queue_id(vport->nic.kinfo.tqp[req->msg[4]]);
@@ -128,18 +143,18 @@ static int hclge_get_ring_chain_from_mbx(
                        goto err;
 
                hnae_set_bit(new_chain->flag, HNAE3_RING_TYPE_B,
-                            req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
-                            HCLGE_RING_MAP_MBX_BASIC_MSG_NUM]);
+                            req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i +
+                            HCLGE_MBX_RING_MAP_BASIC_MSG_NUM]);
 
                new_chain->tqp_index =
                hclge_get_queue_id(vport->nic.kinfo.tqp
-                       [req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
-                       HCLGE_RING_MAP_MBX_BASIC_MSG_NUM + 1]]);
+                       [req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i +
+                       HCLGE_MBX_RING_MAP_BASIC_MSG_NUM + 1]]);
 
                hnae_set_field(new_chain->int_gl_idx, HCLGE_INT_GL_IDX_M,
                               HCLGE_INT_GL_IDX_S,
-                              req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
-                              HCLGE_RING_MAP_MBX_BASIC_MSG_NUM + 2]);
+                              req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i +
+                              HCLGE_MBX_RING_MAP_BASIC_MSG_NUM + 2]);
 
                cur_chain->next = new_chain;
                cur_chain = new_chain;
@@ -196,6 +211,8 @@ static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport,
 
                hclge_rm_uc_addr_common(vport, old_addr);
                status = hclge_add_uc_addr_common(vport, mac_addr);
+               if (status)
+                       hclge_add_uc_addr_common(vport, old_addr);
        } else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_ADD) {
                status = hclge_add_uc_addr_common(vport, mac_addr);
        } else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_REMOVE) {
@@ -291,7 +308,7 @@ static int hclge_get_vf_queue_info(struct hclge_vport *vport,
 
        /* get the queue related info */
        memcpy(&resp_data[0], &vport->alloc_tqps, sizeof(u16));
-       memcpy(&resp_data[2], &hdev->rss_size_max, sizeof(u16));
+       memcpy(&resp_data[2], &vport->nic.kinfo.rss_size, sizeof(u16));
        memcpy(&resp_data[4], &hdev->num_desc, sizeof(u16));
        memcpy(&resp_data[6], &hdev->rx_buf_len, sizeof(u16));
 
@@ -304,27 +321,61 @@ static int hclge_get_link_info(struct hclge_vport *vport,
 {
        struct hclge_dev *hdev = vport->back;
        u16 link_status;
-       u8 msg_data[2];
+       u8 msg_data[8];
        u8 dest_vfid;
+       u16 duplex;
 
        /* mac.link can only be 0 or 1 */
        link_status = (u16)hdev->hw.mac.link;
+       duplex = hdev->hw.mac.duplex;
        memcpy(&msg_data[0], &link_status, sizeof(u16));
+       memcpy(&msg_data[2], &hdev->hw.mac.speed, sizeof(u32));
+       memcpy(&msg_data[6], &duplex, sizeof(u16));
        dest_vfid = mbx_req->mbx_src_vfid;
 
        /* send this requested info to VF */
-       return hclge_send_mbx_msg(vport, msg_data, sizeof(u8),
+       return hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data),
                                  HCLGE_MBX_LINK_STAT_CHANGE, dest_vfid);
 }
 
-static void hclge_reset_vf_queue(struct hclge_vport *vport,
-                                struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+static void hclge_mbx_reset_vf_queue(struct hclge_vport *vport,
+                                    struct hclge_mbx_vf_to_pf_cmd *mbx_req)
 {
        u16 queue_id;
 
        memcpy(&queue_id, &mbx_req->msg[2], sizeof(queue_id));
 
-       hclge_reset_tqp(&vport->nic, queue_id);
+       hclge_reset_vf_queue(vport, queue_id);
+
+       /* send response msg to VF after queue reset completes */
+       hclge_gen_resp_to_vf(vport, mbx_req, 0, NULL, 0);
+}
+
+static void hclge_reset_vf(struct hclge_vport *vport,
+                          struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+       struct hclge_dev *hdev = vport->back;
+       int ret;
+
+       dev_warn(&hdev->pdev->dev, "PF received VF reset request from VF %d!",
+                mbx_req->mbx_src_vfid);
+
+       /* Acknowledge VF that PF is now about to assert the reset for the VF.
+        * On receiving this message VF will get into pending state and will
+        * start polling for the hardware reset completion status.
+        */
+       ret = hclge_inform_reset_assert_to_vf(vport);
+       if (ret) {
+               dev_err(&hdev->pdev->dev,
+                       "PF fail(%d) to inform VF(%d)of reset, reset failed!\n",
+                       ret, vport->vport_id);
+               return;
+       }
+
+       dev_warn(&hdev->pdev->dev, "PF is now resetting VF %d.\n",
+                mbx_req->mbx_src_vfid);
+       /* reset this virtual function */
+       hclge_func_reset_cmd(hdev, mbx_req->mbx_src_vfid);
 }
 
 void hclge_mbx_handler(struct hclge_dev *hdev)
@@ -333,11 +384,11 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
        struct hclge_mbx_vf_to_pf_cmd *req;
        struct hclge_vport *vport;
        struct hclge_desc *desc;
-       int ret;
+       int ret, flag;
 
+       flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
        /* handle all the mailbox requests in the queue */
-       while (hnae_get_bit(crq->desc[crq->next_to_use].flag,
-                           HCLGE_CMDQ_RX_OUTVLD_B)) {
+       while (hnae_get_bit(flag, HCLGE_CMDQ_RX_OUTVLD_B)) {
                desc = &crq->desc[crq->next_to_use];
                req = (struct hclge_mbx_vf_to_pf_cmd *)desc->data;
 
@@ -360,7 +411,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
                                        ret);
                        break;
                case HCLGE_MBX_SET_UNICAST:
-                       ret = hclge_set_vf_uc_mac_addr(vport, req, false);
+                       ret = hclge_set_vf_uc_mac_addr(vport, req, true);
                        if (ret)
                                dev_err(&hdev->pdev->dev,
                                        "PF fail(%d) to set VF UC MAC Addr\n",
@@ -402,7 +453,10 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
                                        ret);
                        break;
                case HCLGE_MBX_QUEUE_RESET:
-                       hclge_reset_vf_queue(vport, req);
+                       hclge_mbx_reset_vf_queue(vport, req);
+                       break;
+               case HCLGE_MBX_RESET:
+                       hclge_reset_vf(vport, req);
                        break;
                default:
                        dev_err(&hdev->pdev->dev,
@@ -410,7 +464,9 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
                                req->msg[0]);
                        break;
                }
+               crq->desc[crq->next_to_use].flag = 0;
                hclge_mbx_ring_ptr_move_crq(crq);
+               flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
        }
 
        /* Write back CMDQ_RQ header pointer, M7 need this pointer */
index 36bd79a7794041988ece651c9a56f4dbc6458b86..885f25cd7be49fa7669341d629abcef706fe9ffc 100644 (file)
@@ -23,6 +23,9 @@ enum hclge_shaper_level {
        HCLGE_SHAPER_LVL_PF     = 1,
 };
 
+#define HCLGE_TM_PFC_PKT_GET_CMD_NUM   3
+#define HCLGE_TM_PFC_NUM_GET_PER_CMD   3
+
 #define HCLGE_SHAPER_BS_U_DEF  5
 #define HCLGE_SHAPER_BS_S_DEF  20
 
@@ -112,6 +115,56 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level,
        return 0;
 }
 
+static int hclge_pfc_stats_get(struct hclge_dev *hdev,
+                              enum hclge_opcode_type opcode, u64 *stats)
+{
+       struct hclge_desc desc[HCLGE_TM_PFC_PKT_GET_CMD_NUM];
+       int ret, i, j;
+
+       if (!(opcode == HCLGE_OPC_QUERY_PFC_RX_PKT_CNT ||
+             opcode == HCLGE_OPC_QUERY_PFC_TX_PKT_CNT))
+               return -EINVAL;
+
+       for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM; i++) {
+               hclge_cmd_setup_basic_desc(&desc[i], opcode, true);
+               if (i != (HCLGE_TM_PFC_PKT_GET_CMD_NUM - 1))
+                       desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+               else
+                       desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+       }
+
+       ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_TM_PFC_PKT_GET_CMD_NUM);
+       if (ret) {
+               dev_err(&hdev->pdev->dev,
+                       "Get pfc pause stats fail, ret = %d.\n", ret);
+               return ret;
+       }
+
+       for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM; i++) {
+               struct hclge_pfc_stats_cmd *pfc_stats =
+                               (struct hclge_pfc_stats_cmd *)desc[i].data;
+
+               for (j = 0; j < HCLGE_TM_PFC_NUM_GET_PER_CMD; j++) {
+                       u32 index = i * HCLGE_TM_PFC_PKT_GET_CMD_NUM + j;
+
+                       if (index < HCLGE_MAX_TC_NUM)
+                               stats[index] =
+                                       le64_to_cpu(pfc_stats->pkt_num[j]);
+               }
+       }
+       return 0;
+}
+
+int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats)
+{
+       return hclge_pfc_stats_get(hdev, HCLGE_OPC_QUERY_PFC_RX_PKT_CNT, stats);
+}
+
+int hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats)
+{
+       return hclge_pfc_stats_get(hdev, HCLGE_OPC_QUERY_PFC_TX_PKT_CNT, stats);
+}
+
 int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx)
 {
        struct hclge_desc desc;
@@ -138,8 +191,8 @@ static int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
        return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
 
-static int hclge_mac_pause_param_cfg(struct hclge_dev *hdev, const u8 *addr,
-                                    u8 pause_trans_gap, u16 pause_trans_time)
+static int hclge_pause_param_cfg(struct hclge_dev *hdev, const u8 *addr,
+                                u8 pause_trans_gap, u16 pause_trans_time)
 {
        struct hclge_cfg_pause_param_cmd *pause_param;
        struct hclge_desc desc;
@@ -155,7 +208,7 @@ static int hclge_mac_pause_param_cfg(struct hclge_dev *hdev, const u8 *addr,
        return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
 
-int hclge_mac_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr)
+int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr)
 {
        struct hclge_cfg_pause_param_cmd *pause_param;
        struct hclge_desc desc;
@@ -174,7 +227,7 @@ int hclge_mac_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr)
        trans_gap = pause_param->pause_trans_gap;
        trans_time = le16_to_cpu(pause_param->pause_trans_time);
 
-       return hclge_mac_pause_param_cfg(hdev, mac_addr, trans_gap,
+       return hclge_pause_param_cfg(hdev, mac_addr, trans_gap,
                                         trans_time);
 }
 
@@ -1096,11 +1149,11 @@ static int hclge_tm_schd_setup_hw(struct hclge_dev *hdev)
        return hclge_tm_schd_mode_hw(hdev);
 }
 
-static int hclge_mac_pause_param_setup_hw(struct hclge_dev *hdev)
+static int hclge_pause_param_setup_hw(struct hclge_dev *hdev)
 {
        struct hclge_mac *mac = &hdev->hw.mac;
 
-       return hclge_mac_pause_param_cfg(hdev, mac->mac_addr,
+       return hclge_pause_param_cfg(hdev, mac->mac_addr,
                                         HCLGE_DEFAULT_PAUSE_TRANS_GAP,
                                         HCLGE_DEFAULT_PAUSE_TRANS_TIME);
 }
@@ -1151,13 +1204,12 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev)
        int ret;
        u8 i;
 
-       if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) {
-               ret = hclge_mac_pause_setup_hw(hdev);
-               if (ret)
-                       return ret;
+       ret = hclge_pause_param_setup_hw(hdev);
+       if (ret)
+               return ret;
 
-               return hclge_mac_pause_param_setup_hw(hdev);
-       }
+       if (hdev->tm_info.fc_mode != HCLGE_FC_PFC)
+               return hclge_mac_pause_setup_hw(hdev);
 
        /* Only DCB-supported dev supports qset back pressure and pfc cmd */
        if (!hnae3_dev_dcb_supported(hdev))
index 5401e75594376200008e9b995c7bbbc9b86ff450..2dbe177581e982c1ecf3e115a6cf9d078f680df3 100644 (file)
@@ -109,6 +109,10 @@ struct hclge_cfg_pause_param_cmd {
        __le16 pause_trans_time;
 };
 
+struct hclge_pfc_stats_cmd {
+       __le64 pkt_num[3];
+};
+
 struct hclge_port_shapping_cmd {
        __le32 port_shapping_para;
 };
@@ -129,5 +133,7 @@ int hclge_tm_dwrr_cfg(struct hclge_dev *hdev);
 int hclge_tm_map_cfg(struct hclge_dev *hdev);
 int hclge_tm_init_hw(struct hclge_dev *hdev);
 int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx);
-int hclge_mac_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
+int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
+int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
+int hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats);
 #endif
index 85985e731311fa4e63823aacbfbd2d1e221bcd99..1bbfe131b596e498c8c12dff3339b5e48bec6acd 100644 (file)
@@ -315,6 +315,12 @@ int hclgevf_cmd_init(struct hclgevf_dev *hdev)
                goto err_csq;
        }
 
+       /* initialize the pointers of async rx queue of mailbox */
+       hdev->arq.hdev = hdev;
+       hdev->arq.head = 0;
+       hdev->arq.tail = 0;
+       hdev->arq.count = 0;
+
        /* get firmware version */
        ret = hclgevf_cmd_query_firmware_version(&hdev->hw, &version);
        if (ret) {
index 2caca9317f8c35d945db09b44f0e78af5e701040..621c6cbacf767c983a4a26efc7f2871c747cfa64 100644 (file)
@@ -7,7 +7,7 @@
 #include <linux/types.h>
 #include "hnae3.h"
 
-#define HCLGEVF_CMDQ_TX_TIMEOUT                200
+#define HCLGEVF_CMDQ_TX_TIMEOUT                30000
 #define HCLGEVF_CMDQ_RX_INVLD_B                0
 #define HCLGEVF_CMDQ_RX_OUTVLD_B       1
 
index 0d89965f79288710da4b73ee09bd431925a2ef40..2b8426412cc9ae504b24423601a539c8502f2573 100644 (file)
@@ -2,6 +2,7 @@
 // Copyright (c) 2016-2017 Hisilicon Limited.
 
 #include <linux/etherdevice.h>
+#include <net/rtnetlink.h>
 #include "hclgevf_cmd.h"
 #include "hclgevf_main.h"
 #include "hclge_mbx.h"
@@ -9,6 +10,8 @@
 
 #define HCLGEVF_NAME   "hclgevf"
 
+static int hclgevf_init_hdev(struct hclgevf_dev *hdev);
+static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev);
 static struct hnae3_ae_algo ae_algovf;
 
 static const struct pci_device_id ae_algovf_pci_tbl[] = {
@@ -18,6 +21,8 @@ static const struct pci_device_id ae_algovf_pci_tbl[] = {
        {0, }
 };
 
+MODULE_DEVICE_TABLE(pci, ae_algovf_pci_tbl);
+
 static inline struct hclgevf_dev *hclgevf_ae_get_hdev(
        struct hnae3_handle *handle)
 {
@@ -206,6 +211,12 @@ static int hclgevf_alloc_tqps(struct hclgevf_dev *hdev)
        struct hclgevf_tqp *tqp;
        int i;
 
+       /* if a reset is ongoing then we need to re-allocate the TQPs since
+        * we cannot assume we would get the same number of TQPs back from PF
+        */
+       if (hclgevf_dev_ongoing_reset(hdev))
+               devm_kfree(&hdev->pdev->dev, hdev->htqp);
+
        hdev->htqp = devm_kcalloc(&hdev->pdev->dev, hdev->num_tqps,
                                  sizeof(struct hclgevf_tqp), GFP_KERNEL);
        if (!hdev->htqp)
@@ -249,6 +260,12 @@ static int hclgevf_knic_setup(struct hclgevf_dev *hdev)
        new_tqps = kinfo->rss_size * kinfo->num_tc;
        kinfo->num_tqps = min(new_tqps, hdev->num_tqps);
 
+       /* if a reset is ongoing then we need to re-allocate the hnae queues
+        * as well since the number of TQPs from PF might have changed.
+        */
+       if (hclgevf_dev_ongoing_reset(hdev))
+               devm_kfree(&hdev->pdev->dev, kinfo->tqp);
+
        kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, kinfo->num_tqps,
                                  sizeof(struct hnae3_queue *), GFP_KERNEL);
        if (!kinfo->tqp)
@@ -533,13 +550,11 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
                                       int vector,
                                       struct hnae3_ring_chain_node *ring_chain)
 {
-#define HCLGEVF_RING_NODE_VARIABLE_NUM         3
-#define HCLGEVF_RING_MAP_MBX_BASIC_MSG_NUM     3
        struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
        struct hnae3_ring_chain_node *node;
        struct hclge_mbx_vf_to_pf_cmd *req;
        struct hclgevf_desc desc;
-       int i, vector_id;
+       int i = 0, vector_id;
        int status;
        u8 type;
 
@@ -551,28 +566,33 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
                return vector_id;
        }
 
-       hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false);
-       type = en ?
-               HCLGE_MBX_MAP_RING_TO_VECTOR : HCLGE_MBX_UNMAP_RING_TO_VECTOR;
-       req->msg[0] = type;
-       req->msg[1] = vector_id; /* vector_id should be id in VF */
-
-       i = 0;
        for (node = ring_chain; node; node = node->next) {
-               i++;
-               /* msg[2] is cause num */
-               req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i] =
+               int idx_offset = HCLGE_MBX_RING_MAP_BASIC_MSG_NUM +
+                                       HCLGE_MBX_RING_NODE_VARIABLE_NUM * i;
+
+               if (i == 0) {
+                       hclgevf_cmd_setup_basic_desc(&desc,
+                                                    HCLGEVF_OPC_MBX_VF_TO_PF,
+                                                    false);
+                       type = en ?
+                               HCLGE_MBX_MAP_RING_TO_VECTOR :
+                               HCLGE_MBX_UNMAP_RING_TO_VECTOR;
+                       req->msg[0] = type;
+                       req->msg[1] = vector_id;
+               }
+
+               req->msg[idx_offset] =
                                hnae_get_bit(node->flag, HNAE3_RING_TYPE_B);
-               req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i + 1] =
-                               node->tqp_index;
-               req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i + 2] =
-                               hnae_get_field(node->int_gl_idx,
-                                              HNAE3_RING_GL_IDX_M,
-                                              HNAE3_RING_GL_IDX_S);
-
-               if (i == (HCLGE_MBX_VF_MSG_DATA_NUM -
-                   HCLGEVF_RING_MAP_MBX_BASIC_MSG_NUM) /
-                   HCLGEVF_RING_NODE_VARIABLE_NUM) {
+               req->msg[idx_offset + 1] = node->tqp_index;
+               req->msg[idx_offset + 2] = hnae_get_field(node->int_gl_idx,
+                                                         HNAE3_RING_GL_IDX_M,
+                                                         HNAE3_RING_GL_IDX_S);
+
+               i++;
+               if ((i == (HCLGE_MBX_VF_MSG_DATA_NUM -
+                    HCLGE_MBX_RING_MAP_BASIC_MSG_NUM) /
+                    HCLGE_MBX_RING_NODE_VARIABLE_NUM) ||
+                   !node->next) {
                        req->msg[2] = i;
 
                        status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
@@ -591,17 +611,6 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
                }
        }
 
-       if (i > 0) {
-               req->msg[2] = i;
-
-               status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
-               if (status) {
-                       dev_err(&hdev->pdev->dev,
-                               "Map TQP fail, status is %d.\n", status);
-                       return status;
-               }
-       }
-
        return 0;
 }
 
@@ -627,13 +636,18 @@ static int hclgevf_unmap_ring_from_vector(
        }
 
        ret = hclgevf_bind_ring_to_vector(handle, false, vector, ring_chain);
-       if (ret) {
+       if (ret)
                dev_err(&handle->pdev->dev,
                        "Unmap ring from vector fail. vector=%d, ret =%d\n",
                        vector_id,
                        ret);
-               return ret;
-       }
+
+       return ret;
+}
+
+static int hclgevf_put_vector(struct hnae3_handle *handle, int vector)
+{
+       struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 
        hclgevf_free_vector(hdev, vector);
 
@@ -729,21 +743,25 @@ static void hclgevf_get_mac_addr(struct hnae3_handle *handle, u8 *p)
        ether_addr_copy(p, hdev->hw.mac.mac_addr);
 }
 
-static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p)
+static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p,
+                               bool is_first)
 {
        struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
        u8 *old_mac_addr = (u8 *)hdev->hw.mac.mac_addr;
        u8 *new_mac_addr = (u8 *)p;
        u8 msg_data[ETH_ALEN * 2];
+       u16 subcode;
        int status;
 
        ether_addr_copy(msg_data, new_mac_addr);
        ether_addr_copy(&msg_data[ETH_ALEN], old_mac_addr);
 
+       subcode = is_first ? HCLGE_MBX_MAC_VLAN_UC_ADD :
+                       HCLGE_MBX_MAC_VLAN_UC_MODIFY;
+
        status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_UNICAST,
-                                     HCLGE_MBX_MAC_VLAN_UC_MODIFY,
-                                     msg_data, ETH_ALEN * 2,
-                                     false, NULL, 0);
+                                     subcode, msg_data, ETH_ALEN * 2,
+                                     true, NULL, 0);
        if (!status)
                ether_addr_copy(hdev->hw.mac.mac_addr, new_mac_addr);
 
@@ -816,11 +834,149 @@ static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 {
        struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
        u8 msg_data[2];
+       int ret;
 
        memcpy(&msg_data[0], &queue_id, sizeof(queue_id));
 
-       hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data, 2, false,
-                            NULL, 0);
+       /* disable vf queue before send queue reset msg to PF */
+       ret = hclgevf_tqp_enable(hdev, queue_id, 0, false);
+       if (ret)
+               return;
+
+       hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data,
+                            2, true, NULL, 0);
+}
+
+static int hclgevf_notify_client(struct hclgevf_dev *hdev,
+                                enum hnae3_reset_notify_type type)
+{
+       struct hnae3_client *client = hdev->nic_client;
+       struct hnae3_handle *handle = &hdev->nic;
+
+       if (!client->ops->reset_notify)
+               return -EOPNOTSUPP;
+
+       return client->ops->reset_notify(handle, type);
+}
+
+static int hclgevf_reset_wait(struct hclgevf_dev *hdev)
+{
+#define HCLGEVF_RESET_WAIT_MS  500
+#define HCLGEVF_RESET_WAIT_CNT 20
+       u32 val, cnt = 0;
+
+       /* wait to check the hardware reset completion status */
+       val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING);
+       while (hnae_get_bit(val, HCLGEVF_FUN_RST_ING_B) &&
+                           (cnt < HCLGEVF_RESET_WAIT_CNT)) {
+               msleep(HCLGEVF_RESET_WAIT_MS);
+               val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING);
+               cnt++;
+       }
+
+       /* hardware completion status should be available by this time */
+       if (cnt >= HCLGEVF_RESET_WAIT_CNT) {
+               dev_warn(&hdev->pdev->dev,
+                        "could'nt get reset done status from h/w, timeout!\n");
+               return -EBUSY;
+       }
+
+       /* we will wait a bit more to let the reset of the stack complete. This
+        * might happen in case reset assertion was made by PF. Yes, this also
+        * means we might end up waiting a bit more even for VF reset.
+        */
+       msleep(5000);
+
+       return 0;
+}
+
+static int hclgevf_reset_stack(struct hclgevf_dev *hdev)
+{
+       int ret;
+
+       /* uninitialize the nic client */
+       hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+
+       /* re-initialize the hclge device */
+       ret = hclgevf_init_hdev(hdev);
+       if (ret) {
+               dev_err(&hdev->pdev->dev,
+                       "hclge device re-init failed, VF is disabled!\n");
+               return ret;
+       }
+
+       /* bring up the nic client again */
+       hclgevf_notify_client(hdev, HNAE3_INIT_CLIENT);
+
+       return 0;
+}
+
+static int hclgevf_reset(struct hclgevf_dev *hdev)
+{
+       int ret;
+
+       rtnl_lock();
+
+       /* bring down the nic to stop any ongoing TX/RX */
+       hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT);
+
+       /* check if VF could successfully fetch the hardware reset completion
+        * status from the hardware
+        */
+       ret = hclgevf_reset_wait(hdev);
+       if (ret) {
+               /* can't do much in this situation, will disable VF */
+               dev_err(&hdev->pdev->dev,
+                       "VF failed(=%d) to fetch H/W reset completion status\n",
+                       ret);
+
+               dev_warn(&hdev->pdev->dev, "VF reset failed, disabling VF!\n");
+               hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+
+               rtnl_unlock();
+               return ret;
+       }
+
+       /* now, re-initialize the nic client and ae device */
+       ret = hclgevf_reset_stack(hdev);
+       if (ret)
+               dev_err(&hdev->pdev->dev, "failed to reset VF stack\n");
+
+       /* bring up the nic to enable TX/RX again */
+       hclgevf_notify_client(hdev, HNAE3_UP_CLIENT);
+
+       rtnl_unlock();
+
+       return ret;
+}
+
+static int hclgevf_do_reset(struct hclgevf_dev *hdev)
+{
+       int status;
+       u8 respmsg;
+
+       status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_RESET, 0, NULL,
+                                     0, false, &respmsg, sizeof(u8));
+       if (status)
+               dev_err(&hdev->pdev->dev,
+                       "VF reset request to PF failed(=%d)\n", status);
+
+       return status;
+}
+
+static void hclgevf_reset_event(struct hnae3_handle *handle)
+{
+       struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+       dev_info(&hdev->pdev->dev, "received reset request from VF enet\n");
+
+       handle->reset_level = HNAE3_VF_RESET;
+
+       /* reset of this VF requested */
+       set_bit(HCLGEVF_RESET_REQUESTED, &hdev->reset_state);
+       hclgevf_reset_task_schedule(hdev);
+
+       handle->last_reset_time = jiffies;
 }
 
 static u32 hclgevf_get_fw_version(struct hnae3_handle *handle)
@@ -845,10 +1001,22 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev)
        hdev->num_msi_used += 1;
 }
 
-static void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev)
+void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev)
+{
+       if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) &&
+           !test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) {
+               set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
+               schedule_work(&hdev->rst_service_task);
+       }
+}
+
+void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev)
 {
-       if (!test_and_set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state))
+       if (!test_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state) &&
+           !test_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state)) {
+               set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
                schedule_work(&hdev->mbx_service_task);
+       }
 }
 
 static void hclgevf_task_schedule(struct hclgevf_dev *hdev)
@@ -858,6 +1026,16 @@ static void hclgevf_task_schedule(struct hclgevf_dev *hdev)
                schedule_work(&hdev->service_task);
 }
 
+static void hclgevf_deferred_task_schedule(struct hclgevf_dev *hdev)
+{
+       /* if we have any pending mailbox event then schedule the mbx task */
+       if (hdev->mbx_event_pending)
+               hclgevf_mbx_task_schedule(hdev);
+
+       if (test_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state))
+               hclgevf_reset_task_schedule(hdev);
+}
+
 static void hclgevf_service_timer(struct timer_list *t)
 {
        struct hclgevf_dev *hdev = from_timer(hdev, t, service_timer);
@@ -867,6 +1045,75 @@ static void hclgevf_service_timer(struct timer_list *t)
        hclgevf_task_schedule(hdev);
 }
 
+static void hclgevf_reset_service_task(struct work_struct *work)
+{
+       struct hclgevf_dev *hdev =
+               container_of(work, struct hclgevf_dev, rst_service_task);
+       int ret;
+
+       if (test_and_set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state))
+               return;
+
+       clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
+
+       if (test_and_clear_bit(HCLGEVF_RESET_PENDING,
+                              &hdev->reset_state)) {
+               /* PF has intimated that it is about to reset the hardware.
+                * We must now poll & check if hardware has actually completed
+                * the reset sequence. On hardware reset completion, VF needs to
+                * reset the client and ae device.
+                */
+               hdev->reset_attempts = 0;
+
+               ret = hclgevf_reset(hdev);
+               if (ret)
+                       dev_err(&hdev->pdev->dev, "VF stack reset failed.\n");
+       } else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED,
+                                     &hdev->reset_state)) {
+               /* we could be here when either of below happens:
+                * 1. reset was initiated due to watchdog timeout due to
+                *    a. IMP was earlier reset and our TX got choked down and
+                *       which resulted in watchdog reacting and inducing VF
+                *       reset. This also means our cmdq would be unreliable.
+                *    b. problem in TX due to other lower layer(example link
+                *       layer not functioning properly etc.)
+                * 2. VF reset might have been initiated due to some config
+                *    change.
+                *
+                * NOTE: There's no clear way to detect above cases but to react
+                * to the response of PF for this reset request. PF will ack the
+                * 1b and 2. cases but we will not get any intimation about 1a
+                * from PF as cmdq would be in unreliable state i.e. mailbox
+                * communication between PF and VF would be broken.
+                */
+
+               /* if we are never getting into pending state it means either:
+                * 1. PF is not receiving our request which could be due to IMP
+                *    reset
+                * 2. PF is screwed
+                * We cannot do much for 2. but to check first we can try reset
+                * our PCIe + stack and see if it alleviates the problem.
+                */
+               if (hdev->reset_attempts > 3) {
+                       /* prepare for full reset of stack + pcie interface */
+                       hdev->nic.reset_level = HNAE3_VF_FULL_RESET;
+
+                       /* "defer" schedule the reset task again */
+                       set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
+               } else {
+                       hdev->reset_attempts++;
+
+                       /* request PF for resetting this VF via mailbox */
+                       ret = hclgevf_do_reset(hdev);
+                       if (ret)
+                               dev_warn(&hdev->pdev->dev,
+                                        "VF rst fail, stack will call\n");
+               }
+       }
+
+       clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
+}
+
 static void hclgevf_mailbox_service_task(struct work_struct *work)
 {
        struct hclgevf_dev *hdev;
@@ -878,7 +1125,7 @@ static void hclgevf_mailbox_service_task(struct work_struct *work)
 
        clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
 
-       hclgevf_mbx_handler(hdev);
+       hclgevf_mbx_async_handler(hdev);
 
        clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state);
 }
@@ -894,6 +1141,8 @@ static void hclgevf_service_task(struct work_struct *work)
         */
        hclgevf_request_link_info(hdev);
 
+       hclgevf_deferred_task_schedule(hdev);
+
        clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
 }
 
@@ -936,8 +1185,7 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
        if (!hclgevf_check_event_cause(hdev, &clearval))
                goto skip_sched;
 
-       /* schedule the VF mailbox service task, if not already scheduled */
-       hclgevf_mbx_task_schedule(hdev);
+       hclgevf_mbx_handler(hdev);
 
        hclgevf_clear_event_cause(hdev, clearval);
 
@@ -959,6 +1207,22 @@ static int hclgevf_configure(struct hclgevf_dev *hdev)
        return hclgevf_get_tc_info(hdev);
 }
 
+static int hclgevf_alloc_hdev(struct hnae3_ae_dev *ae_dev)
+{
+       struct pci_dev *pdev = ae_dev->pdev;
+       struct hclgevf_dev *hdev = ae_dev->priv;
+
+       hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
+       if (!hdev)
+               return -ENOMEM;
+
+       hdev->pdev = pdev;
+       hdev->ae_dev = ae_dev;
+       ae_dev->priv = hdev;
+
+       return 0;
+}
+
 static int hclgevf_init_roce_base_info(struct hclgevf_dev *hdev)
 {
        struct hnae3_handle *roce = &hdev->roce;
@@ -1057,10 +1321,17 @@ static void hclgevf_ae_stop(struct hnae3_handle *handle)
 
        /* reset tqp stats */
        hclgevf_reset_tqp_stats(handle);
+       del_timer_sync(&hdev->service_timer);
+       cancel_work_sync(&hdev->service_task);
+       hclgevf_update_link_status(hdev, 0);
 }
 
 static void hclgevf_state_init(struct hclgevf_dev *hdev)
 {
+       /* if a reset is ongoing then skip this initialization */
+       if (hclgevf_dev_ongoing_reset(hdev))
+               return;
+
        /* setup tasks for the MBX */
        INIT_WORK(&hdev->mbx_service_task, hclgevf_mailbox_service_task);
        clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
@@ -1072,6 +1343,8 @@ static void hclgevf_state_init(struct hclgevf_dev *hdev)
        INIT_WORK(&hdev->service_task, hclgevf_service_task);
        clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
 
+       INIT_WORK(&hdev->rst_service_task, hclgevf_reset_service_task);
+
        mutex_init(&hdev->mbx_resp.mbx_mutex);
 
        /* bring the device down */
@@ -1088,6 +1361,8 @@ static void hclgevf_state_uninit(struct hclgevf_dev *hdev)
                cancel_work_sync(&hdev->service_task);
        if (hdev->mbx_service_task.func)
                cancel_work_sync(&hdev->mbx_service_task);
+       if (hdev->rst_service_task.func)
+               cancel_work_sync(&hdev->rst_service_task);
 
        mutex_destroy(&hdev->mbx_resp.mbx_mutex);
 }
@@ -1098,6 +1373,10 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev)
        int vectors;
        int i;
 
+       /* if this is an ongoing reset then skip this initialization */
+       if (hclgevf_dev_ongoing_reset(hdev))
+               return 0;
+
        hdev->num_msi = HCLGEVF_MAX_VF_VECTOR_NUM;
 
        vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
@@ -1148,6 +1427,10 @@ static int hclgevf_misc_irq_init(struct hclgevf_dev *hdev)
 {
        int ret = 0;
 
+       /* if this is an ongoing reset then skip this initialization */
+       if (hclgevf_dev_ongoing_reset(hdev))
+               return 0;
+
        hclgevf_get_misc_vector(hdev);
 
        ret = request_irq(hdev->misc_vector.vector_irq, hclgevf_misc_irq_handle,
@@ -1258,6 +1541,14 @@ static int hclgevf_pci_init(struct hclgevf_dev *hdev)
        struct hclgevf_hw *hw;
        int ret;
 
+       /* check if we need to skip initialization of pci. This will happen if
+        * device is undergoing VF reset. Otherwise, we would need to
+        * re-initialize pci interface again i.e. when device is not going
+        * through *any* reset or actually undergoing full reset.
+        */
+       if (hclgevf_dev_ongoing_reset(hdev))
+               return 0;
+
        ret = pci_enable_device(pdev);
        if (ret) {
                dev_err(&pdev->dev, "failed to enable PCI device\n");
@@ -1309,19 +1600,16 @@ static void hclgevf_pci_uninit(struct hclgevf_dev *hdev)
        pci_set_drvdata(pdev, NULL);
 }
 
-static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
 {
-       struct pci_dev *pdev = ae_dev->pdev;
-       struct hclgevf_dev *hdev;
+       struct pci_dev *pdev = hdev->pdev;
        int ret;
 
-       hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
-       if (!hdev)
-               return -ENOMEM;
-
-       hdev->pdev = pdev;
-       hdev->ae_dev = ae_dev;
-       ae_dev->priv = hdev;
+       /* check if device is undergoing a full reset (i.e. pcie as well) */
+       if (hclgevf_dev_ongoing_full_reset(hdev)) {
+               dev_warn(&pdev->dev, "device is going full reset\n");
+               hclgevf_uninit_hdev(hdev);
+       }
 
        ret = hclgevf_pci_init(hdev);
        if (ret) {
@@ -1406,15 +1694,38 @@ static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
        return ret;
 }
 
-static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
+static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
 {
-       struct hclgevf_dev *hdev = ae_dev->priv;
-
        hclgevf_cmd_uninit(hdev);
        hclgevf_misc_irq_uninit(hdev);
        hclgevf_state_uninit(hdev);
        hclgevf_uninit_msi(hdev);
        hclgevf_pci_uninit(hdev);
+}
+
+static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+       struct pci_dev *pdev = ae_dev->pdev;
+       int ret;
+
+       ret = hclgevf_alloc_hdev(ae_dev);
+       if (ret) {
+               dev_err(&pdev->dev, "hclge device allocation failed\n");
+               return ret;
+       }
+
+       ret = hclgevf_init_hdev(ae_dev->priv);
+       if (ret)
+               dev_err(&pdev->dev, "hclge device initialization failed\n");
+
+       return ret;
+}
+
+static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+       struct hclgevf_dev *hdev = ae_dev->priv;
+
+       hclgevf_uninit_hdev(hdev);
        ae_dev->priv = NULL;
 }
 
@@ -1447,6 +1758,43 @@ static void hclgevf_get_channels(struct hnae3_handle *handle,
        ch->combined_count = hdev->num_tqps;
 }
 
+static void hclgevf_get_tqps_and_rss_info(struct hnae3_handle *handle,
+                                         u16 *free_tqps, u16 *max_rss_size)
+{
+       struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+       *free_tqps = 0;
+       *max_rss_size = hdev->rss_size_max;
+}
+
+static int hclgevf_get_status(struct hnae3_handle *handle)
+{
+       struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+       return hdev->hw.mac.link;
+}
+
+static void hclgevf_get_ksettings_an_result(struct hnae3_handle *handle,
+                                           u8 *auto_neg, u32 *speed,
+                                           u8 *duplex)
+{
+       struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+       if (speed)
+               *speed = hdev->hw.mac.speed;
+       if (duplex)
+               *duplex = hdev->hw.mac.duplex;
+       if (auto_neg)
+               *auto_neg = AUTONEG_DISABLE;
+}
+
+void hclgevf_update_speed_duplex(struct hclgevf_dev *hdev, u32 speed,
+                                u8 duplex)
+{
+       hdev->hw.mac.speed = speed;
+       hdev->hw.mac.duplex = duplex;
+}
+
 static const struct hnae3_ae_ops hclgevf_ops = {
        .init_ae_dev = hclgevf_init_ae_dev,
        .uninit_ae_dev = hclgevf_uninit_ae_dev,
@@ -1457,6 +1805,7 @@ static const struct hnae3_ae_ops hclgevf_ops = {
        .map_ring_to_vector = hclgevf_map_ring_to_vector,
        .unmap_ring_from_vector = hclgevf_unmap_ring_from_vector,
        .get_vector = hclgevf_get_vector,
+       .put_vector = hclgevf_put_vector,
        .reset_queue = hclgevf_reset_tqp,
        .set_promisc_mode = hclgevf_set_promisc_mode,
        .get_mac_addr = hclgevf_get_mac_addr,
@@ -1476,7 +1825,11 @@ static const struct hnae3_ae_ops hclgevf_ops = {
        .get_tc_size = hclgevf_get_tc_size,
        .get_fw_version = hclgevf_get_fw_version,
        .set_vlan_filter = hclgevf_set_vlan_filter,
+       .reset_event = hclgevf_reset_event,
        .get_channels = hclgevf_get_channels,
+       .get_tqps_and_rss_info = hclgevf_get_tqps_and_rss_info,
+       .get_status = hclgevf_get_status,
+       .get_ksettings_an_result = hclgevf_get_ksettings_an_result,
 };
 
 static struct hnae3_ae_algo ae_algovf = {
index a63bee4a36744c847d28ba4842795c3fa9a9f5bd..a477a7c36bbd30a2c2a9ffd19352da8166fb1c0d 100644 (file)
@@ -34,6 +34,9 @@
 #define HCLGEVF_VECTOR0_RX_CMDQ_INT_B  1
 
 #define HCLGEVF_TQP_RESET_TRY_TIMES    10
+/* Reset related Registers */
+#define HCLGEVF_FUN_RST_ING            0x20C00
+#define HCLGEVF_FUN_RST_ING_B          0
 
 #define HCLGEVF_RSS_IND_TBL_SIZE               512
 #define HCLGEVF_RSS_SET_BITMAP_MSK     0xffff
@@ -52,6 +55,8 @@ enum hclgevf_states {
        HCLGEVF_STATE_DISABLED,
        /* task states */
        HCLGEVF_STATE_SERVICE_SCHED,
+       HCLGEVF_STATE_RST_SERVICE_SCHED,
+       HCLGEVF_STATE_RST_HANDLING,
        HCLGEVF_STATE_MBX_SERVICE_SCHED,
        HCLGEVF_STATE_MBX_HANDLING,
 };
@@ -61,6 +66,8 @@ enum hclgevf_states {
 struct hclgevf_mac {
        u8 mac_addr[ETH_ALEN];
        int link;
+       u8 duplex;
+       u32 speed;
 };
 
 struct hclgevf_hw {
@@ -120,6 +127,11 @@ struct hclgevf_dev {
        struct hclgevf_rss_cfg rss_cfg;
        unsigned long state;
 
+#define HCLGEVF_RESET_REQUESTED                0
+#define HCLGEVF_RESET_PENDING          1
+       unsigned long reset_state;      /* requested, pending */
+       u32 reset_attempts;
+
        u32 fw_version;
        u16 num_tqps;           /* num task queue pairs of this PF */
 
@@ -140,10 +152,13 @@ struct hclgevf_dev {
        int *vector_irq;
 
        bool accept_mta_mc; /* whether to accept mta filter multicast */
+       bool mbx_event_pending;
        struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */
+       struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */
 
        struct timer_list service_timer;
        struct work_struct service_task;
+       struct work_struct rst_service_task;
        struct work_struct mbx_service_task;
 
        struct hclgevf_tqp *htqp;
@@ -156,9 +171,29 @@ struct hclgevf_dev {
        u32 flag;
 };
 
+static inline bool hclgevf_dev_ongoing_reset(struct hclgevf_dev *hdev)
+{
+       return (hdev &&
+               (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) &&
+               (hdev->nic.reset_level == HNAE3_VF_RESET));
+}
+
+static inline bool hclgevf_dev_ongoing_full_reset(struct hclgevf_dev *hdev)
+{
+       return (hdev &&
+               (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) &&
+               (hdev->nic.reset_level == HNAE3_VF_FULL_RESET));
+}
+
 int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode,
                         const u8 *msg_data, u8 msg_len, bool need_resp,
                         u8 *resp_data, u16 resp_len);
 void hclgevf_mbx_handler(struct hclgevf_dev *hdev);
+void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev);
+
 void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state);
+void hclgevf_update_speed_duplex(struct hclgevf_dev *hdev, u32 speed,
+                                u8 duplex);
+void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev);
+void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev);
 #endif
index e39cad285fa9e97dfd7062e0cffa547980ced538..a286184283384c6aa49bb5c5080a30008ff297b0 100644 (file)
@@ -54,6 +54,10 @@ static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1,
        mbx_resp = &hdev->mbx_resp;
        r_code0 = (u16)(mbx_resp->origin_mbx_msg >> 16);
        r_code1 = (u16)(mbx_resp->origin_mbx_msg & 0xff);
+
+       if (mbx_resp->resp_status)
+               return mbx_resp->resp_status;
+
        if (resp_data)
                memcpy(resp_data, &mbx_resp->additional_info[0], resp_len);
 
@@ -128,7 +132,8 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
        struct hclge_mbx_pf_to_vf_cmd *req;
        struct hclgevf_cmq_ring *crq;
        struct hclgevf_desc *desc;
-       u16 link_status, flag;
+       u16 *msg_q;
+       u16 flag;
        u8 *temp;
        int i;
 
@@ -140,6 +145,12 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
                desc = &crq->desc[crq->next_to_use];
                req = (struct hclge_mbx_pf_to_vf_cmd *)desc->data;
 
+               /* synchronous messages are time critical and need preferential
+                * treatment. Therefore, we need to acknowledge all the sync
+                * responses as quickly as possible so that waiting tasks do not
+                * timeout and simultaneously queue the async messages for later
+                * processing in context of mailbox task i.e. the slow path.
+                */
                switch (req->msg[0]) {
                case HCLGE_MBX_PF_VF_RESP:
                        if (resp->received_resp)
@@ -159,10 +170,31 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
                        }
                        break;
                case HCLGE_MBX_LINK_STAT_CHANGE:
-                       link_status = le16_to_cpu(req->msg[1]);
+               case HCLGE_MBX_ASSERTING_RESET:
+                       /* set this mbx event as pending. This is required as we
+                        * might lose interrupt event when mbx task is busy
+                        * handling. This shall be cleared when mbx task just
+                        * enters handling state.
+                        */
+                       hdev->mbx_event_pending = true;
 
-                       /* update upper layer with new link link status */
-                       hclgevf_update_link_status(hdev, link_status);
+                       /* we will drop the async msg if we find ARQ as full
+                        * and continue with next message
+                        */
+                       if (hdev->arq.count >= HCLGE_MBX_MAX_ARQ_MSG_NUM) {
+                               dev_warn(&hdev->pdev->dev,
+                                        "Async Q full, dropping msg(%d)\n",
+                                        req->msg[1]);
+                               break;
+                       }
+
+                       /* tail the async message in arq */
+                       msg_q = hdev->arq.msg_q[hdev->arq.tail];
+                       memcpy(&msg_q[0], req->msg, HCLGE_MBX_MAX_ARQ_MSG_SIZE);
+                       hclge_mbx_tail_ptr_move_arq(hdev->arq);
+                       hdev->arq.count++;
+
+                       hclgevf_mbx_task_schedule(hdev);
 
                        break;
                default:
@@ -171,6 +203,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
                                req->msg[0]);
                        break;
                }
+               crq->desc[crq->next_to_use].flag = 0;
                hclge_mbx_ring_ptr_move_crq(crq);
                flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
        }
@@ -179,3 +212,57 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
        hclgevf_write_dev(&hdev->hw, HCLGEVF_NIC_CRQ_HEAD_REG,
                          crq->next_to_use);
 }
+
+/* Drain the async rx queue (arq) filled by hclgevf_mbx_handler() */
+void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
+{
+       u16 link_status;
+       u16 *msg_q;
+       u8 duplex;
+       u32 speed;
+       u32 tail;
+
+       /* we can safely clear it now as we are at start of the async message
+        * processing
+        */
+       hdev->mbx_event_pending = false;
+
+       tail = hdev->arq.tail;
+
+       /* process all the async queue messages */
+       while (tail != hdev->arq.head) {
+               msg_q = hdev->arq.msg_q[hdev->arq.head];
+
+               switch (msg_q[0]) {
+               case HCLGE_MBX_LINK_STAT_CHANGE:
+                       link_status = le16_to_cpu(msg_q[1]);
+                       memcpy(&speed, &msg_q[2], sizeof(speed));
+                       duplex = (u8)le16_to_cpu(msg_q[4]);
+
+                       /* update upper layer with new link status */
+                       hclgevf_update_link_status(hdev, link_status);
+                       hclgevf_update_speed_duplex(hdev, speed, duplex);
+
+                       break;
+               case HCLGE_MBX_ASSERTING_RESET:
+                       /* PF has asserted reset hence VF should go in pending
+                        * state and poll for the hardware reset status till it
+                        * has been completely reset. After this stack should
+                        * eventually be re-initialized.
+                        */
+                       hdev->nic.reset_level = HNAE3_VF_RESET;
+                       set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
+                       hclgevf_reset_task_schedule(hdev);
+
+                       break;
+               default:
+                       dev_err(&hdev->pdev->dev,
+                               "fetched unsupported(%d) message from arq\n",
+                               msg_q[0]);
+                       break;
+               }
+
+               hclge_mbx_head_ptr_move_arq(hdev->arq);
+               hdev->arq.count--;
+       }
+}
index 5a86a916492cc2f6d3301097fab008d2a6e31399..0389a7a521528f55776490f7549862d846841d3a 100644 (file)
@@ -111,7 +111,7 @@ static int ibmvnic_poll(struct napi_struct *napi, int data);
 static void send_map_query(struct ibmvnic_adapter *adapter);
 static void send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
 static void send_request_unmap(struct ibmvnic_adapter *, u8);
-static void send_login(struct ibmvnic_adapter *adapter);
+static int send_login(struct ibmvnic_adapter *adapter);
 static void send_cap_queries(struct ibmvnic_adapter *adapter);
 static int init_sub_crqs(struct ibmvnic_adapter *);
 static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
@@ -557,36 +557,41 @@ static int init_rx_pools(struct net_device *netdev)
        return 0;
 }
 
+static int reset_one_tx_pool(struct ibmvnic_adapter *adapter,
+                            struct ibmvnic_tx_pool *tx_pool)
+{
+       int rc, i;
+
+       rc = reset_long_term_buff(adapter, &tx_pool->long_term_buff);
+       if (rc)
+               return rc;
+
+       memset(tx_pool->tx_buff, 0,
+              tx_pool->num_buffers *
+              sizeof(struct ibmvnic_tx_buff));
+
+       for (i = 0; i < tx_pool->num_buffers; i++)
+               tx_pool->free_map[i] = i;
+
+       tx_pool->consumer_index = 0;
+       tx_pool->producer_index = 0;
+
+       return 0;
+}
+
 static int reset_tx_pools(struct ibmvnic_adapter *adapter)
 {
-       struct ibmvnic_tx_pool *tx_pool;
        int tx_scrqs;
-       int i, j, rc;
+       int i, rc;
 
        tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
        for (i = 0; i < tx_scrqs; i++) {
-               netdev_dbg(adapter->netdev, "Re-setting tx_pool[%d]\n", i);
-
-               tx_pool = &adapter->tx_pool[i];
-
-               rc = reset_long_term_buff(adapter, &tx_pool->long_term_buff);
+               rc = reset_one_tx_pool(adapter, &adapter->tso_pool[i]);
                if (rc)
                        return rc;
-
-               rc = reset_long_term_buff(adapter, &tx_pool->tso_ltb);
+               rc = reset_one_tx_pool(adapter, &adapter->tx_pool[i]);
                if (rc)
                        return rc;
-
-               memset(tx_pool->tx_buff, 0,
-                      adapter->req_tx_entries_per_subcrq *
-                      sizeof(struct ibmvnic_tx_buff));
-
-               for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
-                       tx_pool->free_map[j] = j;
-
-               tx_pool->consumer_index = 0;
-               tx_pool->producer_index = 0;
-               tx_pool->tso_index = 0;
        }
 
        return 0;
@@ -603,35 +608,70 @@ static void release_vpd_data(struct ibmvnic_adapter *adapter)
        adapter->vpd = NULL;
 }
 
+static void release_one_tx_pool(struct ibmvnic_adapter *adapter,
+                               struct ibmvnic_tx_pool *tx_pool)
+{
+       kfree(tx_pool->tx_buff);
+       kfree(tx_pool->free_map);
+       free_long_term_buff(adapter, &tx_pool->long_term_buff);
+}
+
 static void release_tx_pools(struct ibmvnic_adapter *adapter)
 {
-       struct ibmvnic_tx_pool *tx_pool;
        int i;
 
        if (!adapter->tx_pool)
                return;
 
        for (i = 0; i < adapter->num_active_tx_pools; i++) {
-               netdev_dbg(adapter->netdev, "Releasing tx_pool[%d]\n", i);
-               tx_pool = &adapter->tx_pool[i];
-               kfree(tx_pool->tx_buff);
-               free_long_term_buff(adapter, &tx_pool->long_term_buff);
-               free_long_term_buff(adapter, &tx_pool->tso_ltb);
-               kfree(tx_pool->free_map);
+               release_one_tx_pool(adapter, &adapter->tx_pool[i]);
+               release_one_tx_pool(adapter, &adapter->tso_pool[i]);
        }
 
        kfree(adapter->tx_pool);
        adapter->tx_pool = NULL;
+       kfree(adapter->tso_pool);
+       adapter->tso_pool = NULL;
        adapter->num_active_tx_pools = 0;
 }
 
+static int init_one_tx_pool(struct net_device *netdev,
+                           struct ibmvnic_tx_pool *tx_pool,
+                           int num_entries, int buf_size)
+{
+       struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+       int i;
+
+       tx_pool->tx_buff = kcalloc(num_entries,
+                                  sizeof(struct ibmvnic_tx_buff),
+                                  GFP_KERNEL);
+       if (!tx_pool->tx_buff)
+               return -1;
+
+       if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
+                                num_entries * buf_size))
+               return -1;
+
+       tx_pool->free_map = kcalloc(num_entries, sizeof(int), GFP_KERNEL);
+       if (!tx_pool->free_map)
+               return -1;
+
+       for (i = 0; i < num_entries; i++)
+               tx_pool->free_map[i] = i;
+
+       tx_pool->consumer_index = 0;
+       tx_pool->producer_index = 0;
+       tx_pool->num_buffers = num_entries;
+       tx_pool->buf_size = buf_size;
+
+       return 0;
+}
+
 static int init_tx_pools(struct net_device *netdev)
 {
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-       struct device *dev = &adapter->vdev->dev;
-       struct ibmvnic_tx_pool *tx_pool;
        int tx_subcrqs;
-       int i, j;
+       int i, rc;
 
        tx_subcrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
        adapter->tx_pool = kcalloc(tx_subcrqs,
@@ -639,53 +679,29 @@ static int init_tx_pools(struct net_device *netdev)
        if (!adapter->tx_pool)
                return -1;
 
+       adapter->tso_pool = kcalloc(tx_subcrqs,
+                                   sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
+       if (!adapter->tso_pool)
+               return -1;
+
        adapter->num_active_tx_pools = tx_subcrqs;
 
        for (i = 0; i < tx_subcrqs; i++) {
-               tx_pool = &adapter->tx_pool[i];
-
-               netdev_dbg(adapter->netdev,
-                          "Initializing tx_pool[%d], %lld buffs\n",
-                          i, adapter->req_tx_entries_per_subcrq);
-
-               tx_pool->tx_buff = kcalloc(adapter->req_tx_entries_per_subcrq,
-                                          sizeof(struct ibmvnic_tx_buff),
-                                          GFP_KERNEL);
-               if (!tx_pool->tx_buff) {
-                       dev_err(dev, "tx pool buffer allocation failed\n");
-                       release_tx_pools(adapter);
-                       return -1;
-               }
-
-               if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
-                                        adapter->req_tx_entries_per_subcrq *
-                                        adapter->req_mtu)) {
-                       release_tx_pools(adapter);
-                       return -1;
-               }
-
-               /* alloc TSO ltb */
-               if (alloc_long_term_buff(adapter, &tx_pool->tso_ltb,
-                                        IBMVNIC_TSO_BUFS *
-                                        IBMVNIC_TSO_BUF_SZ)) {
+               rc = init_one_tx_pool(netdev, &adapter->tx_pool[i],
+                                     adapter->req_tx_entries_per_subcrq,
+                                     adapter->req_mtu + VLAN_HLEN);
+               if (rc) {
                        release_tx_pools(adapter);
-                       return -1;
+                       return rc;
                }
 
-               tx_pool->tso_index = 0;
-
-               tx_pool->free_map = kcalloc(adapter->req_tx_entries_per_subcrq,
-                                           sizeof(int), GFP_KERNEL);
-               if (!tx_pool->free_map) {
+               init_one_tx_pool(netdev, &adapter->tso_pool[i],
+                                IBMVNIC_TSO_BUFS,
+                                IBMVNIC_TSO_BUF_SZ);
+               if (rc) {
                        release_tx_pools(adapter);
-                       return -1;
+                       return rc;
                }
-
-               for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
-                       tx_pool->free_map[j] = j;
-
-               tx_pool->consumer_index = 0;
-               tx_pool->producer_index = 0;
        }
 
        return 0;
@@ -809,8 +825,11 @@ static int ibmvnic_login(struct net_device *netdev)
                }
 
                reinit_completion(&adapter->init_done);
-               send_login(adapter);
-               if (!wait_for_completion_timeout(&adapter->init_done,
+               rc = send_login(adapter);
+               if (rc) {
+                       dev_err(dev, "Unable to attempt device login\n");
+                       return rc;
+               } else if (!wait_for_completion_timeout(&adapter->init_done,
                                                 timeout)) {
                        dev_err(dev, "Login timeout\n");
                        return -1;
@@ -845,8 +864,6 @@ static void release_resources(struct ibmvnic_adapter *adapter)
        release_tx_pools(adapter);
        release_rx_pools(adapter);
 
-       release_stats_token(adapter);
-       release_stats_buffers(adapter);
        release_error_buffers(adapter);
        release_napi(adapter);
        release_login_rsp_buffer(adapter);
@@ -974,14 +991,6 @@ static int init_resources(struct ibmvnic_adapter *adapter)
        if (rc)
                return rc;
 
-       rc = init_stats_buffers(adapter);
-       if (rc)
-               return rc;
-
-       rc = init_stats_token(adapter);
-       if (rc)
-               return rc;
-
        adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL);
        if (!adapter->vpd)
                return -ENOMEM;
@@ -1091,6 +1100,7 @@ static int ibmvnic_open(struct net_device *netdev)
 static void clean_rx_pools(struct ibmvnic_adapter *adapter)
 {
        struct ibmvnic_rx_pool *rx_pool;
+       struct ibmvnic_rx_buff *rx_buff;
        u64 rx_entries;
        int rx_scrqs;
        int i, j;
@@ -1104,56 +1114,64 @@ static void clean_rx_pools(struct ibmvnic_adapter *adapter)
        /* Free any remaining skbs in the rx buffer pools */
        for (i = 0; i < rx_scrqs; i++) {
                rx_pool = &adapter->rx_pool[i];
-               if (!rx_pool)
+               if (!rx_pool || !rx_pool->rx_buff)
                        continue;
 
                netdev_dbg(adapter->netdev, "Cleaning rx_pool[%d]\n", i);
                for (j = 0; j < rx_entries; j++) {
-                       if (rx_pool->rx_buff[j].skb) {
-                               dev_kfree_skb_any(rx_pool->rx_buff[j].skb);
-                               rx_pool->rx_buff[j].skb = NULL;
+                       rx_buff = &rx_pool->rx_buff[j];
+                       if (rx_buff && rx_buff->skb) {
+                               dev_kfree_skb_any(rx_buff->skb);
+                               rx_buff->skb = NULL;
                        }
                }
        }
 }
 
-static void clean_tx_pools(struct ibmvnic_adapter *adapter)
+static void clean_one_tx_pool(struct ibmvnic_adapter *adapter,
+                             struct ibmvnic_tx_pool *tx_pool)
 {
-       struct ibmvnic_tx_pool *tx_pool;
+       struct ibmvnic_tx_buff *tx_buff;
        u64 tx_entries;
+       int i;
+
+       if (!tx_pool || !tx_pool->tx_buff)
+               return;
+
+       tx_entries = tx_pool->num_buffers;
+
+       for (i = 0; i < tx_entries; i++) {
+               tx_buff = &tx_pool->tx_buff[i];
+               if (tx_buff && tx_buff->skb) {
+                       dev_kfree_skb_any(tx_buff->skb);
+                       tx_buff->skb = NULL;
+               }
+       }
+}
+
+static void clean_tx_pools(struct ibmvnic_adapter *adapter)
+{
        int tx_scrqs;
-       int i, j;
+       int i;
 
-       if (!adapter->tx_pool)
+       if (!adapter->tx_pool || !adapter->tso_pool)
                return;
 
        tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
-       tx_entries = adapter->req_tx_entries_per_subcrq;
 
        /* Free any remaining skbs in the tx buffer pools */
        for (i = 0; i < tx_scrqs; i++) {
-               tx_pool = &adapter->tx_pool[i];
-               if (!tx_pool)
-                       continue;
-
                netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i);
-               for (j = 0; j < tx_entries; j++) {
-                       if (tx_pool->tx_buff[j].skb) {
-                               dev_kfree_skb_any(tx_pool->tx_buff[j].skb);
-                               tx_pool->tx_buff[j].skb = NULL;
-                       }
-               }
+               clean_one_tx_pool(adapter, &adapter->tx_pool[i]);
+               clean_one_tx_pool(adapter, &adapter->tso_pool[i]);
        }
 }
 
-static int __ibmvnic_close(struct net_device *netdev)
+static void ibmvnic_cleanup(struct net_device *netdev)
 {
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-       int rc = 0;
        int i;
 
-       adapter->state = VNIC_CLOSING;
-
        /* ensure that transmissions are stopped if called by do_reset */
        if (adapter->resetting)
                netif_tx_disable(netdev);
@@ -1165,30 +1183,16 @@ static int __ibmvnic_close(struct net_device *netdev)
        if (adapter->tx_scrq) {
                for (i = 0; i < adapter->req_tx_queues; i++)
                        if (adapter->tx_scrq[i]->irq) {
-                               netdev_dbg(adapter->netdev,
+                               netdev_dbg(netdev,
                                           "Disabling tx_scrq[%d] irq\n", i);
                                disable_irq(adapter->tx_scrq[i]->irq);
                        }
        }
 
-       rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
-       if (rc)
-               return rc;
-
        if (adapter->rx_scrq) {
                for (i = 0; i < adapter->req_rx_queues; i++) {
-                       int retries = 10;
-
-                       while (pending_scrq(adapter, adapter->rx_scrq[i])) {
-                               retries--;
-                               mdelay(100);
-
-                               if (retries == 0)
-                                       break;
-                       }
-
                        if (adapter->rx_scrq[i]->irq) {
-                               netdev_dbg(adapter->netdev,
+                               netdev_dbg(netdev,
                                           "Disabling rx_scrq[%d] irq\n", i);
                                disable_irq(adapter->rx_scrq[i]->irq);
                        }
@@ -1196,8 +1200,20 @@ static int __ibmvnic_close(struct net_device *netdev)
        }
        clean_rx_pools(adapter);
        clean_tx_pools(adapter);
+}
+
+static int __ibmvnic_close(struct net_device *netdev)
+{
+       struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+       int rc = 0;
+
+       adapter->state = VNIC_CLOSING;
+       rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
+       if (rc)
+               return rc;
+       ibmvnic_cleanup(netdev);
        adapter->state = VNIC_CLOSED;
-       return rc;
+       return 0;
 }
 
 static int ibmvnic_close(struct net_device *netdev)
@@ -1229,7 +1245,10 @@ static int build_hdr_data(u8 hdr_field, struct sk_buff *skb,
        int len = 0;
        u8 *hdr;
 
-       hdr_len[0] = sizeof(struct ethhdr);
+       if (skb_vlan_tagged(skb) && !skb_vlan_tag_present(skb))
+               hdr_len[0] = sizeof(struct vlan_ethhdr);
+       else
+               hdr_len[0] = sizeof(struct ethhdr);
 
        if (skb->protocol == htons(ETH_P_IP)) {
                hdr_len[1] = ip_hdr(skb)->ihl * 4;
@@ -1345,6 +1364,21 @@ static void build_hdr_descs_arr(struct ibmvnic_tx_buff *txbuff,
                         txbuff->indir_arr + 1);
 }
 
+static int ibmvnic_xmit_workarounds(struct sk_buff *skb,
+                                   struct net_device *netdev)
+{
+       /* For some backing devices, mishandling of small packets
+        * can result in a loss of connection or TX stall. Device
+        * architects recommend that no packet should be smaller
+        * than the minimum MTU value provided to the driver, so
+        * pad any packets to that length
+        */
+       if (skb->len < netdev->min_mtu)
+               return skb_put_padto(skb, netdev->min_mtu);
+
+       return 0;
+}
+
 static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
@@ -1382,7 +1416,17 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
                goto out;
        }
 
-       tx_pool = &adapter->tx_pool[queue_num];
+       if (ibmvnic_xmit_workarounds(skb, netdev)) {
+               tx_dropped++;
+               tx_send_failed++;
+               ret = NETDEV_TX_OK;
+               goto out;
+       }
+       if (skb_is_gso(skb))
+               tx_pool = &adapter->tso_pool[queue_num];
+       else
+               tx_pool = &adapter->tx_pool[queue_num];
+
        tx_scrq = adapter->tx_scrq[queue_num];
        txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
        handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
@@ -1390,21 +1434,21 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 
        index = tx_pool->free_map[tx_pool->consumer_index];
 
-       if (skb_is_gso(skb)) {
-               offset = tx_pool->tso_index * IBMVNIC_TSO_BUF_SZ;
-               dst = tx_pool->tso_ltb.buff + offset;
-               memset(dst, 0, IBMVNIC_TSO_BUF_SZ);
-               data_dma_addr = tx_pool->tso_ltb.addr + offset;
-               tx_pool->tso_index++;
-               if (tx_pool->tso_index == IBMVNIC_TSO_BUFS)
-                       tx_pool->tso_index = 0;
-       } else {
-               offset = index * adapter->req_mtu;
-               dst = tx_pool->long_term_buff.buff + offset;
-               memset(dst, 0, adapter->req_mtu);
-               data_dma_addr = tx_pool->long_term_buff.addr + offset;
+       if (index == IBMVNIC_INVALID_MAP) {
+               dev_kfree_skb_any(skb);
+               tx_send_failed++;
+               tx_dropped++;
+               ret = NETDEV_TX_OK;
+               goto out;
        }
 
+       tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP;
+
+       offset = index * tx_pool->buf_size;
+       dst = tx_pool->long_term_buff.buff + offset;
+       memset(dst, 0, tx_pool->buf_size);
+       data_dma_addr = tx_pool->long_term_buff.addr + offset;
+
        if (skb_shinfo(skb)->nr_frags) {
                int cur, i;
 
@@ -1426,8 +1470,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
        }
 
        tx_pool->consumer_index =
-           (tx_pool->consumer_index + 1) %
-               adapter->req_tx_entries_per_subcrq;
+           (tx_pool->consumer_index + 1) % tx_pool->num_buffers;
 
        tx_buff = &tx_pool->tx_buff[index];
        tx_buff->skb = skb;
@@ -1443,11 +1486,13 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
        tx_crq.v1.n_crq_elem = 1;
        tx_crq.v1.n_sge = 1;
        tx_crq.v1.flags1 = IBMVNIC_TX_COMP_NEEDED;
-       tx_crq.v1.correlator = cpu_to_be32(index);
+
        if (skb_is_gso(skb))
-               tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->tso_ltb.map_id);
+               tx_crq.v1.correlator =
+                       cpu_to_be32(index | IBMVNIC_TSO_POOL_MASK);
        else
-               tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id);
+               tx_crq.v1.correlator = cpu_to_be32(index);
+       tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id);
        tx_crq.v1.sge_len = cpu_to_be32(skb->len);
        tx_crq.v1.ioba = cpu_to_be64(data_dma_addr);
 
@@ -1482,6 +1527,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
        if ((*hdrs >> 7) & 1) {
                build_hdr_descs_arr(tx_buff, &num_entries, *hdrs);
                tx_crq.v1.n_crq_elem = num_entries;
+               tx_buff->num_entries = num_entries;
                tx_buff->indir_arr[0] = tx_crq;
                tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr,
                                                    sizeof(tx_buff->indir_arr),
@@ -1494,24 +1540,18 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
                        tx_map_failed++;
                        tx_dropped++;
                        ret = NETDEV_TX_OK;
-                       goto out;
+                       goto tx_err_out;
                }
                lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num],
                                               (u64)tx_buff->indir_dma,
                                               (u64)num_entries);
        } else {
+               tx_buff->num_entries = num_entries;
                lpar_rc = send_subcrq(adapter, handle_array[queue_num],
                                      &tx_crq);
        }
        if (lpar_rc != H_SUCCESS) {
                dev_err(dev, "tx failed with code %ld\n", lpar_rc);
-
-               if (tx_pool->consumer_index == 0)
-                       tx_pool->consumer_index =
-                               adapter->req_tx_entries_per_subcrq - 1;
-               else
-                       tx_pool->consumer_index--;
-
                dev_kfree_skb_any(skb);
                tx_buff->skb = NULL;
 
@@ -1527,21 +1567,29 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
                tx_send_failed++;
                tx_dropped++;
                ret = NETDEV_TX_OK;
-               goto out;
+               goto tx_err_out;
        }
 
        if (atomic_add_return(num_entries, &tx_scrq->used)
                                        >= adapter->req_tx_entries_per_subcrq) {
-               netdev_info(netdev, "Stopping queue %d\n", queue_num);
+               netdev_dbg(netdev, "Stopping queue %d\n", queue_num);
                netif_stop_subqueue(netdev, queue_num);
        }
 
-       tx_buff->num_entries = num_entries;
        tx_packets++;
        tx_bytes += skb->len;
        txq->trans_start = jiffies;
        ret = NETDEV_TX_OK;
+       goto out;
 
+tx_err_out:
+       /* roll back consumer index and map array*/
+       if (tx_pool->consumer_index == 0)
+               tx_pool->consumer_index =
+                       tx_pool->num_buffers - 1;
+       else
+               tx_pool->consumer_index--;
+       tx_pool->free_map[tx_pool->consumer_index] = index;
 out:
        netdev->stats.tx_dropped += tx_dropped;
        netdev->stats.tx_bytes += tx_bytes;
@@ -1660,12 +1708,15 @@ static int do_reset(struct ibmvnic_adapter *adapter,
                rc = ibmvnic_reenable_crq_queue(adapter);
                if (rc)
                        return 0;
+               ibmvnic_cleanup(netdev);
+       } else if (rwi->reset_reason == VNIC_RESET_FAILOVER) {
+               ibmvnic_cleanup(netdev);
+       } else {
+               rc = __ibmvnic_close(netdev);
+               if (rc)
+                       return rc;
        }
 
-       rc = __ibmvnic_close(netdev);
-       if (rc)
-               return rc;
-
        if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
            adapter->wait_for_reset) {
                release_resources(adapter);
@@ -1718,12 +1769,14 @@ static int do_reset(struct ibmvnic_adapter *adapter,
                        rc = reset_rx_pools(adapter);
                        if (rc)
                                return rc;
-
-                       if (reset_state == VNIC_CLOSED)
-                               return 0;
                }
        }
 
+       adapter->state = VNIC_CLOSED;
+
+       if (reset_state == VNIC_CLOSED)
+               return 0;
+
        rc = __ibmvnic_open(netdev);
        if (rc) {
                if (list_empty(&adapter->rwi_list))
@@ -2030,6 +2083,23 @@ static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu)
        return wait_for_reset(adapter);
 }
 
+static netdev_features_t ibmvnic_features_check(struct sk_buff *skb,
+                                               struct net_device *dev,
+                                               netdev_features_t features)
+{
+       /* Some backing hardware adapters can not
+        * handle packets with a MSS less than 224
+        * or with only one segment.
+        */
+       if (skb_is_gso(skb)) {
+               if (skb_shinfo(skb)->gso_size < 224 ||
+                   skb_shinfo(skb)->gso_segs == 1)
+                       features &= ~NETIF_F_GSO_MASK;
+       }
+
+       return features;
+}
+
 static const struct net_device_ops ibmvnic_netdev_ops = {
        .ndo_open               = ibmvnic_open,
        .ndo_stop               = ibmvnic_close,
@@ -2042,6 +2112,7 @@ static const struct net_device_ops ibmvnic_netdev_ops = {
        .ndo_poll_controller    = ibmvnic_netpoll_controller,
 #endif
        .ndo_change_mtu         = ibmvnic_change_mtu,
+       .ndo_features_check     = ibmvnic_features_check,
 };
 
 /* ethtool functions */
@@ -2490,6 +2561,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
                               struct ibmvnic_sub_crq_queue *scrq)
 {
        struct device *dev = &adapter->vdev->dev;
+       struct ibmvnic_tx_pool *tx_pool;
        struct ibmvnic_tx_buff *txbuff;
        union sub_crq *next;
        int index;
@@ -2509,7 +2581,14 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
                                continue;
                        }
                        index = be32_to_cpu(next->tx_comp.correlators[i]);
-                       txbuff = &adapter->tx_pool[pool].tx_buff[index];
+                       if (index & IBMVNIC_TSO_POOL_MASK) {
+                               tx_pool = &adapter->tso_pool[pool];
+                               index &= ~IBMVNIC_TSO_POOL_MASK;
+                       } else {
+                               tx_pool = &adapter->tx_pool[pool];
+                       }
+
+                       txbuff = &tx_pool->tx_buff[index];
 
                        for (j = 0; j < IBMVNIC_MAX_FRAGS_PER_CRQ; j++) {
                                if (!txbuff->data_dma[j])
@@ -2532,11 +2611,10 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 
                        num_entries += txbuff->num_entries;
 
-                       adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
-                                                    producer_index] = index;
-                       adapter->tx_pool[pool].producer_index =
-                           (adapter->tx_pool[pool].producer_index + 1) %
-                           adapter->req_tx_entries_per_subcrq;
+                       tx_pool->free_map[tx_pool->producer_index] = index;
+                       tx_pool->producer_index =
+                               (tx_pool->producer_index + 1) %
+                                       tx_pool->num_buffers;
                }
                /* remove tx_comp scrq*/
                next->tx_comp.first = 0;
@@ -2546,8 +2624,8 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
                    __netif_subqueue_stopped(adapter->netdev,
                                             scrq->pool_index)) {
                        netif_wake_subqueue(adapter->netdev, scrq->pool_index);
-                       netdev_info(adapter->netdev, "Started queue %d\n",
-                                   scrq->pool_index);
+                       netdev_dbg(adapter->netdev, "Started queue %d\n",
+                                  scrq->pool_index);
                }
        }
 
@@ -3079,7 +3157,7 @@ static void vnic_add_client_data(struct ibmvnic_adapter *adapter,
        strncpy(&vlcd->name, adapter->netdev->name, len);
 }
 
-static void send_login(struct ibmvnic_adapter *adapter)
+static int send_login(struct ibmvnic_adapter *adapter)
 {
        struct ibmvnic_login_rsp_buffer *login_rsp_buffer;
        struct ibmvnic_login_buffer *login_buffer;
@@ -3095,6 +3173,12 @@ static void send_login(struct ibmvnic_adapter *adapter)
        struct vnic_login_client_data *vlcd;
        int i;
 
+       if (!adapter->tx_scrq || !adapter->rx_scrq) {
+               netdev_err(adapter->netdev,
+                          "RX or TX queues are not allocated, device login failed\n");
+               return -1;
+       }
+
        release_login_rsp_buffer(adapter);
        client_data_len = vnic_client_data_len(adapter);
 
@@ -3192,7 +3276,7 @@ static void send_login(struct ibmvnic_adapter *adapter)
        crq.login.len = cpu_to_be32(buffer_size);
        ibmvnic_send_crq(adapter, &crq);
 
-       return;
+       return 0;
 
 buf_rsp_map_failed:
        kfree(login_rsp_buffer);
@@ -3201,7 +3285,7 @@ static void send_login(struct ibmvnic_adapter *adapter)
 buf_map_failed:
        kfree(login_buffer);
 buf_alloc_failed:
-       return;
+       return -1;
 }
 
 static void send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
@@ -4430,6 +4514,14 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
                release_crq_queue(adapter);
        }
 
+       rc = init_stats_buffers(adapter);
+       if (rc)
+               return rc;
+
+       rc = init_stats_token(adapter);
+       if (rc)
+               return rc;
+
        return rc;
 }
 
@@ -4537,6 +4629,9 @@ static int ibmvnic_remove(struct vio_dev *dev)
        release_sub_crqs(adapter, 1);
        release_crq_queue(adapter);
 
+       release_stats_token(adapter);
+       release_stats_buffers(adapter);
+
        adapter->state = VNIC_REMOVED;
 
        mutex_unlock(&adapter->reset_lock);
index 099c89d49945d08735eda8f5cf63298185f86afa..89efe700eafe796667147b58bbe617cc57761722 100644 (file)
@@ -43,6 +43,7 @@
 
 #define IBMVNIC_TSO_BUF_SZ     65536
 #define IBMVNIC_TSO_BUFS       64
+#define IBMVNIC_TSO_POOL_MASK  0x80000000
 
 #define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE)
 #define IBMVNIC_BUFFER_HLEN 500
@@ -917,11 +918,9 @@ struct ibmvnic_tx_pool {
        int *free_map;
        int consumer_index;
        int producer_index;
-       wait_queue_head_t ibmvnic_tx_comp_q;
-       struct task_struct *work_thread;
        struct ibmvnic_long_term_buff long_term_buff;
-       struct ibmvnic_long_term_buff tso_ltb;
-       int tso_index;
+       int num_buffers;
+       int buf_size;
 };
 
 struct ibmvnic_rx_buff {
@@ -1044,6 +1043,7 @@ struct ibmvnic_adapter {
        u64 promisc;
 
        struct ibmvnic_tx_pool *tx_pool;
+       struct ibmvnic_tx_pool *tso_pool;
        struct completion init_done;
        int init_done_rc;
 
index 29486478836e6a946553458911421976aec15a6f..41ad56edfb96da5362b76ae00722731cfbe2a107 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel PRO/100 Linux driver
index 4a6ab1522451ca77dbd83b2186125cee0c86a803..c7caadd3c8af9e7e443327a9acdc430c01a6a2fa 100644 (file)
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel PRO/1000 Linux driver
index 8fd2458060a088d6475622e4ae68cd40df23c116..3a0feea2df54463822346bc45c70c67aad914f01 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel PRO/1000 Linux driver
index dc71e87c32603564a63fed9b366670793220f1b0..3e80ca170dd78cc74e1843576a061772c06c0731 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  * Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2006 Intel Corporation.
index 3bac9df1c09942a0806681db5afe64d8110f1ecb..6e7e923d57bf47d078481ffd5185fcc82c6e5aa3 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 *
   Intel PRO/1000 Linux driver
index 5cf7268cc4e13697631f75000b5194319a5af5df..f09c569ec19b7609209f2cb14ce05621afcd016d 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel PRO/1000 Linux driver
index 3dd4aeb2706d393cd8fbf4998a7582d33b9bafcd..d5eb19b86a0acf5bc0422ee87df341717d8a3fd2 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel PRO/1000 Linux driver
index 33e7c45a4fe4831bf306d501b93ff12ce673b065..ae0559b8b011ab25412810573800863f31f05318 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel PRO/1000 Linux driver
index c9cde352b1c885acd3c45fee6460afa6fd64deaf..345f23927bcc8a06a03c3dc84e4b3253b7d8328a 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel PRO/1000 Linux driver
index cd391376036c9c6e2045f8e9dbf1c3a2c6d921da..953e99df420c03f069ebbf7b97312727eb3692f3 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index a2162e11673eef45b72e631cf1b30a0748064718..ee6d1256fda4559510126edc855d463dcd5cbe7a 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index 6b03c8553e59710b5cad2cb903f0e4e05d0cae5e..924f2c8dfa6cbb8179532dd6da5e6301b82650c5 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index abc6a9abff980227e6936c3ae2efa7e62aae96fb..9a24c645f726b8557ef76e571679ac8723a2a688 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index 106de493373ce6c73e85e7e2be6f6e8c3e61df0c..24e391a4ac68e92a2b55f5cdb4f6dbf0722ff003 100644 (file)
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel PRO/1000 Linux driver
index afb7ebe20b2438e9500f5dff2b1126ccde9c4670..22883015a6959266657478d07094d5e05e53e623 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
 #define E1000_ICR_RXDMT0        0x00000010 /* Rx desc min. threshold (0) */
 #define E1000_ICR_RXO           0x00000040 /* Receiver Overrun */
 #define E1000_ICR_RXT0          0x00000080 /* Rx timer intr (ring 0) */
+#define E1000_ICR_MDAC          0x00000200 /* MDIO Access Complete */
+#define E1000_ICR_SRPD          0x00010000 /* Small Receive Packet Detected */
+#define E1000_ICR_ACK           0x00020000 /* Receive ACK Frame Detected */
+#define E1000_ICR_MNG           0x00040000 /* Manageability Event Detected */
 #define E1000_ICR_ECCER         0x00400000 /* Uncorrectable ECC Error */
 /* If this bit asserted, the driver should claim the interrupt */
 #define E1000_ICR_INT_ASSERTED 0x80000000
 #define E1000_ICR_RXQ1          0x00200000 /* Rx Queue 1 Interrupt */
 #define E1000_ICR_TXQ0          0x00400000 /* Tx Queue 0 Interrupt */
 #define E1000_ICR_TXQ1          0x00800000 /* Tx Queue 1 Interrupt */
-#define E1000_ICR_OTHER         0x01000000 /* Other Interrupts */
+#define E1000_ICR_OTHER         0x01000000 /* Other Interrupt */
 
 /* PBA ECC Register */
 #define E1000_PBA_ECC_COUNTER_MASK  0xFFF00000 /* ECC counter mask */
        E1000_IMS_RXSEQ  |    \
        E1000_IMS_LSC)
 
+/* These are all of the events related to the OTHER interrupt.
+ */
+#define IMS_OTHER_MASK ( \
+       E1000_IMS_LSC  | \
+       E1000_IMS_RXO  | \
+       E1000_IMS_MDAC | \
+       E1000_IMS_SRPD | \
+       E1000_IMS_ACK  | \
+       E1000_IMS_MNG)
+
 /* Interrupt Mask Set */
 #define E1000_IMS_TXDW      E1000_ICR_TXDW      /* Transmit desc written back */
 #define E1000_IMS_LSC       E1000_ICR_LSC       /* Link Status Change */
 #define E1000_IMS_RXSEQ     E1000_ICR_RXSEQ     /* Rx sequence error */
 #define E1000_IMS_RXDMT0    E1000_ICR_RXDMT0    /* Rx desc min. threshold */
+#define E1000_IMS_RXO       E1000_ICR_RXO       /* Receiver Overrun */
 #define E1000_IMS_RXT0      E1000_ICR_RXT0      /* Rx timer intr */
+#define E1000_IMS_MDAC      E1000_ICR_MDAC      /* MDIO Access Complete */
+#define E1000_IMS_SRPD      E1000_ICR_SRPD      /* Small Receive Packet */
+#define E1000_IMS_ACK       E1000_ICR_ACK       /* Receive ACK Frame Detected */
+#define E1000_IMS_MNG       E1000_ICR_MNG       /* Manageability Event */
 #define E1000_IMS_ECCER     E1000_ICR_ECCER     /* Uncorrectable ECC Error */
 #define E1000_IMS_RXQ0      E1000_ICR_RXQ0      /* Rx Queue 0 Interrupt */
 #define E1000_IMS_RXQ1      E1000_ICR_RXQ1      /* Rx Queue 1 Interrupt */
index 2311b31bdcac91f1b559b2627ac24471890d69c2..da88555ba1fdf3ba97f2171618b4b438484ba4f0 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index 003cbd6057990050dd3bda971b9ad77ee6c5e17e..64dc0c11147faba8c6a5df1e9a4d2d20b2ad025e 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index d803b1a123495c91532834e30ebc0fe6ce4933eb..21802396bed68f2b662b4148c515c486ea41555a 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index 31277d3bb7dc1241032695d2d9424779654f4f5f..1551d6ce5341022d3d752f6209bf3207e821cb11 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
@@ -1367,9 +1368,6 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
  *  Checks to see of the link status of the hardware has changed.  If a
  *  change in link status has been detected, then we read the PHY registers
  *  to get the current speed/duplex if link exists.
- *
- *  Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link
- *  up).
  **/
 static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 {
@@ -1385,7 +1383,8 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
         * Change or Rx Sequence Error interrupt.
         */
        if (!mac->get_link_status)
-               return 1;
+               return 0;
+       mac->get_link_status = false;
 
        /* First we want to see if the MII Status Register reports
         * link.  If so, then we want to get the current speed/duplex
@@ -1393,12 +1392,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
         */
        ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link);
        if (ret_val)
-               return ret_val;
+               goto out;
 
        if (hw->mac.type == e1000_pchlan) {
                ret_val = e1000_k1_gig_workaround_hv(hw, link);
                if (ret_val)
-                       return ret_val;
+                       goto out;
        }
 
        /* When connected at 10Mbps half-duplex, some parts are excessively
@@ -1431,7 +1430,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 
                ret_val = hw->phy.ops.acquire(hw);
                if (ret_val)
-                       return ret_val;
+                       goto out;
 
                if (hw->mac.type == e1000_pch2lan)
                        emi_addr = I82579_RX_CONFIG;
@@ -1453,7 +1452,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
                hw->phy.ops.release(hw);
 
                if (ret_val)
-                       return ret_val;
+                       goto out;
 
                if (hw->mac.type >= e1000_pch_spt) {
                        u16 data;
@@ -1462,14 +1461,14 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
                        if (speed == SPEED_1000) {
                                ret_val = hw->phy.ops.acquire(hw);
                                if (ret_val)
-                                       return ret_val;
+                                       goto out;
 
                                ret_val = e1e_rphy_locked(hw,
                                                          PHY_REG(776, 20),
                                                          &data);
                                if (ret_val) {
                                        hw->phy.ops.release(hw);
-                                       return ret_val;
+                                       goto out;
                                }
 
                                ptr_gap = (data & (0x3FF << 2)) >> 2;
@@ -1483,18 +1482,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
                                }
                                hw->phy.ops.release(hw);
                                if (ret_val)
-                                       return ret_val;
+                                       goto out;
                        } else {
                                ret_val = hw->phy.ops.acquire(hw);
                                if (ret_val)
-                                       return ret_val;
+                                       goto out;
 
                                ret_val = e1e_wphy_locked(hw,
                                                          PHY_REG(776, 20),
                                                          0xC023);
                                hw->phy.ops.release(hw);
                                if (ret_val)
-                                       return ret_val;
+                                       goto out;
 
                        }
                }
@@ -1521,7 +1520,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
            (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_V3)) {
                ret_val = e1000_k1_workaround_lpt_lp(hw, link);
                if (ret_val)
-                       return ret_val;
+                       goto out;
        }
        if (hw->mac.type >= e1000_pch_lpt) {
                /* Set platform power management values for
@@ -1529,7 +1528,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
                 */
                ret_val = e1000_platform_pm_pch_lpt(hw, link);
                if (ret_val)
-                       return ret_val;
+                       goto out;
        }
 
        /* Clear link partner's EEE ability */
@@ -1552,9 +1551,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
        }
 
        if (!link)
-               return 0;       /* No link detected */
-
-       mac->get_link_status = false;
+               goto out;
 
        switch (hw->mac.type) {
        case e1000_pch2lan:
@@ -1616,12 +1613,14 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
         * different link partner.
         */
        ret_val = e1000e_config_fc_after_link_up(hw);
-       if (ret_val) {
+       if (ret_val)
                e_dbg("Error configuring flow control\n");
-               return ret_val;
-       }
 
-       return 1;
+       return ret_val;
+
+out:
+       mac->get_link_status = true;
+       return ret_val;
 }
 
 static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
index 00a36df02a3fd917e40989577af5b43f4bd064f5..3c4f82c21084a44c5962ede1e01863ee6bd4fe34 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index f457c5703d0c45d4c9f661395acca1a9814de686..b293464a9f2738fb7bc10399ad3bb258056102e3 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
@@ -410,9 +411,6 @@ void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw)
  *  Checks to see of the link status of the hardware has changed.  If a
  *  change in link status has been detected, then we read the PHY registers
  *  to get the current speed/duplex if link exists.
- *
- *  Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link
- *  up).
  **/
 s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
 {
@@ -426,20 +424,16 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
         * Change or Rx Sequence Error interrupt.
         */
        if (!mac->get_link_status)
-               return 1;
+               return 0;
+       mac->get_link_status = false;
 
        /* First we want to see if the MII Status Register reports
         * link.  If so, then we want to get the current speed/duplex
         * of the PHY.
         */
        ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link);
-       if (ret_val)
-               return ret_val;
-
-       if (!link)
-               return 0;       /* No link detected */
-
-       mac->get_link_status = false;
+       if (ret_val || !link)
+               goto out;
 
        /* Check if there was DownShift, must be checked
         * immediately after link-up
@@ -464,12 +458,14 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
         * different link partner.
         */
        ret_val = e1000e_config_fc_after_link_up(hw);
-       if (ret_val) {
+       if (ret_val)
                e_dbg("Error configuring flow control\n");
-               return ret_val;
-       }
 
-       return 1;
+       return ret_val;
+
+out:
+       mac->get_link_status = true;
+       return ret_val;
 }
 
 /**
index 8284618af9ff2dbd75b8e19b6377415778b0f532..cb0abf6c76a5ca323af21b5eeb4e6c4649a4cd32 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index cc9b3befc2bc0596ea3ae13da69b5afc29fd459d..e027660aeb92b62e34ba52d5d483d80c5bc8bdd0 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index 0b9ea5952b0719c0517f44443de2ef96199456d5..3268f2e58593f7eab6651e86b49da8bf380c0cc8 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index 1298b69f990b40628ef1fbb353e6d9f1bfafdd76..ec4a9759a6f26be5222a09d0b3cf405c35a23958 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
@@ -1914,30 +1915,20 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
        struct net_device *netdev = data;
        struct e1000_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
-       u32 icr;
-       bool enable = true;
-
-       icr = er32(ICR);
-       if (icr & E1000_ICR_RXO) {
-               ew32(ICR, E1000_ICR_RXO);
-               enable = false;
-               /* napi poll will re-enable Other, make sure it runs */
-               if (napi_schedule_prep(&adapter->napi)) {
-                       adapter->total_rx_bytes = 0;
-                       adapter->total_rx_packets = 0;
-                       __napi_schedule(&adapter->napi);
-               }
-       }
+       u32 icr = er32(ICR);
+
+       if (icr & adapter->eiac_mask)
+               ew32(ICS, (icr & adapter->eiac_mask));
+
        if (icr & E1000_ICR_LSC) {
-               ew32(ICR, E1000_ICR_LSC);
                hw->mac.get_link_status = true;
                /* guard against interrupt when we're going down */
                if (!test_bit(__E1000_DOWN, &adapter->state))
                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
        }
 
-       if (enable && !test_bit(__E1000_DOWN, &adapter->state))
-               ew32(IMS, E1000_IMS_OTHER);
+       if (!test_bit(__E1000_DOWN, &adapter->state))
+               ew32(IMS, E1000_IMS_OTHER | IMS_OTHER_MASK);
 
        return IRQ_HANDLED;
 }
@@ -2040,7 +2031,6 @@ static void e1000_configure_msix(struct e1000_adapter *adapter)
                       hw->hw_addr + E1000_EITR_82574(vector));
        else
                writel(1, hw->hw_addr + E1000_EITR_82574(vector));
-       adapter->eiac_mask |= E1000_IMS_OTHER;
 
        /* Cause Tx interrupts on every write back */
        ivar |= BIT(31);
@@ -2265,7 +2255,8 @@ static void e1000_irq_enable(struct e1000_adapter *adapter)
 
        if (adapter->msix_entries) {
                ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574);
-               ew32(IMS, adapter->eiac_mask | E1000_IMS_LSC);
+               ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER |
+                    IMS_OTHER_MASK);
        } else if (hw->mac.type >= e1000_pch_lpt) {
                ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER);
        } else {
@@ -2333,8 +2324,8 @@ static int e1000_alloc_ring_dma(struct e1000_adapter *adapter,
 {
        struct pci_dev *pdev = adapter->pdev;
 
-       ring->desc = dma_alloc_coherent(&pdev->dev, ring->size, &ring->dma,
-                                       GFP_KERNEL);
+       ring->desc = dma_zalloc_coherent(&pdev->dev, ring->size, &ring->dma,
+                                        GFP_KERNEL);
        if (!ring->desc)
                return -ENOMEM;
 
@@ -2707,8 +2698,7 @@ static int e1000e_poll(struct napi_struct *napi, int weight)
                napi_complete_done(napi, work_done);
                if (!test_bit(__E1000_DOWN, &adapter->state)) {
                        if (adapter->msix_entries)
-                               ew32(IMS, adapter->rx_ring->ims_val |
-                                    E1000_IMS_OTHER);
+                               ew32(IMS, adapter->rx_ring->ims_val);
                        else
                                e1000_irq_enable(adapter);
                }
@@ -5101,7 +5091,7 @@ static bool e1000e_has_link(struct e1000_adapter *adapter)
        case e1000_media_type_copper:
                if (hw->mac.get_link_status) {
                        ret_val = hw->mac.ops.check_for_link(hw);
-                       link_active = ret_val > 0;
+                       link_active = !hw->mac.get_link_status;
                } else {
                        link_active = true;
                }
index 2efd80dfd88e8dfbbc942fc12d926ab8e4718084..68949bb41b7b592ad3194d9f60dfa8d762342166 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index 5d46967e0d1f47b4f8782d0518bdec959fd88feb..8e082028be7dd1738e97cc2b5cbb30ce8d87af9e 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index 47da518645437c271b963ece477754516a540fbf..2def33eba9e67041b760a56c0d637c2b47d52756 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index 86ff0969efb6e0c71cfb2e4a2b7380afb166e43b..b8226ed0e338f45e71f35a4036d97964d8491591 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index 3027f63ee793c37a4f802bc288e009fc5f4814f2..d4180b5e91960c4f6b7532b8d313a823d3d4d15a 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index b366885487a8cbf884997042edf1303533c79869..f941e5085f44dd1399d2883f5aa3c32e934f7106 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index 0cb4d365e5ad72dd6f2a10bd18d22645de3c8dbf..16afc3c2a986cd6373118e027d3f33257b902690 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
index cac645329cea19a6bd25c665f6f0327d7ec3b85f..93277cb99cb7c9ec798858ed19df34c7ae348f5e 100644 (file)
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel(R) Ethernet Switch Host Interface Driver
index 46973fb234c56cb9913fdc7c60a1ad40ac50363a..a9cdf763c59de2460c84cfdefbe9da55c170bbc5 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2017 Intel Corporation.
  *
index 736a9f087bc9f5c644593c2f6a8ec14435a75576..e303d88720efad8b8f23240be7fee0f533f46fd6 100644 (file)
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -262,6 +263,7 @@ s32 fm10k_stop_hw_generic(struct fm10k_hw *hw)
  *  fm10k_read_hw_stats_32b - Reads value of 32-bit registers
  *  @hw: pointer to the hardware structure
  *  @addr: address of register containing a 32-bit value
+ *  @stat: pointer to structure holding hw stat information
  *
  *  Function reads the content of the register and returns the delta
  *  between the base and the current value.
@@ -281,6 +283,7 @@ u32 fm10k_read_hw_stats_32b(struct fm10k_hw *hw, u32 addr,
  *  fm10k_read_hw_stats_48b - Reads value of 48-bit registers
  *  @hw: pointer to the hardware structure
  *  @addr: address of register containing the lower 32-bit value
+ *  @stat: pointer to structure holding hw stat information
  *
  *  Function reads the content of 2 registers, combined to represent a 48-bit
  *  statistical value. Extra processing is required to handle overflowing.
@@ -461,7 +464,6 @@ void fm10k_update_hw_stats_q(struct fm10k_hw *hw, struct fm10k_hw_stats_q *q,
 
 /**
  *  fm10k_unbind_hw_stats_q - Unbind the queue counters from their queues
- *  @hw: pointer to the hardware structure
  *  @q: pointer to the ring of hardware statistics queue
  *  @idx: index pointing to the start of the ring iteration
  *  @count: number of queues to iterate over
index d51f9c7a47ff4d4ec80b758650cfc59bf240794b..2bdb24d2ca9d860508ebae72c45bcdddcc79372f 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2016 Intel Corporation.
  *
index db4bd8bf9722420eb8979108add209d75c9fadd3..c4f733452ef2918825091fb63a26a12ff21698ad 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2016 Intel Corporation.
  *
index 14df09e2d964f92025cddbac28e510410bf194ed..43e8d839831f0916cc7d337d48eb3d6c6d008ecc 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2016 Intel Corporation.
  *
index c7234f35f8ff3462dc667a01dafd10208dcc9d82..28b6b4e564879e6e6b2029c174ea4ffa7bf28079 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2017 Intel Corporation.
  *
index 760cfa52d02cc891f43912cdb2d9ce38d9c0ef30..30395f5e5e87ad3b5a7b4cb40832fb8d4f3c0be1 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2017 Intel Corporation.
  *
index 8e12aae065d82f2ae39ee5c559c83da394a5fe19..df8607097e4abda3330c3ffda7d85c1284edbf36 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2017 Intel Corporation.
  *
 
 #include "fm10k.h"
 
-#define DRV_VERSION    "0.22.1-k"
+#define DRV_VERSION    "0.23.4-k"
 #define DRV_SUMMARY    "Intel(R) Ethernet Switch Host Interface Driver"
 const char fm10k_driver_version[] = DRV_VERSION;
 char fm10k_driver_name[] = "fm10k";
 static const char fm10k_driver_string[] = DRV_SUMMARY;
 static const char fm10k_copyright[] =
-       "Copyright(c) 2013 - 2017 Intel Corporation.";
+       "Copyright(c) 2013 - 2018 Intel Corporation.";
 
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
 MODULE_DESCRIPTION(DRV_SUMMARY);
index 244d3ad58ca728235450bccfc01e1801b4c5645e..c01bf30a0c9e1def9b6aecac61ef5676e7249f85 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2017 Intel Corporation.
  *
index 35c1dbad13307f727188bc526f748379c5c82174..007e1dfa9b7a082d3030b0de6a72345385c27c35 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2016 Intel Corporation.
  *
index a38ae5c54da36c150f934d3fcdf275f2ad64de2b..45793491d4ba34dea2ffa632579ead116d4e9add 100644 (file)
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -486,7 +487,7 @@ static void fm10k_insert_tunnel_port(struct list_head *ports,
 
 /**
  * fm10k_udp_tunnel_add
- * @netdev: network interface device structure
+ * @dev: network interface device structure
  * @ti: Tunnel endpoint information
  *
  * This function is called when a new UDP tunnel port has been added.
@@ -518,8 +519,8 @@ static void fm10k_udp_tunnel_add(struct net_device *dev,
 
 /**
  * fm10k_udp_tunnel_del
- * @netdev: network interface device structure
- * @ti: Tunnel endpoint information
+ * @dev: network interface device structure
+ * @ti: Tunnel end point information
  *
  * This function is called when a new UDP tunnel port is deleted. The freed
  * port will be removed from the list, then we reprogram the offloaded port
@@ -803,7 +804,7 @@ int fm10k_queue_vlan_request(struct fm10k_intfc *interface,
  * @glort: the target glort for this update
  * @addr: the address to update
  * @vid: the vid to update
- * @sync: whether to add or remove
+ * @set: whether to add or remove
  *
  * This function queues up a MAC request for sending to the switch manager.
  * A separate thread monitors the queue and sends updates to the switch
index a434fecfdfeb6aaf74719d0182d89c5008efcd02..cffcb187cb7661b08902b244f610f79bf53d2ae6 100644 (file)
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -29,7 +30,7 @@ static const struct fm10k_info *fm10k_info_tbl[] = {
        [fm10k_device_vf] = &fm10k_vf_info,
 };
 
-/**
+/*
  * fm10k_pci_tbl - PCI Device ID Table
  *
  * Wildcard entries (PCI_ANY_ID) should come last
@@ -211,7 +212,7 @@ static void fm10k_start_service_event(struct fm10k_intfc *interface)
 
 /**
  * fm10k_service_timer - Timer Call-back
- * @data: pointer to interface cast into an unsigned long
+ * @t: pointer to timer data
  **/
 static void fm10k_service_timer(struct timer_list *t)
 {
@@ -649,7 +650,7 @@ void fm10k_update_stats(struct fm10k_intfc *interface)
 
 /**
  * fm10k_watchdog_flush_tx - flush queues on host not ready
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
  **/
 static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
 {
@@ -679,7 +680,7 @@ static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
 
 /**
  * fm10k_watchdog_subtask - check and bring link up
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
  **/
 static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
 {
@@ -703,7 +704,7 @@ static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
 
 /**
  * fm10k_check_hang_subtask - check for hung queues and dropped interrupts
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
  *
  * This function serves two purposes.  First it strobes the interrupt lines
  * in order to make certain interrupts are occurring.  Secondly it sets the
@@ -1995,6 +1996,7 @@ void fm10k_down(struct fm10k_intfc *interface)
 /**
  * fm10k_sw_init - Initialize general software structures
  * @interface: host interface private structure to initialize
+ * @ent: PCI device ID entry
  *
  * fm10k_sw_init initializes the interface private data structure.
  * Fields are initialized based on PCI device information and
index d6406fc31ffb58a75350e6e4a10ea2d6fb338e5c..7ba54c534f8cbcd4f742aaad4940f183f45d77de 100644 (file)
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -1180,7 +1181,7 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results,
 
 /**
  * fm10k_iov_select_vid - Select correct default VLAN ID
- * @hw: Pointer to hardware structure
+ * @vf_info: pointer to VF information structure
  * @vid: VLAN ID to correct
  *
  * Will report an error if the VLAN ID is out of range. For VID = 0, it will
index e04d41f1a5325f377a6644d57ebcdac8867c0055..ae81f9a16602a50bd6df59ab58e72a4675e636cd 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2017 Intel Corporation.
  *
index f8e87bf086b938fea1ca776cd4162dfe62587402..725ecb7abccd9a82180f90bde99ec6eb0c96b4de 100644 (file)
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -120,6 +121,7 @@ static s32 fm10k_tlv_attr_get_null_string(u32 *attr, unsigned char *string)
  *  @msg: Pointer to message block
  *  @attr_id: Attribute ID
  *  @mac_addr: MAC address to be stored
+ *  @vlan: VLAN to be stored
  *
  *  This function will reorder a MAC address to be CPU endian and store it
  *  in the attribute buffer.  It will return success if provided with a
@@ -155,8 +157,8 @@ s32 fm10k_tlv_attr_put_mac_vlan(u32 *msg, u16 attr_id,
 /**
  *  fm10k_tlv_attr_get_mac_vlan - Get MAC/VLAN stored in attribute
  *  @attr: Pointer to attribute
- *  @attr_id: Attribute ID
  *  @mac_addr: location of buffer to store MAC address
+ *  @vlan: location of buffer to store VLAN
  *
  *  This function pulls the MAC address back out of the attribute and will
  *  place it in the array pointed by by mac_addr.  It will return success
@@ -549,7 +551,7 @@ static s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results,
  *  @hw: Pointer to hardware structure
  *  @msg: Pointer to message
  *  @mbx: Pointer to mailbox information structure
- *  @func: Function array containing list of message handling functions
+ *  @data: Pointer to message handler data structure
  *
  *  This function should be the first function called upon receiving a
  *  message.  The handler will identify the message type and call the correct
index a1f1027fe18402acffc05c507a3252968807e2c6..5d2ee759507ee6635ffe149d67e37a4b7fcb5e62 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2016 Intel Corporation.
  *
index 6bb16c13d9d6c957fd3eec3fc4431381966a4dfe..dd23af11e2c132ae8749af94cc818f360d56e5a4 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2016 Intel Corporation.
  *
index 337ba65a9411edf5acdbde0352971daad00e719d..f06913630b39b964cab61ad5bdc34689329337f5 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2016 Intel Corporation.
  *
index 2662f33c0c714071da9d654fb391966e4d4f8223..66a66b73a2f1f6beee1e2c92e189276d510b314e 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2016 Intel Corporation.
  *
index 3da482c3d68db4bcf32688dca09a8d1132ad157f..75437768a07c759b5905c24b5ee230deaf316c6e 100644 (file)
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel Ethernet Controller XL710 Family Linux Driver
index 36d9401a62589b894471ace383320041af4a0abf..1d33a8b3ef54449b40ee0b9174a1d9d4605c3627 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
@@ -1041,6 +1042,7 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi);
 void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset);
 void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs);
 void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id);
+void i40e_client_update_msix_info(struct i40e_pf *pf);
 int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id);
 /**
  * i40e_irq_dynamic_enable - Enable default interrupt generation settings
index e78971605e0bf44be9965e3dfa80ea7d071a8b6c..843fc7781ef8b80f1f665328f62ef5437168cd6b 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index 2349fbe04bd251322a93b6c968afac519ad0564b..0a8749ee9fd3184ec07f0310e668d28ecdf6b93e 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index a852775d30592d78a016f5b40027bd45701f1ee4..0244923edeb8bb7aeadc76a883451d09d666e972 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
@@ -1914,6 +1915,43 @@ enum i40e_aq_phy_type {
        I40E_PHY_TYPE_DEFAULT                   = 0xFF,
 };
 
+#define I40E_PHY_TYPES_BITMASK (BIT_ULL(I40E_PHY_TYPE_SGMII) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_KX) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_KR) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_XAUI) | \
+                               BIT_ULL(I40E_PHY_TYPE_XFI) | \
+                               BIT_ULL(I40E_PHY_TYPE_SFI) | \
+                               BIT_ULL(I40E_PHY_TYPE_XLAUI) | \
+                               BIT_ULL(I40E_PHY_TYPE_XLPPI) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC) | \
+                               BIT_ULL(I40E_PHY_TYPE_UNRECOGNIZED) | \
+                               BIT_ULL(I40E_PHY_TYPE_UNSUPPORTED) | \
+                               BIT_ULL(I40E_PHY_TYPE_100BASE_TX) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_T) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_T) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_SR) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_LR) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_SX) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_LX) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL) | \
+                               BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_KR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_CR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_SR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_LR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC))
+
 #define I40E_LINK_SPEED_100MB_SHIFT    0x1
 #define I40E_LINK_SPEED_1000MB_SHIFT   0x2
 #define I40E_LINK_SPEED_10GB_SHIFT     0x3
index 926811ad44ac2acdb3300a83b629596f39f54d1d..abed0c52e782d7047a1475deb4ca3d6146a7430f 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index 0de9610c1d8db1e95250f51f1987a7d6608699d9..999dea5a7c9e1a8dbbbb9afdfc823f7ad27037e0 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
@@ -287,6 +288,17 @@ int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id)
        return capable;
 }
 
+void i40e_client_update_msix_info(struct i40e_pf *pf)
+{
+       struct i40e_client_instance *cdev = pf->cinst;
+
+       if (!cdev || !cdev->client)
+               return;
+
+       cdev->lan_info.msix_count = pf->num_iwarp_msix;
+       cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
+}
+
 /**
  * i40e_client_add_instance - add a client instance struct to the instance list
  * @pf: pointer to the board struct
@@ -328,9 +340,6 @@ static void i40e_client_add_instance(struct i40e_pf *pf)
                return;
        }
 
-       cdev->lan_info.msix_count = pf->num_iwarp_msix;
-       cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
-
        mac = list_first_entry(&cdev->lan_info.netdev->dev_addrs.list,
                               struct netdev_hw_addr, list);
        if (mac)
@@ -340,6 +349,8 @@ static void i40e_client_add_instance(struct i40e_pf *pf)
 
        cdev->client = registered_client;
        pf->cinst = cdev;
+
+       i40e_client_update_msix_info(pf);
 }
 
 /**
index ba55c889e4c5a1c0bcb4758feb693e43560aff12..9d464d40bc1731ea38b1358d9feff0750b42bb3f 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index ef5a868aae462deeb1f11012b6a011245c860fcb..c0a3dae8a2db336555ac21b73a9c88de47604b6d 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
@@ -1208,6 +1209,29 @@ static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
        return media;
 }
 
+/**
+ * i40e_poll_globr - Poll for Global Reset completion
+ * @hw: pointer to the hardware structure
+ * @retry_limit: how many times to retry before failure
+ **/
+static i40e_status i40e_poll_globr(struct i40e_hw *hw,
+                                  u32 retry_limit)
+{
+       u32 cnt, reg = 0;
+
+       for (cnt = 0; cnt < retry_limit; cnt++) {
+               reg = rd32(hw, I40E_GLGEN_RSTAT);
+               if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK))
+                       return 0;
+               msleep(100);
+       }
+
+       hw_dbg(hw, "Global reset failed.\n");
+       hw_dbg(hw, "I40E_GLGEN_RSTAT = 0x%x\n", reg);
+
+       return I40E_ERR_RESET_FAILED;
+}
+
 #define I40E_PF_RESET_WAIT_COUNT_A0    200
 #define I40E_PF_RESET_WAIT_COUNT       200
 /**
@@ -1284,14 +1308,14 @@ i40e_status i40e_pf_reset(struct i40e_hw *hw)
                        if (!(reg & I40E_PFGEN_CTRL_PFSWR_MASK))
                                break;
                        reg2 = rd32(hw, I40E_GLGEN_RSTAT);
-                       if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK) {
-                               hw_dbg(hw, "Core reset upcoming. Skipping PF reset request.\n");
-                               hw_dbg(hw, "I40E_GLGEN_RSTAT = 0x%x\n", reg2);
-                               return I40E_ERR_NOT_READY;
-                       }
+                       if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK)
+                               break;
                        usleep_range(1000, 2000);
                }
-               if (reg & I40E_PFGEN_CTRL_PFSWR_MASK) {
+               if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK) {
+                       if (i40e_poll_globr(hw, grst_del))
+                               return I40E_ERR_RESET_FAILED;
+               } else if (reg & I40E_PFGEN_CTRL_PFSWR_MASK) {
                        hw_dbg(hw, "PF reset polling failed to complete.\n");
                        return I40E_ERR_RESET_FAILED;
                }
@@ -2415,6 +2439,7 @@ i40e_status i40e_aq_get_switch_config(struct i40e_hw *hw,
  * i40e_aq_set_switch_config
  * @hw: pointer to the hardware structure
  * @flags: bit flag values to set
+ * @mode: cloud filter mode
  * @valid_flags: which bit flags to set
  * @mode: cloud filter mode
  * @cmd_details: pointer to command details structure or NULL
@@ -3200,9 +3225,10 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
        u32 valid_functions, num_functions;
        u32 number, logical_id, phys_id;
        struct i40e_hw_capabilities *p;
+       u16 id, ocp_cfg_word0;
+       i40e_status status;
        u8 major_rev;
        u32 i = 0;
-       u16 id;
 
        cap = (struct i40e_aqc_list_capabilities_element_resp *) buff;
 
@@ -3389,6 +3415,26 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
                        hw->num_ports++;
        }
 
+       /* OCP cards case: if a mezz is removed the Ethernet port is at
+        * disabled state in PRTGEN_CNF register. Additional NVM read is
+        * needed in order to check if we are dealing with OCP card.
+        * Those cards have 4 PFs at minimum, so using PRTGEN_CNF for counting
+        * physical ports results in wrong partition id calculation and thus
+        * not supporting WoL.
+        */
+       if (hw->mac.type == I40E_MAC_X722) {
+               if (!i40e_acquire_nvm(hw, I40E_RESOURCE_READ)) {
+                       status = i40e_aq_read_nvm(hw, I40E_SR_EMP_MODULE_PTR,
+                                                 2 * I40E_SR_OCP_CFG_WORD0,
+                                                 sizeof(ocp_cfg_word0),
+                                                 &ocp_cfg_word0, true, NULL);
+                       if (!status &&
+                           (ocp_cfg_word0 & I40E_SR_OCP_ENABLED))
+                               hw->num_ports = 4;
+                       i40e_release_nvm(hw);
+               }
+       }
+
        valid_functions = p->valid_functions;
        num_functions = 0;
        while (valid_functions) {
@@ -5531,7 +5577,7 @@ i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 seid,
  * function.
  *
  **/
-i40e_status
+enum i40e_status_code
 i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
                             struct i40e_aqc_cloud_filters_element_bb *filters,
                             u8 filter_count)
@@ -5625,7 +5671,7 @@ i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 seid,
  * function.
  *
  **/
-i40e_status
+enum i40e_status_code
 i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
                             struct i40e_aqc_cloud_filters_element_bb *filters,
                             u8 filter_count)
index 55079fe3ed63382aa16d9c06711358c70a94bab2..9fec728dc4b9a06e795058a4024252575c8d4952 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index 92d01042c1f6f91b96561f4a22616f4015bf803e..4f806386cb227f2d9fc2645439865b0584d03696 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index 886e667f2f1c8a54c4a48586e10626f7ce3accf5..502818e3da7888c16b959987f2c08180974905f2 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index b829fd3656932a47a0add2bb77b2cf7087355125..d494dcaf18d0d6a9c2da0fd69e642985853668c0 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index 8e46098bad574a1f572292537296301f2332430a..ad6a66ccb57683a31bae15d2500765ea1d9a6f6d 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index 76ed56641864426e1cd108ff6812c25c733ee36f..df3e60470f8bc9689ac7f7dad8121b4479207ea7 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index 0b5911652084709e2c8ce1b25da296bfe487c81b..be8341763475af813e6365bf7b31ed34763e9354 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index 0dcbbda164c47789516cbc1402d5e7e0c8a845f2..846a9d597e01496f50e3eb4a90df8e8f0edd2705 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
@@ -230,6 +231,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
        I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0),
        I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
        I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
+       I40E_PRIV_FLAG("link-down-on-close",
+                      I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED, 0),
        I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
        I40E_PRIV_FLAG("disable-source-pruning",
                       I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
@@ -857,7 +860,9 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
        if (hw->device_id == I40E_DEV_ID_KX_B ||
            hw->device_id == I40E_DEV_ID_KX_C ||
            hw->device_id == I40E_DEV_ID_20G_KR2 ||
-           hw->device_id == I40E_DEV_ID_20G_KR2_A) {
+           hw->device_id == I40E_DEV_ID_20G_KR2_A ||
+           hw->device_id == I40E_DEV_ID_25G_B ||
+           hw->device_id == I40E_DEV_ID_KX_X722) {
                netdev_info(netdev, "Changing settings is not supported on backplane.\n");
                return -EOPNOTSUPP;
        }
@@ -868,23 +873,21 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
        /* save autoneg out of ksettings */
        autoneg = copy_ks.base.autoneg;
 
-       memset(&safe_ks, 0, sizeof(safe_ks));
+       /* get our own copy of the bits to check against */
+       memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings));
+       safe_ks.base.cmd = copy_ks.base.cmd;
+       safe_ks.base.link_mode_masks_nwords =
+               copy_ks.base.link_mode_masks_nwords;
+       i40e_get_link_ksettings(netdev, &safe_ks);
+
        /* Get link modes supported by hardware and check against modes
         * requested by the user.  Return an error if unsupported mode was set.
         */
-       i40e_phy_type_to_ethtool(pf, &safe_ks);
        if (!bitmap_subset(copy_ks.link_modes.advertising,
                           safe_ks.link_modes.supported,
                           __ETHTOOL_LINK_MODE_MASK_NBITS))
                return -EINVAL;
 
-       /* get our own copy of the bits to check against */
-       memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings));
-       safe_ks.base.cmd = copy_ks.base.cmd;
-       safe_ks.base.link_mode_masks_nwords =
-               copy_ks.base.link_mode_masks_nwords;
-       i40e_get_link_ksettings(netdev, &safe_ks);
-
        /* set autoneg back to what it currently is */
        copy_ks.base.autoneg = safe_ks.base.autoneg;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
deleted file mode 100644 (file)
index 2d1253c..0000000
+++ /dev/null
@@ -1,1571 +0,0 @@
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
-
-#include <linux/if_ether.h>
-#include <scsi/scsi_cmnd.h>
-#include <scsi/scsi_device.h>
-#include <scsi/fc/fc_fs.h>
-#include <scsi/fc/fc_fip.h>
-#include <scsi/fc/fc_fcoe.h>
-#include <scsi/libfc.h>
-#include <scsi/libfcoe.h>
-#include <uapi/linux/dcbnl.h>
-
-#include "i40e.h"
-#include "i40e_fcoe.h"
-
-/**
- * i40e_fcoe_sof_is_class2 - returns true if this is a FC Class 2 SOF
- * @sof: the FCoE start of frame delimiter
- **/
-static inline bool i40e_fcoe_sof_is_class2(u8 sof)
-{
-       return (sof == FC_SOF_I2) || (sof == FC_SOF_N2);
-}
-
-/**
- * i40e_fcoe_sof_is_class3 - returns true if this is a FC Class 3 SOF
- * @sof: the FCoE start of frame delimiter
- **/
-static inline bool i40e_fcoe_sof_is_class3(u8 sof)
-{
-       return (sof == FC_SOF_I3) || (sof == FC_SOF_N3);
-}
-
-/**
- * i40e_fcoe_sof_is_supported - returns true if the FC SOF is supported by HW
- * @sof: the input SOF value from the frame
- **/
-static inline bool i40e_fcoe_sof_is_supported(u8 sof)
-{
-       return i40e_fcoe_sof_is_class2(sof) ||
-              i40e_fcoe_sof_is_class3(sof);
-}
-
-/**
- * i40e_fcoe_fc_sof - pull the SOF from FCoE header in the frame
- * @skb: the frame whose EOF is to be pulled from
- **/
-static inline int i40e_fcoe_fc_sof(struct sk_buff *skb, u8 *sof)
-{
-       *sof = ((struct fcoe_hdr *)skb_network_header(skb))->fcoe_sof;
-
-       if (!i40e_fcoe_sof_is_supported(*sof))
-               return -EINVAL;
-       return 0;
-}
-
-/**
- * i40e_fcoe_eof_is_supported - returns true if the EOF is supported by HW
- * @eof:     the input EOF value from the frame
- **/
-static inline bool i40e_fcoe_eof_is_supported(u8 eof)
-{
-       return (eof == FC_EOF_N) || (eof == FC_EOF_T) ||
-              (eof == FC_EOF_NI) || (eof == FC_EOF_A);
-}
-
-/**
- * i40e_fcoe_fc_eof - pull EOF from FCoE trailer in the frame
- * @skb: the frame whose EOF is to be pulled from
- **/
-static inline int i40e_fcoe_fc_eof(struct sk_buff *skb, u8 *eof)
-{
-       /* the first byte of the last dword is EOF */
-       skb_copy_bits(skb, skb->len - 4, eof, 1);
-
-       if (!i40e_fcoe_eof_is_supported(*eof))
-               return -EINVAL;
-       return 0;
-}
-
-/**
- * i40e_fcoe_ctxt_eof - convert input FC EOF for descriptor programming
- * @eof: the input eof value from the frame
- *
- * The FC EOF is converted to the value understood by HW for descriptor
- * programming. Never call this w/o calling i40e_fcoe_eof_is_supported()
- * first and that already checks for all supported valid eof values.
- **/
-static inline u32 i40e_fcoe_ctxt_eof(u8 eof)
-{
-       switch (eof) {
-       case FC_EOF_N:
-               return I40E_TX_DESC_CMD_L4T_EOFT_EOF_N;
-       case FC_EOF_T:
-               return I40E_TX_DESC_CMD_L4T_EOFT_EOF_T;
-       case FC_EOF_NI:
-               return I40E_TX_DESC_CMD_L4T_EOFT_EOF_NI;
-       case FC_EOF_A:
-               return I40E_TX_DESC_CMD_L4T_EOFT_EOF_A;
-       default:
-               /* Supported valid eof shall be already checked by
-                * calling i40e_fcoe_eof_is_supported() first,
-                * therefore this default case shall never hit.
-                */
-               WARN_ON(1);
-               return -EINVAL;
-       }
-}
-
-/**
- * i40e_fcoe_xid_is_valid - returns true if the exchange id is valid
- * @xid: the exchange id
- **/
-static inline bool i40e_fcoe_xid_is_valid(u16 xid)
-{
-       return (xid != FC_XID_UNKNOWN) && (xid < I40E_FCOE_DDP_MAX);
-}
-
-/**
- * i40e_fcoe_ddp_unmap - unmap the mapped sglist associated
- * @pf: pointer to PF
- * @ddp: sw DDP context
- *
- * Unmap the scatter-gather list associated with the given SW DDP context
- *
- * Returns: data length already ddp-ed in bytes
- *
- **/
-static inline void i40e_fcoe_ddp_unmap(struct i40e_pf *pf,
-                                      struct i40e_fcoe_ddp *ddp)
-{
-       if (test_and_set_bit(__I40E_FCOE_DDP_UNMAPPED, &ddp->flags))
-               return;
-
-       if (ddp->sgl) {
-               dma_unmap_sg(&pf->pdev->dev, ddp->sgl, ddp->sgc,
-                            DMA_FROM_DEVICE);
-               ddp->sgl = NULL;
-               ddp->sgc = 0;
-       }
-
-       if (ddp->pool) {
-               dma_pool_free(ddp->pool, ddp->udl, ddp->udp);
-               ddp->pool = NULL;
-       }
-}
-
-/**
- * i40e_fcoe_ddp_clear - clear the given SW DDP context
- * @ddp - SW DDP context
- **/
-static inline void i40e_fcoe_ddp_clear(struct i40e_fcoe_ddp *ddp)
-{
-       memset(ddp, 0, sizeof(struct i40e_fcoe_ddp));
-       ddp->xid = FC_XID_UNKNOWN;
-       ddp->flags = __I40E_FCOE_DDP_NONE;
-}
-
-/**
- * i40e_fcoe_progid_is_fcoe - check if the prog_id is for FCoE
- * @id: the prog id for the programming status Rx descriptor write-back
- **/
-static inline bool i40e_fcoe_progid_is_fcoe(u8 id)
-{
-       return (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
-              (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS);
-}
-
-/**
- * i40e_fcoe_fc_get_xid - get xid from the frame header
- * @fh: the fc frame header
- *
- * In case the incoming frame's exchange is originated from
- * the initiator, then received frame's exchange id is ANDed
- * with fc_cpu_mask bits to get the same cpu on which exchange
- * was originated, otherwise just use the current cpu.
- *
- * Returns ox_id if exchange originator, rx_id if responder
- **/
-static inline u16 i40e_fcoe_fc_get_xid(struct fc_frame_header *fh)
-{
-       u32 f_ctl = ntoh24(fh->fh_f_ctl);
-
-       return (f_ctl & FC_FC_EX_CTX) ?
-               be16_to_cpu(fh->fh_ox_id) :
-               be16_to_cpu(fh->fh_rx_id);
-}
-
-/**
- * i40e_fcoe_fc_frame_header - get fc frame header from skb
- * @skb: packet
- *
- * This checks if there is a VLAN header and returns the data
- * pointer to the start of the fc_frame_header.
- *
- * Returns pointer to the fc_frame_header
- **/
-static inline struct fc_frame_header *i40e_fcoe_fc_frame_header(
-       struct sk_buff *skb)
-{
-       void *fh = skb->data + sizeof(struct fcoe_hdr);
-
-       if (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
-               fh += sizeof(struct vlan_hdr);
-
-       return (struct fc_frame_header *)fh;
-}
-
-/**
- * i40e_fcoe_ddp_put - release the DDP context for a given exchange id
- * @netdev: the corresponding net_device
- * @xid: the exchange id that corresponding DDP context will be released
- *
- * This is the implementation of net_device_ops.ndo_fcoe_ddp_done
- * and it is expected to be called by ULD, i.e., FCP layer of libfc
- * to release the corresponding ddp context when the I/O is done.
- *
- * Returns : data length already ddp-ed in bytes
- **/
-static int i40e_fcoe_ddp_put(struct net_device *netdev, u16 xid)
-{
-       struct i40e_netdev_priv *np = netdev_priv(netdev);
-       struct i40e_pf *pf = np->vsi->back;
-       struct i40e_fcoe *fcoe = &pf->fcoe;
-       int len = 0;
-       struct i40e_fcoe_ddp *ddp = &fcoe->ddp[xid];
-
-       if (!fcoe || !ddp)
-               goto out;
-
-       if (test_bit(__I40E_FCOE_DDP_DONE, &ddp->flags))
-               len = ddp->len;
-       i40e_fcoe_ddp_unmap(pf, ddp);
-out:
-       return len;
-}
-
-/**
- * i40e_fcoe_sw_init - sets up the HW for FCoE
- * @pf: pointer to PF
- **/
-void i40e_init_pf_fcoe(struct i40e_pf *pf)
-{
-       struct i40e_hw *hw = &pf->hw;
-       u32 val;
-
-       pf->flags &= ~I40E_FLAG_FCOE_ENABLED;
-       pf->num_fcoe_qps = 0;
-       pf->fcoe_hmc_cntx_num = 0;
-       pf->fcoe_hmc_filt_num = 0;
-
-       if (!pf->hw.func_caps.fcoe) {
-               dev_dbg(&pf->pdev->dev, "FCoE capability is disabled\n");
-               return;
-       }
-
-       if (!pf->hw.func_caps.dcb) {
-               dev_warn(&pf->pdev->dev,
-                        "Hardware is not DCB capable not enabling FCoE.\n");
-               return;
-       }
-
-       /* enable FCoE hash filter */
-       val = i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1));
-       val |= BIT(I40E_FILTER_PCTYPE_FCOE_OX - 32);
-       val |= BIT(I40E_FILTER_PCTYPE_FCOE_RX - 32);
-       val &= I40E_PFQF_HENA_PTYPE_ENA_MASK;
-       i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), val);
-
-       /* enable flag */
-       pf->flags |= I40E_FLAG_FCOE_ENABLED;
-       pf->num_fcoe_qps = I40E_DEFAULT_FCOE;
-
-       /* Reserve 4K DDP contexts and 20K filter size for FCoE */
-       pf->fcoe_hmc_cntx_num = BIT(I40E_DMA_CNTX_SIZE_4K) *
-                               I40E_DMA_CNTX_BASE_SIZE;
-       pf->fcoe_hmc_filt_num = pf->fcoe_hmc_cntx_num +
-                               BIT(I40E_HASH_FILTER_SIZE_16K) *
-                               I40E_HASH_FILTER_BASE_SIZE;
-
-       /* FCoE object: max 16K filter buckets and 4K DMA contexts */
-       pf->filter_settings.fcoe_filt_num = I40E_HASH_FILTER_SIZE_16K;
-       pf->filter_settings.fcoe_cntx_num = I40E_DMA_CNTX_SIZE_4K;
-
-       /* Setup max frame with FCoE_MTU plus L2 overheads */
-       val = i40e_read_rx_ctl(hw, I40E_GLFCOE_RCTL);
-       val &= ~I40E_GLFCOE_RCTL_MAX_SIZE_MASK;
-       val |= ((FCOE_MTU + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
-                << I40E_GLFCOE_RCTL_MAX_SIZE_SHIFT);
-       i40e_write_rx_ctl(hw, I40E_GLFCOE_RCTL, val);
-
-       dev_info(&pf->pdev->dev, "FCoE is supported.\n");
-}
-
-/**
- * i40e_get_fcoe_tc_map - Return TC map for FCoE APP
- * @pf: pointer to PF
- *
- **/
-u8 i40e_get_fcoe_tc_map(struct i40e_pf *pf)
-{
-       struct i40e_dcb_app_priority_table app;
-       struct i40e_hw *hw = &pf->hw;
-       u8 enabled_tc = 0;
-       u8 tc, i;
-       /* Get the FCoE APP TLV */
-       struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
-
-       for (i = 0; i < dcbcfg->numapps; i++) {
-               app = dcbcfg->app[i];
-               if (app.selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE &&
-                   app.protocolid == ETH_P_FCOE) {
-                       tc = dcbcfg->etscfg.prioritytable[app.priority];
-                       enabled_tc |= BIT(tc);
-                       break;
-               }
-       }
-
-       /* TC0 if there is no TC defined for FCoE APP TLV */
-       enabled_tc = enabled_tc ? enabled_tc : 0x1;
-
-       return enabled_tc;
-}
-
-/**
- * i40e_fcoe_vsi_init - prepares the VSI context for creating a FCoE VSI
- * @vsi: pointer to the associated VSI struct
- * @ctxt: pointer to the associated VSI context to be passed to HW
- *
- * Returns 0 on success or < 0 on error
- **/
-int i40e_fcoe_vsi_init(struct i40e_vsi *vsi, struct i40e_vsi_context *ctxt)
-{
-       struct i40e_aqc_vsi_properties_data *info = &ctxt->info;
-       struct i40e_pf *pf = vsi->back;
-       struct i40e_hw *hw = &pf->hw;
-       u8 enabled_tc = 0;
-
-       if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) {
-               dev_err(&pf->pdev->dev,
-                       "FCoE is not enabled for this device\n");
-               return -EPERM;
-       }
-
-       /* initialize the hardware for FCoE */
-       ctxt->pf_num = hw->pf_id;
-       ctxt->vf_num = 0;
-       ctxt->uplink_seid = vsi->uplink_seid;
-       ctxt->connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
-       ctxt->flags = I40E_AQ_VSI_TYPE_PF;
-
-       /* FCoE VSI would need the following sections */
-       info->valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
-
-       /* FCoE VSI does not need these sections */
-       info->valid_sections &= cpu_to_le16(~(I40E_AQ_VSI_PROP_SECURITY_VALID |
-                                           I40E_AQ_VSI_PROP_VLAN_VALID |
-                                           I40E_AQ_VSI_PROP_CAS_PV_VALID |
-                                           I40E_AQ_VSI_PROP_INGRESS_UP_VALID |
-                                           I40E_AQ_VSI_PROP_EGRESS_UP_VALID));
-
-       if (i40e_is_vsi_uplink_mode_veb(vsi)) {
-               info->valid_sections |=
-                               cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
-               info->switch_id =
-                               cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
-       }
-       enabled_tc = i40e_get_fcoe_tc_map(pf);
-       i40e_vsi_setup_queue_map(vsi, ctxt, enabled_tc, true);
-
-       /* set up queue option section: only enable FCoE */
-       info->queueing_opt_flags = I40E_AQ_VSI_QUE_OPT_FCOE_ENA;
-
-       return 0;
-}
-
-/**
- * i40e_fcoe_enable - this is the implementation of ndo_fcoe_enable,
- * indicating the upper FCoE protocol stack is ready to use FCoE
- * offload features.
- *
- * @netdev: pointer to the netdev that FCoE is created on
- *
- * Returns 0 on success
- *
- * in RTNL
- *
- **/
-int i40e_fcoe_enable(struct net_device *netdev)
-{
-       struct i40e_netdev_priv *np = netdev_priv(netdev);
-       struct i40e_vsi *vsi = np->vsi;
-       struct i40e_pf *pf = vsi->back;
-       struct i40e_fcoe *fcoe = &pf->fcoe;
-
-       if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) {
-               netdev_err(netdev, "HW does not support FCoE.\n");
-               return -ENODEV;
-       }
-
-       if (vsi->type != I40E_VSI_FCOE) {
-               netdev_err(netdev, "interface does not support FCoE.\n");
-               return -EBUSY;
-       }
-
-       atomic_inc(&fcoe->refcnt);
-
-       return 0;
-}
-
-/**
- * i40e_fcoe_disable- disables FCoE for upper FCoE protocol stack.
- * @dev: pointer to the netdev that FCoE is created on
- *
- * Returns 0 on success
- *
- **/
-int i40e_fcoe_disable(struct net_device *netdev)
-{
-       struct i40e_netdev_priv *np = netdev_priv(netdev);
-       struct i40e_vsi *vsi = np->vsi;
-       struct i40e_pf *pf = vsi->back;
-       struct i40e_fcoe *fcoe = &pf->fcoe;
-
-       if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) {
-               netdev_err(netdev, "device does not support FCoE\n");
-               return -ENODEV;
-       }
-       if (vsi->type != I40E_VSI_FCOE)
-               return -EBUSY;
-
-       if (!atomic_dec_and_test(&fcoe->refcnt))
-               return -EINVAL;
-
-       netdev_info(netdev, "FCoE disabled\n");
-
-       return 0;
-}
-
-/**
- * i40e_fcoe_dma_pool_free - free the per cpu pool for FCoE DDP
- * @fcoe: the FCoE sw object
- * @dev: the device that the pool is associated with
- * @cpu: the cpu for this pool
- *
- **/
-static void i40e_fcoe_dma_pool_free(struct i40e_fcoe *fcoe,
-                                   struct device *dev,
-                                   unsigned int cpu)
-{
-       struct i40e_fcoe_ddp_pool *ddp_pool;
-
-       ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu);
-       if (!ddp_pool->pool) {
-               dev_warn(dev, "DDP pool already freed for cpu %d\n", cpu);
-               return;
-       }
-       dma_pool_destroy(ddp_pool->pool);
-       ddp_pool->pool = NULL;
-}
-
-/**
- * i40e_fcoe_dma_pool_create - per cpu pool for FCoE DDP
- * @fcoe: the FCoE sw object
- * @dev: the device that the pool is associated with
- * @cpu: the cpu for this pool
- *
- * Returns 0 on successful or non zero on failure
- *
- **/
-static int i40e_fcoe_dma_pool_create(struct i40e_fcoe *fcoe,
-                                    struct device *dev,
-                                    unsigned int cpu)
-{
-       struct i40e_fcoe_ddp_pool *ddp_pool;
-       struct dma_pool *pool;
-       char pool_name[32];
-
-       ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu);
-       if (ddp_pool && ddp_pool->pool) {
-               dev_warn(dev, "DDP pool already allocated for cpu %d\n", cpu);
-               return 0;
-       }
-       snprintf(pool_name, sizeof(pool_name), "i40e_fcoe_ddp_%d", cpu);
-       pool = dma_pool_create(pool_name, dev, I40E_FCOE_DDP_PTR_MAX,
-                              I40E_FCOE_DDP_PTR_ALIGN, PAGE_SIZE);
-       if (!pool) {
-               dev_err(dev, "dma_pool_create %s failed\n", pool_name);
-               return -ENOMEM;
-       }
-       ddp_pool->pool = pool;
-       return 0;
-}
-
-/**
- * i40e_fcoe_free_ddp_resources - release FCoE DDP resources
- * @vsi: the vsi FCoE is associated with
- *
- **/
-void i40e_fcoe_free_ddp_resources(struct i40e_vsi *vsi)
-{
-       struct i40e_pf *pf = vsi->back;
-       struct i40e_fcoe *fcoe = &pf->fcoe;
-       int cpu, i;
-
-       /* do nothing if not FCoE VSI */
-       if (vsi->type != I40E_VSI_FCOE)
-               return;
-
-       /* do nothing if no DDP pools were allocated */
-       if (!fcoe->ddp_pool)
-               return;
-
-       for (i = 0; i < I40E_FCOE_DDP_MAX; i++)
-               i40e_fcoe_ddp_put(vsi->netdev, i);
-
-       for_each_possible_cpu(cpu)
-               i40e_fcoe_dma_pool_free(fcoe, &pf->pdev->dev, cpu);
-
-       free_percpu(fcoe->ddp_pool);
-       fcoe->ddp_pool = NULL;
-
-       netdev_info(vsi->netdev, "VSI %d,%d FCoE DDP resources released\n",
-                   vsi->id, vsi->seid);
-}
-
-/**
- * i40e_fcoe_setup_ddp_resources - allocate per cpu DDP resources
- * @vsi: the VSI FCoE is associated with
- *
- * Returns 0 on successful or non zero on failure
- *
- **/
-int i40e_fcoe_setup_ddp_resources(struct i40e_vsi *vsi)
-{
-       struct i40e_pf *pf = vsi->back;
-       struct device *dev = &pf->pdev->dev;
-       struct i40e_fcoe *fcoe = &pf->fcoe;
-       unsigned int cpu;
-       int i;
-
-       if (vsi->type != I40E_VSI_FCOE)
-               return -ENODEV;
-
-       /* do nothing if no DDP pools were allocated */
-       if (fcoe->ddp_pool)
-               return -EEXIST;
-
-       /* allocate per CPU memory to track DDP pools */
-       fcoe->ddp_pool = alloc_percpu(struct i40e_fcoe_ddp_pool);
-       if (!fcoe->ddp_pool) {
-               dev_err(&pf->pdev->dev, "failed to allocate percpu DDP\n");
-               return -ENOMEM;
-       }
-
-       /* allocate pci pool for each cpu */
-       for_each_possible_cpu(cpu) {
-               if (!i40e_fcoe_dma_pool_create(fcoe, dev, cpu))
-                       continue;
-
-               dev_err(dev, "failed to alloc DDP pool on cpu:%d\n", cpu);
-               i40e_fcoe_free_ddp_resources(vsi);
-               return -ENOMEM;
-       }
-
-       /* initialize the sw context */
-       for (i = 0; i < I40E_FCOE_DDP_MAX; i++)
-               i40e_fcoe_ddp_clear(&fcoe->ddp[i]);
-
-       netdev_info(vsi->netdev, "VSI %d,%d FCoE DDP resources allocated\n",
-                   vsi->id, vsi->seid);
-
-       return 0;
-}
-
-/**
- * i40e_fcoe_handle_status - check the Programming Status for FCoE
- * @rx_ring: the Rx ring for this descriptor
- * @rx_desc: the Rx descriptor for Programming Status, not a packet descriptor.
- *
- * Check if this is the Rx Programming Status descriptor write-back for FCoE.
- * This is used to verify if the context/filter programming or invalidation
- * requested by SW to the HW is successful or not and take actions accordingly.
- **/
-void i40e_fcoe_handle_status(struct i40e_ring *rx_ring,
-                            union i40e_rx_desc *rx_desc, u8 prog_id)
-{
-       struct i40e_pf *pf = rx_ring->vsi->back;
-       struct i40e_fcoe *fcoe = &pf->fcoe;
-       struct i40e_fcoe_ddp *ddp;
-       u32 error;
-       u16 xid;
-       u64 qw;
-
-       /* we only care for FCoE here */
-       if (!i40e_fcoe_progid_is_fcoe(prog_id))
-               return;
-
-       xid = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fcoe_param) &
-             (I40E_FCOE_DDP_MAX - 1);
-
-       if (!i40e_fcoe_xid_is_valid(xid))
-               return;
-
-       ddp = &fcoe->ddp[xid];
-       WARN_ON(xid != ddp->xid);
-
-       qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-       error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
-               I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
-
-       /* DDP context programming status: failure or success */
-       if (prog_id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) {
-               if (I40E_RX_PROG_FCOE_ERROR_TBL_FULL(error)) {
-                       dev_err(&pf->pdev->dev, "xid %x ddp->xid %x TABLE FULL\n",
-                               xid, ddp->xid);
-                       ddp->prerr |= I40E_RX_PROG_FCOE_ERROR_TBL_FULL_BIT;
-               }
-               if (I40E_RX_PROG_FCOE_ERROR_CONFLICT(error)) {
-                       dev_err(&pf->pdev->dev, "xid %x ddp->xid %x CONFLICT\n",
-                               xid, ddp->xid);
-                       ddp->prerr |= I40E_RX_PROG_FCOE_ERROR_CONFLICT_BIT;
-               }
-       }
-
-       /* DDP context invalidation status: failure or success */
-       if (prog_id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS) {
-               if (I40E_RX_PROG_FCOE_ERROR_INVLFAIL(error)) {
-                       dev_err(&pf->pdev->dev, "xid %x ddp->xid %x INVALIDATION FAILURE\n",
-                               xid, ddp->xid);
-                       ddp->prerr |= I40E_RX_PROG_FCOE_ERROR_INVLFAIL_BIT;
-               }
-               /* clear the flag so we can retry invalidation */
-               clear_bit(__I40E_FCOE_DDP_ABORTED, &ddp->flags);
-       }
-
-       /* unmap DMA */
-       i40e_fcoe_ddp_unmap(pf, ddp);
-       i40e_fcoe_ddp_clear(ddp);
-}
-
-/**
- * i40e_fcoe_handle_offload - check ddp status and mark it done
- * @adapter: i40e adapter
- * @rx_desc: advanced rx descriptor
- * @skb: the skb holding the received data
- *
- * This checks ddp status.
- *
- * Returns : < 0 indicates an error or not a FCOE ddp, 0 indicates
- * not passing the skb to ULD, > 0 indicates is the length of data
- * being ddped.
- *
- **/
-int i40e_fcoe_handle_offload(struct i40e_ring *rx_ring,
-                            union i40e_rx_desc *rx_desc,
-                            struct sk_buff *skb)
-{
-       struct i40e_pf *pf = rx_ring->vsi->back;
-       struct i40e_fcoe *fcoe = &pf->fcoe;
-       struct fc_frame_header *fh = NULL;
-       struct i40e_fcoe_ddp *ddp = NULL;
-       u32 status, fltstat;
-       u32 error, fcerr;
-       int rc = -EINVAL;
-       u16 ptype;
-       u16 xid;
-       u64 qw;
-
-       /* check this rxd is for programming status */
-       qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-       /* packet descriptor, check packet type */
-       ptype = (qw & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT;
-       if (!i40e_rx_is_fcoe(ptype))
-               goto out_no_ddp;
-
-       error = (qw & I40E_RXD_QW1_ERROR_MASK) >> I40E_RXD_QW1_ERROR_SHIFT;
-       fcerr = (error >> I40E_RX_DESC_ERROR_L3L4E_SHIFT) &
-                I40E_RX_DESC_FCOE_ERROR_MASK;
-
-       /* check stateless offload error */
-       if (unlikely(fcerr == I40E_RX_DESC_ERROR_L3L4E_PROT)) {
-               dev_err(&pf->pdev->dev, "Protocol Error\n");
-               skb->ip_summed = CHECKSUM_NONE;
-       } else {
-               skb->ip_summed = CHECKSUM_UNNECESSARY;
-       }
-
-       /* check hw status on ddp */
-       status = (qw & I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT;
-       fltstat = (status >> I40E_RX_DESC_STATUS_FLTSTAT_SHIFT) &
-                  I40E_RX_DESC_FLTSTAT_FCMASK;
-
-       /* now we are ready to check DDP */
-       fh = i40e_fcoe_fc_frame_header(skb);
-       xid = i40e_fcoe_fc_get_xid(fh);
-       if (!i40e_fcoe_xid_is_valid(xid))
-               goto out_no_ddp;
-
-       /* non DDP normal receive, return to the protocol stack */
-       if (fltstat == I40E_RX_DESC_FLTSTAT_NOMTCH)
-               goto out_no_ddp;
-
-       /* do we have a sw ddp context setup ? */
-       ddp = &fcoe->ddp[xid];
-       if (!ddp->sgl)
-               goto out_no_ddp;
-
-       /* fetch xid from hw rxd wb, which should match up the sw ctxt */
-       xid = le16_to_cpu(rx_desc->wb.qword0.lo_dword.mirr_fcoe.fcoe_ctx_id);
-       if (ddp->xid != xid) {
-               dev_err(&pf->pdev->dev, "xid 0x%x does not match ctx_xid 0x%x\n",
-                       ddp->xid, xid);
-               goto out_put_ddp;
-       }
-
-       /* the same exchange has already errored out */
-       if (ddp->fcerr) {
-               dev_err(&pf->pdev->dev, "xid 0x%x fcerr 0x%x reported fcer 0x%x\n",
-                       xid, ddp->fcerr, fcerr);
-               goto out_put_ddp;
-       }
-
-       /* fcoe param is valid by now with correct DDPed length */
-       ddp->len = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fcoe_param);
-       ddp->fcerr = fcerr;
-       /* header posting only, useful only for target mode and debugging */
-       if (fltstat == I40E_RX_DESC_FLTSTAT_DDP) {
-               /* For target mode, we get header of the last packet but it
-                * does not have the FCoE trailer field, i.e., CRC and EOF
-                * Ordered Set since they are offloaded by the HW, so fill
-                * it up correspondingly to allow the packet to pass through
-                * to the upper protocol stack.
-                */
-               u32 f_ctl = ntoh24(fh->fh_f_ctl);
-
-               if ((f_ctl & FC_FC_END_SEQ) &&
-                   (fh->fh_r_ctl == FC_RCTL_DD_SOL_DATA)) {
-                       struct fcoe_crc_eof *crc = NULL;
-
-                       crc = skb_put(skb, sizeof(*crc));
-                       crc->fcoe_eof = FC_EOF_T;
-               } else {
-                       /* otherwise, drop the header only frame */
-                       rc = 0;
-                       goto out_no_ddp;
-               }
-       }
-
-out_put_ddp:
-       /* either we got RSP or we have an error, unmap DMA in both cases */
-       i40e_fcoe_ddp_unmap(pf, ddp);
-       if (ddp->len && !ddp->fcerr) {
-               int pkts;
-
-               rc = ddp->len;
-               i40e_fcoe_ddp_clear(ddp);
-               ddp->len = rc;
-               pkts = DIV_ROUND_UP(rc, 2048);
-               rx_ring->stats.bytes += rc;
-               rx_ring->stats.packets += pkts;
-               rx_ring->q_vector->rx.total_bytes += rc;
-               rx_ring->q_vector->rx.total_packets += pkts;
-               set_bit(__I40E_FCOE_DDP_DONE, &ddp->flags);
-       }
-
-out_no_ddp:
-       return rc;
-}
-
-/**
- * i40e_fcoe_ddp_setup - called to set up ddp context
- * @netdev: the corresponding net_device
- * @xid: the exchange id requesting ddp
- * @sgl: the scatter-gather list for this request
- * @sgc: the number of scatter-gather items
- * @target_mode: indicates this is a DDP request for target
- *
- * Returns : 1 for success and 0 for no DDP on this I/O
- **/
-static int i40e_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
-                              struct scatterlist *sgl, unsigned int sgc,
-                              int target_mode)
-{
-       static const unsigned int bufflen = I40E_FCOE_DDP_BUF_MIN;
-       struct i40e_netdev_priv *np = netdev_priv(netdev);
-       struct i40e_fcoe_ddp_pool *ddp_pool;
-       struct i40e_pf *pf = np->vsi->back;
-       struct i40e_fcoe *fcoe = &pf->fcoe;
-       unsigned int i, j, dmacount;
-       struct i40e_fcoe_ddp *ddp;
-       unsigned int firstoff = 0;
-       unsigned int thisoff = 0;
-       unsigned int thislen = 0;
-       struct scatterlist *sg;
-       dma_addr_t addr = 0;
-       unsigned int len;
-
-       if (xid >= I40E_FCOE_DDP_MAX) {
-               dev_warn(&pf->pdev->dev, "xid=0x%x out-of-range\n", xid);
-               return 0;
-       }
-
-       /* no DDP if we are already down or resetting */
-       if (test_bit(__I40E_DOWN, &pf->state) ||
-           test_bit(__I40E_NEEDS_RESTART, &pf->state)) {
-               dev_info(&pf->pdev->dev, "xid=0x%x device in reset/down\n",
-                        xid);
-               return 0;
-       }
-
-       ddp = &fcoe->ddp[xid];
-       if (ddp->sgl) {
-               dev_info(&pf->pdev->dev, "xid 0x%x w/ non-null sgl=%p nents=%d\n",
-                        xid, ddp->sgl, ddp->sgc);
-               return 0;
-       }
-       i40e_fcoe_ddp_clear(ddp);
-
-       if (!fcoe->ddp_pool) {
-               dev_info(&pf->pdev->dev, "No DDP pool, xid 0x%x\n", xid);
-               return 0;
-       }
-
-       ddp_pool = per_cpu_ptr(fcoe->ddp_pool, get_cpu());
-       if (!ddp_pool->pool) {
-               dev_info(&pf->pdev->dev, "No percpu ddp pool, xid 0x%x\n", xid);
-               goto out_noddp;
-       }
-
-       /* setup dma from scsi command sgl */
-       dmacount = dma_map_sg(&pf->pdev->dev, sgl, sgc, DMA_FROM_DEVICE);
-       if (dmacount == 0) {
-               dev_info(&pf->pdev->dev, "dma_map_sg for sgl %p, sgc %d failed\n",
-                        sgl, sgc);
-               goto out_noddp_unmap;
-       }
-
-       /* alloc the udl from our ddp pool */
-       ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_ATOMIC, &ddp->udp);
-       if (!ddp->udl) {
-               dev_info(&pf->pdev->dev,
-                        "Failed allocated ddp context, xid 0x%x\n", xid);
-               goto out_noddp_unmap;
-       }
-
-       j = 0;
-       ddp->len = 0;
-       for_each_sg(sgl, sg, dmacount, i) {
-               addr = sg_dma_address(sg);
-               len = sg_dma_len(sg);
-               ddp->len += len;
-               while (len) {
-                       /* max number of buffers allowed in one DDP context */
-                       if (j >= I40E_FCOE_DDP_BUFFCNT_MAX) {
-                               dev_info(&pf->pdev->dev,
-                                        "xid=%x:%d,%d,%d:addr=%llx not enough descriptors\n",
-                                        xid, i, j, dmacount, (u64)addr);
-                               goto out_noddp_free;
-                       }
-
-                       /* get the offset of length of current buffer */
-                       thisoff = addr & ((dma_addr_t)bufflen - 1);
-                       thislen = min_t(unsigned int, (bufflen - thisoff), len);
-                       /* all but the 1st buffer (j == 0)
-                        * must be aligned on bufflen
-                        */
-                       if ((j != 0) && (thisoff))
-                               goto out_noddp_free;
-
-                       /* all but the last buffer
-                        * ((i == (dmacount - 1)) && (thislen == len))
-                        * must end at bufflen
-                        */
-                       if (((i != (dmacount - 1)) || (thislen != len)) &&
-                           ((thislen + thisoff) != bufflen))
-                               goto out_noddp_free;
-
-                       ddp->udl[j] = (u64)(addr - thisoff);
-                       /* only the first buffer may have none-zero offset */
-                       if (j == 0)
-                               firstoff = thisoff;
-                       len -= thislen;
-                       addr += thislen;
-                       j++;
-               }
-       }
-       /* only the last buffer may have non-full bufflen */
-       ddp->lastsize = thisoff + thislen;
-       ddp->firstoff = firstoff;
-       ddp->list_len = j;
-       ddp->pool = ddp_pool->pool;
-       ddp->sgl = sgl;
-       ddp->sgc = sgc;
-       ddp->xid = xid;
-       if (target_mode)
-               set_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags);
-       set_bit(__I40E_FCOE_DDP_INITALIZED, &ddp->flags);
-
-       put_cpu();
-       return 1; /* Success */
-
-out_noddp_free:
-       dma_pool_free(ddp->pool, ddp->udl, ddp->udp);
-       i40e_fcoe_ddp_clear(ddp);
-
-out_noddp_unmap:
-       dma_unmap_sg(&pf->pdev->dev, sgl, sgc, DMA_FROM_DEVICE);
-out_noddp:
-       put_cpu();
-       return 0;
-}
-
-/**
- * i40e_fcoe_ddp_get - called to set up ddp context in initiator mode
- * @netdev: the corresponding net_device
- * @xid: the exchange id requesting ddp
- * @sgl: the scatter-gather list for this request
- * @sgc: the number of scatter-gather items
- *
- * This is the implementation of net_device_ops.ndo_fcoe_ddp_setup
- * and is expected to be called from ULD, e.g., FCP layer of libfc
- * to set up ddp for the corresponding xid of the given sglist for
- * the corresponding I/O.
- *
- * Returns : 1 for success and 0 for no ddp
- **/
-static int i40e_fcoe_ddp_get(struct net_device *netdev, u16 xid,
-                            struct scatterlist *sgl, unsigned int sgc)
-{
-       return i40e_fcoe_ddp_setup(netdev, xid, sgl, sgc, 0);
-}
-
-/**
- * i40e_fcoe_ddp_target - called to set up ddp context in target mode
- * @netdev: the corresponding net_device
- * @xid: the exchange id requesting ddp
- * @sgl: the scatter-gather list for this request
- * @sgc: the number of scatter-gather items
- *
- * This is the implementation of net_device_ops.ndo_fcoe_ddp_target
- * and is expected to be called from ULD, e.g., FCP layer of libfc
- * to set up ddp for the corresponding xid of the given sglist for
- * the corresponding I/O. The DDP in target mode is a write I/O request
- * from the initiator.
- *
- * Returns : 1 for success and 0 for no ddp
- **/
-static int i40e_fcoe_ddp_target(struct net_device *netdev, u16 xid,
-                               struct scatterlist *sgl, unsigned int sgc)
-{
-       return i40e_fcoe_ddp_setup(netdev, xid, sgl, sgc, 1);
-}
-
-/**
- * i40e_fcoe_program_ddp - programs the HW DDP related descriptors
- * @tx_ring: transmit ring for this packet
- * @skb:     the packet to be sent out
- * @sof: the SOF to indicate class of service
- *
- * Determine if it is READ/WRITE command, and finds out if there is
- * a matching SW DDP context for this command. DDP is applicable
- * only in case of READ if initiator or WRITE in case of
- * responder (via checking XFER_RDY).
- *
- * Note: caller checks sof and ddp sw context
- *
- * Returns : none
- *
- **/
-static void i40e_fcoe_program_ddp(struct i40e_ring *tx_ring,
-                                 struct sk_buff *skb,
-                                 struct i40e_fcoe_ddp *ddp, u8 sof)
-{
-       struct i40e_fcoe_filter_context_desc *filter_desc = NULL;
-       struct i40e_fcoe_queue_context_desc *queue_desc = NULL;
-       struct i40e_fcoe_ddp_context_desc *ddp_desc = NULL;
-       struct i40e_pf *pf = tx_ring->vsi->back;
-       u16 i = tx_ring->next_to_use;
-       struct fc_frame_header *fh;
-       u64 flags_rsvd_lanq = 0;
-       bool target_mode;
-
-       /* check if abort is still pending */
-       if (test_bit(__I40E_FCOE_DDP_ABORTED, &ddp->flags)) {
-               dev_warn(&pf->pdev->dev,
-                        "DDP abort is still pending xid:%hx and ddp->flags:%lx:\n",
-                        ddp->xid, ddp->flags);
-               return;
-       }
-
-       /* set the flag to indicate this is programmed */
-       if (test_and_set_bit(__I40E_FCOE_DDP_PROGRAMMED, &ddp->flags)) {
-               dev_warn(&pf->pdev->dev,
-                        "DDP is already programmed for xid:%hx and ddp->flags:%lx:\n",
-                        ddp->xid, ddp->flags);
-               return;
-       }
-
-       /* Prepare the DDP context descriptor */
-       ddp_desc = I40E_DDP_CONTEXT_DESC(tx_ring, i);
-       i++;
-       if (i == tx_ring->count)
-               i = 0;
-
-       ddp_desc->type_cmd_foff_lsize =
-                               cpu_to_le64(I40E_TX_DESC_DTYPE_DDP_CTX  |
-                               ((u64)I40E_FCOE_DDP_CTX_DESC_BSIZE_4K  <<
-                               I40E_FCOE_DDP_CTX_QW1_CMD_SHIFT)        |
-                               ((u64)ddp->firstoff                    <<
-                               I40E_FCOE_DDP_CTX_QW1_FOFF_SHIFT)       |
-                               ((u64)ddp->lastsize                    <<
-                               I40E_FCOE_DDP_CTX_QW1_LSIZE_SHIFT));
-       ddp_desc->rsvd = cpu_to_le64(0);
-
-       /* target mode needs last packet in the sequence  */
-       target_mode = test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags);
-       if (target_mode)
-               ddp_desc->type_cmd_foff_lsize |=
-                       cpu_to_le64(I40E_FCOE_DDP_CTX_DESC_LASTSEQH);
-
-       /* Prepare queue_context descriptor */
-       queue_desc = I40E_QUEUE_CONTEXT_DESC(tx_ring, i++);
-       if (i == tx_ring->count)
-               i = 0;
-       queue_desc->dmaindx_fbase = cpu_to_le64(ddp->xid | ((u64)ddp->udp));
-       queue_desc->flen_tph = cpu_to_le64(ddp->list_len |
-                               ((u64)(I40E_FCOE_QUEUE_CTX_DESC_TPHRDESC |
-                               I40E_FCOE_QUEUE_CTX_DESC_TPHDATA) <<
-                               I40E_FCOE_QUEUE_CTX_QW1_TPH_SHIFT));
-
-       /* Prepare filter_context_desc */
-       filter_desc = I40E_FILTER_CONTEXT_DESC(tx_ring, i);
-       i++;
-       if (i == tx_ring->count)
-               i = 0;
-
-       fh = (struct fc_frame_header *)skb_transport_header(skb);
-       filter_desc->param = cpu_to_le32(ntohl(fh->fh_parm_offset));
-       filter_desc->seqn = cpu_to_le16(ntohs(fh->fh_seq_cnt));
-       filter_desc->rsvd_dmaindx = cpu_to_le16(ddp->xid <<
-                               I40E_FCOE_FILTER_CTX_QW0_DMAINDX_SHIFT);
-
-       flags_rsvd_lanq = I40E_FCOE_FILTER_CTX_DESC_CTYP_DDP;
-       flags_rsvd_lanq |= (u64)(target_mode ?
-                       I40E_FCOE_FILTER_CTX_DESC_ENODE_RSP :
-                       I40E_FCOE_FILTER_CTX_DESC_ENODE_INIT);
-
-       flags_rsvd_lanq |= (u64)((sof == FC_SOF_I2 || sof == FC_SOF_N2) ?
-                       I40E_FCOE_FILTER_CTX_DESC_FC_CLASS2 :
-                       I40E_FCOE_FILTER_CTX_DESC_FC_CLASS3);
-
-       flags_rsvd_lanq |= ((u64)skb->queue_mapping <<
-                               I40E_FCOE_FILTER_CTX_QW1_LANQINDX_SHIFT);
-       filter_desc->flags_rsvd_lanq = cpu_to_le64(flags_rsvd_lanq);
-
-       /* By this time, all offload related descriptors has been programmed */
-       tx_ring->next_to_use = i;
-}
-
-/**
- * i40e_fcoe_invalidate_ddp - invalidates DDP in case of abort
- * @tx_ring: transmit ring for this packet
- * @skb: the packet associated w/ this DDP invalidation, i.e., ABTS
- * @ddp: the SW DDP context for this DDP
- *
- * Programs the Tx context descriptor to do DDP invalidation.
- **/
-static void i40e_fcoe_invalidate_ddp(struct i40e_ring *tx_ring,
-                                    struct sk_buff *skb,
-                                    struct i40e_fcoe_ddp *ddp)
-{
-       struct i40e_tx_context_desc *context_desc;
-       int i;
-
-       if (test_and_set_bit(__I40E_FCOE_DDP_ABORTED, &ddp->flags))
-               return;
-
-       i = tx_ring->next_to_use;
-       context_desc = I40E_TX_CTXTDESC(tx_ring, i);
-       i++;
-       if (i == tx_ring->count)
-               i = 0;
-
-       context_desc->tunneling_params = cpu_to_le32(0);
-       context_desc->l2tag2 = cpu_to_le16(0);
-       context_desc->rsvd = cpu_to_le16(0);
-       context_desc->type_cmd_tso_mss = cpu_to_le64(
-               I40E_TX_DESC_DTYPE_FCOE_CTX |
-               (I40E_FCOE_TX_CTX_DESC_OPCODE_DDP_CTX_INVL <<
-               I40E_TXD_CTX_QW1_CMD_SHIFT) |
-               (I40E_FCOE_TX_CTX_DESC_OPCODE_SINGLE_SEND <<
-               I40E_TXD_CTX_QW1_CMD_SHIFT));
-       tx_ring->next_to_use = i;
-}
-
-/**
- * i40e_fcoe_handle_ddp - check we should setup or invalidate DDP
- * @tx_ring: transmit ring for this packet
- * @skb: the packet to be sent out
- * @sof: the SOF to indicate class of service
- *
- * Determine if it is ABTS/READ/XFER_RDY, and finds out if there is
- * a matching SW DDP context for this command. DDP is applicable
- * only in case of READ if initiator or WRITE in case of
- * responder (via checking XFER_RDY). In case this is an ABTS, send
- * just invalidate the context.
- **/
-static void i40e_fcoe_handle_ddp(struct i40e_ring *tx_ring,
-                                struct sk_buff *skb, u8 sof)
-{
-       struct i40e_pf *pf = tx_ring->vsi->back;
-       struct i40e_fcoe *fcoe = &pf->fcoe;
-       struct fc_frame_header *fh;
-       struct i40e_fcoe_ddp *ddp;
-       u32 f_ctl;
-       u8 r_ctl;
-       u16 xid;
-
-       fh = (struct fc_frame_header *)skb_transport_header(skb);
-       f_ctl = ntoh24(fh->fh_f_ctl);
-       r_ctl = fh->fh_r_ctl;
-       ddp = NULL;
-
-       if ((r_ctl == FC_RCTL_DD_DATA_DESC) && (f_ctl & FC_FC_EX_CTX)) {
-               /* exchange responder? if so, XFER_RDY for write */
-               xid = ntohs(fh->fh_rx_id);
-               if (i40e_fcoe_xid_is_valid(xid)) {
-                       ddp = &fcoe->ddp[xid];
-                       if ((ddp->xid == xid) &&
-                           (test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags)))
-                               i40e_fcoe_program_ddp(tx_ring, skb, ddp, sof);
-               }
-       } else if (r_ctl == FC_RCTL_DD_UNSOL_CMD) {
-               /* exchange originator, check READ cmd */
-               xid = ntohs(fh->fh_ox_id);
-               if (i40e_fcoe_xid_is_valid(xid)) {
-                       ddp = &fcoe->ddp[xid];
-                       if ((ddp->xid == xid) &&
-                           (!test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags)))
-                               i40e_fcoe_program_ddp(tx_ring, skb, ddp, sof);
-               }
-       } else if (r_ctl == FC_RCTL_BA_ABTS) {
-               /* exchange originator, check ABTS */
-               xid = ntohs(fh->fh_ox_id);
-               if (i40e_fcoe_xid_is_valid(xid)) {
-                       ddp = &fcoe->ddp[xid];
-                       if ((ddp->xid == xid) &&
-                           (!test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags)))
-                               i40e_fcoe_invalidate_ddp(tx_ring, skb, ddp);
-               }
-       }
-}
-
-/**
- * i40e_fcoe_tso - set up FCoE TSO
- * @tx_ring:  ring to send buffer on
- * @skb:      send buffer
- * @tx_flags: collected send information
- * @hdr_len:  the tso header length
- * @sof: the SOF to indicate class of service
- *
- * Note must already have sof checked to be either class 2 or class 3 before
- * calling this function.
- *
- * Returns 1 to indicate sequence segmentation offload is properly setup
- * or returns 0 to indicate no tso is needed, otherwise returns error
- * code to drop the frame.
- **/
-static int i40e_fcoe_tso(struct i40e_ring *tx_ring,
-                        struct sk_buff *skb,
-                        u32 tx_flags, u8 *hdr_len, u8 sof)
-{
-       struct i40e_tx_context_desc *context_desc;
-       u32 cd_type, cd_cmd, cd_tso_len, cd_mss;
-       struct fc_frame_header *fh;
-       u64 cd_type_cmd_tso_mss;
-
-       /* must match gso type as FCoE */
-       if (!skb_is_gso(skb))
-               return 0;
-
-       /* is it the expected gso type for FCoE ?*/
-       if (skb_shinfo(skb)->gso_type != SKB_GSO_FCOE) {
-               netdev_err(skb->dev,
-                          "wrong gso type %d:expecting SKB_GSO_FCOE\n",
-                          skb_shinfo(skb)->gso_type);
-               return -EINVAL;
-       }
-
-       /* header and trailer are inserted by hw */
-       *hdr_len = skb_transport_offset(skb) + sizeof(struct fc_frame_header) +
-                  sizeof(struct fcoe_crc_eof);
-
-       /* check sof to decide a class 2 or 3 TSO */
-       if (likely(i40e_fcoe_sof_is_class3(sof)))
-               cd_cmd = I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS3;
-       else
-               cd_cmd = I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS2;
-
-       /* param field valid? */
-       fh = (struct fc_frame_header *)skb_transport_header(skb);
-       if (fh->fh_f_ctl[2] & FC_FC_REL_OFF)
-               cd_cmd |= I40E_FCOE_TX_CTX_DESC_RELOFF;
-
-       /* fill the field values */
-       cd_type = I40E_TX_DESC_DTYPE_FCOE_CTX;
-       cd_tso_len = skb->len - *hdr_len;
-       cd_mss = skb_shinfo(skb)->gso_size;
-       cd_type_cmd_tso_mss =
-               ((u64)cd_type  << I40E_TXD_CTX_QW1_DTYPE_SHIFT)     |
-               ((u64)cd_cmd     << I40E_TXD_CTX_QW1_CMD_SHIFT)     |
-               ((u64)cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
-               ((u64)cd_mss     << I40E_TXD_CTX_QW1_MSS_SHIFT);
-
-       /* grab the next descriptor */
-       context_desc = I40E_TX_CTXTDESC(tx_ring, tx_ring->next_to_use);
-       tx_ring->next_to_use++;
-       if (tx_ring->next_to_use == tx_ring->count)
-               tx_ring->next_to_use = 0;
-
-       context_desc->tunneling_params = 0;
-       context_desc->l2tag2 = cpu_to_le16((tx_flags & I40E_TX_FLAGS_VLAN_MASK)
-                                           >> I40E_TX_FLAGS_VLAN_SHIFT);
-       context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
-
-       return 1;
-}
-
-/**
- * i40e_fcoe_tx_map - build the tx descriptor
- * @tx_ring:  ring to send buffer on
- * @skb:      send buffer
- * @first:    first buffer info buffer to use
- * @tx_flags: collected send information
- * @hdr_len:  ptr to the size of the packet header
- * @eof:      the frame eof value
- *
- * Note, for FCoE, sof and eof are already checked
- **/
-static void i40e_fcoe_tx_map(struct i40e_ring *tx_ring,
-                            struct sk_buff *skb,
-                            struct i40e_tx_buffer *first,
-                            u32 tx_flags, u8 hdr_len, u8 eof)
-{
-       u32 td_offset = 0;
-       u32 td_cmd = 0;
-       u32 maclen;
-
-       /* insert CRC */
-       td_cmd = I40E_TX_DESC_CMD_ICRC;
-
-       /* setup MACLEN */
-       maclen = skb_network_offset(skb);
-       if (tx_flags & I40E_TX_FLAGS_SW_VLAN)
-               maclen += sizeof(struct vlan_hdr);
-
-       if (skb->protocol == htons(ETH_P_FCOE)) {
-               /* for FCoE, maclen should exclude ether type */
-               maclen -= 2;
-               /* setup type as FCoE and EOF insertion */
-               td_cmd |= (I40E_TX_DESC_CMD_FCOET | i40e_fcoe_ctxt_eof(eof));
-               /* setup FCoELEN and FCLEN */
-               td_offset |= ((((sizeof(struct fcoe_hdr) + 2) >> 2) <<
-                               I40E_TX_DESC_LENGTH_IPLEN_SHIFT) |
-                             ((sizeof(struct fc_frame_header) >> 2) <<
-                               I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT));
-               /* trim to exclude trailer */
-               pskb_trim(skb, skb->len - sizeof(struct fcoe_crc_eof));
-       }
-
-       /* MACLEN is ether header length in words not bytes */
-       td_offset |= (maclen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
-
-       i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len, td_cmd, td_offset);
-}
-
-/**
- * i40e_fcoe_set_skb_header - adjust skb header point for FIP/FCoE/FC
- * @skb: the skb to be adjusted
- *
- * Returns true if this skb is a FCoE/FIP or VLAN carried FCoE/FIP and then
- * adjusts the skb header pointers correspondingly. Otherwise, returns false.
- **/
-static inline int i40e_fcoe_set_skb_header(struct sk_buff *skb)
-{
-       __be16 protocol = skb->protocol;
-
-       skb_reset_mac_header(skb);
-       skb->mac_len = sizeof(struct ethhdr);
-       if (protocol == htons(ETH_P_8021Q)) {
-               struct vlan_ethhdr *veth = (struct vlan_ethhdr *)eth_hdr(skb);
-
-               protocol = veth->h_vlan_encapsulated_proto;
-               skb->mac_len += sizeof(struct vlan_hdr);
-       }
-
-       /* FCoE or FIP only */
-       if ((protocol != htons(ETH_P_FIP)) &&
-           (protocol != htons(ETH_P_FCOE)))
-               return -EINVAL;
-
-       /* set header to L2 of FCoE/FIP */
-       skb_set_network_header(skb, skb->mac_len);
-       if (protocol == htons(ETH_P_FIP))
-               return 0;
-
-       /* set header to L3 of FC */
-       skb_set_transport_header(skb, skb->mac_len + sizeof(struct fcoe_hdr));
-       return 0;
-}
-
-/**
- * i40e_fcoe_xmit_frame - transmit buffer
- * @skb:     send buffer
- * @netdev:  the fcoe netdev
- *
- * Returns 0 if sent, else an error code
- **/
-static netdev_tx_t i40e_fcoe_xmit_frame(struct sk_buff *skb,
-                                       struct net_device *netdev)
-{
-       struct i40e_netdev_priv *np = netdev_priv(skb->dev);
-       struct i40e_vsi *vsi = np->vsi;
-       struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
-       struct i40e_tx_buffer *first;
-       u32 tx_flags = 0;
-       int fso, count;
-       u8 hdr_len = 0;
-       u8 sof = 0;
-       u8 eof = 0;
-
-       if (i40e_fcoe_set_skb_header(skb))
-               goto out_drop;
-
-       count = i40e_xmit_descriptor_count(skb);
-       if (i40e_chk_linearize(skb, count)) {
-               if (__skb_linearize(skb))
-                       goto out_drop;
-               count = i40e_txd_use_count(skb->len);
-               tx_ring->tx_stats.tx_linearize++;
-       }
-
-       /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
-        *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
-        *       + 4 desc gap to avoid the cache line where head is,
-        *       + 1 desc for context descriptor,
-        * otherwise try next time
-        */
-       if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
-               tx_ring->tx_stats.tx_busy++;
-               return NETDEV_TX_BUSY;
-       }
-
-       /* prepare the xmit flags */
-       if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
-               goto out_drop;
-
-       /* record the location of the first descriptor for this packet */
-       first = &tx_ring->tx_bi[tx_ring->next_to_use];
-
-       /* FIP is a regular L2 traffic w/o offload */
-       if (skb->protocol == htons(ETH_P_FIP))
-               goto out_send;
-
-       /* check sof and eof, only supports FC Class 2 or 3 */
-       if (i40e_fcoe_fc_sof(skb, &sof) || i40e_fcoe_fc_eof(skb, &eof)) {
-               netdev_err(netdev, "SOF/EOF error:%02x - %02x\n", sof, eof);
-               goto out_drop;
-       }
-
-       /* always do FCCRC for FCoE */
-       tx_flags |= I40E_TX_FLAGS_FCCRC;
-
-       /* check we should do sequence offload */
-       fso = i40e_fcoe_tso(tx_ring, skb, tx_flags, &hdr_len, sof);
-       if (fso < 0)
-               goto out_drop;
-       else if (fso)
-               tx_flags |= I40E_TX_FLAGS_FSO;
-       else
-               i40e_fcoe_handle_ddp(tx_ring, skb, sof);
-
-out_send:
-       /* send out the packet */
-       i40e_fcoe_tx_map(tx_ring, skb, first, tx_flags, hdr_len, eof);
-
-       i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
-       return NETDEV_TX_OK;
-
-out_drop:
-       dev_kfree_skb_any(skb);
-       return NETDEV_TX_OK;
-}
-
-/**
- * i40e_fcoe_change_mtu - NDO callback to change the Maximum Transfer Unit
- * @netdev: network interface device structure
- * @new_mtu: new value for maximum frame size
- *
- * Returns error as operation not permitted
- *
- **/
-static int i40e_fcoe_change_mtu(struct net_device *netdev, int new_mtu)
-{
-       netdev_warn(netdev, "MTU change is not supported on FCoE interfaces\n");
-       return -EPERM;
-}
-
-/**
- * i40e_fcoe_set_features - set the netdev feature flags
- * @netdev: ptr to the netdev being adjusted
- * @features: the feature set that the stack is suggesting
- *
- **/
-static int i40e_fcoe_set_features(struct net_device *netdev,
-                                 netdev_features_t features)
-{
-       struct i40e_netdev_priv *np = netdev_priv(netdev);
-       struct i40e_vsi *vsi = np->vsi;
-
-       if (features & NETIF_F_HW_VLAN_CTAG_RX)
-               i40e_vlan_stripping_enable(vsi);
-       else
-               i40e_vlan_stripping_disable(vsi);
-
-       return 0;
-}
-
-static const struct net_device_ops i40e_fcoe_netdev_ops = {
-       .ndo_open               = i40e_open,
-       .ndo_stop               = i40e_close,
-       .ndo_get_stats64        = i40e_get_netdev_stats_struct,
-       .ndo_set_rx_mode        = i40e_set_rx_mode,
-       .ndo_validate_addr      = eth_validate_addr,
-       .ndo_set_mac_address    = i40e_set_mac,
-       .ndo_change_mtu         = i40e_fcoe_change_mtu,
-       .ndo_do_ioctl           = i40e_ioctl,
-       .ndo_tx_timeout         = i40e_tx_timeout,
-       .ndo_vlan_rx_add_vid    = i40e_vlan_rx_add_vid,
-       .ndo_vlan_rx_kill_vid   = i40e_vlan_rx_kill_vid,
-       .ndo_setup_tc           = __i40e_setup_tc,
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
-       .ndo_poll_controller    = i40e_netpoll,
-#endif
-       .ndo_start_xmit         = i40e_fcoe_xmit_frame,
-       .ndo_fcoe_enable        = i40e_fcoe_enable,
-       .ndo_fcoe_disable       = i40e_fcoe_disable,
-       .ndo_fcoe_ddp_setup     = i40e_fcoe_ddp_get,
-       .ndo_fcoe_ddp_done      = i40e_fcoe_ddp_put,
-       .ndo_fcoe_ddp_target    = i40e_fcoe_ddp_target,
-       .ndo_set_features       = i40e_fcoe_set_features,
-};
-
-/* fcoe network device type */
-static struct device_type fcoe_netdev_type = {
-       .name = "fcoe",
-};
-
-/**
- * i40e_fcoe_config_netdev - prepares the VSI context for creating a FCoE VSI
- * @vsi: pointer to the associated VSI struct
- * @ctxt: pointer to the associated VSI context to be passed to HW
- *
- * Returns 0 on success or < 0 on error
- **/
-void i40e_fcoe_config_netdev(struct net_device *netdev, struct i40e_vsi *vsi)
-{
-       struct i40e_hw *hw = &vsi->back->hw;
-       struct i40e_pf *pf = vsi->back;
-
-       if (vsi->type != I40E_VSI_FCOE)
-               return;
-
-       netdev->features = (NETIF_F_HW_VLAN_CTAG_TX |
-                           NETIF_F_HW_VLAN_CTAG_RX |
-                           NETIF_F_HW_VLAN_CTAG_FILTER);
-
-       netdev->vlan_features = netdev->features;
-       netdev->vlan_features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
-                                  NETIF_F_HW_VLAN_CTAG_RX |
-                                  NETIF_F_HW_VLAN_CTAG_FILTER);
-       netdev->fcoe_ddp_xid = I40E_FCOE_DDP_MAX - 1;
-       netdev->features |= NETIF_F_ALL_FCOE;
-       netdev->vlan_features |= NETIF_F_ALL_FCOE;
-       netdev->hw_features |= netdev->features;
-       netdev->priv_flags |= IFF_UNICAST_FLT;
-       netdev->priv_flags |= IFF_SUPP_NOFCS;
-
-       strlcpy(netdev->name, "fcoe%d", IFNAMSIZ-1);
-       netdev->mtu = FCOE_MTU;
-       SET_NETDEV_DEV(netdev, &pf->pdev->dev);
-       SET_NETDEV_DEVTYPE(netdev, &fcoe_netdev_type);
-       /* set different dev_port value 1 for FCoE netdev than the default
-        * zero dev_port value for PF netdev, this helps biosdevname user
-        * tool to differentiate them correctly while both attached to the
-        * same PCI function.
-        */
-       netdev->dev_port = 1;
-       spin_lock_bh(&vsi->mac_filter_hash_lock);
-       i40e_add_filter(vsi, hw->mac.san_addr, 0);
-       i40e_add_filter(vsi, (u8[6]) FC_FCOE_FLOGI_MAC, 0);
-       i40e_add_filter(vsi, FIP_ALL_FCOE_MACS, 0);
-       i40e_add_filter(vsi, FIP_ALL_ENODE_MACS, 0);
-       spin_unlock_bh(&vsi->mac_filter_hash_lock);
-
-       /* use san mac */
-       ether_addr_copy(netdev->dev_addr, hw->mac.san_addr);
-       ether_addr_copy(netdev->perm_addr, hw->mac.san_addr);
-       /* fcoe netdev ops */
-       netdev->netdev_ops = &i40e_fcoe_netdev_ops;
-}
-
-/**
- * i40e_fcoe_vsi_setup - allocate and set up FCoE VSI
- * @pf: the PF that VSI is associated with
- *
- **/
-void i40e_fcoe_vsi_setup(struct i40e_pf *pf)
-{
-       struct i40e_vsi *vsi;
-       u16 seid;
-       int i;
-
-       if (!(pf->flags & I40E_FLAG_FCOE_ENABLED))
-               return;
-
-       for (i = 0; i < pf->num_alloc_vsi; i++) {
-               vsi = pf->vsi[i];
-               if (vsi && vsi->type == I40E_VSI_FCOE) {
-                       dev_warn(&pf->pdev->dev,
-                                "FCoE VSI already created\n");
-                       return;
-               }
-       }
-
-       seid = pf->vsi[pf->lan_vsi]->seid;
-       vsi = i40e_vsi_setup(pf, I40E_VSI_FCOE, seid, 0);
-       if (vsi) {
-               dev_dbg(&pf->pdev->dev,
-                       "Successfully created FCoE VSI seid %d id %d uplink_seid %d PF seid %d\n",
-                       vsi->seid, vsi->id, vsi->uplink_seid, seid);
-       } else {
-               dev_info(&pf->pdev->dev, "Failed to create FCoE VSI\n");
-       }
-}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.h b/drivers/net/ethernet/intel/i40e/i40e_fcoe.h
deleted file mode 100644 (file)
index a93174d..0000000
+++ /dev/null
@@ -1,127 +0,0 @@
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
-
-#ifndef _I40E_FCOE_H_
-#define _I40E_FCOE_H_
-
-/* FCoE HW context helper macros */
-#define I40E_DDP_CONTEXT_DESC(R, i)     \
-       (&(((struct i40e_fcoe_ddp_context_desc *)((R)->desc))[i]))
-
-#define I40E_QUEUE_CONTEXT_DESC(R, i)   \
-       (&(((struct i40e_fcoe_queue_context_desc *)((R)->desc))[i]))
-
-#define I40E_FILTER_CONTEXT_DESC(R, i)  \
-       (&(((struct i40e_fcoe_filter_context_desc *)((R)->desc))[i]))
-
-/* receive queue descriptor filter status for FCoE */
-#define I40E_RX_DESC_FLTSTAT_FCMASK    0x3
-#define I40E_RX_DESC_FLTSTAT_NOMTCH    0x0     /* no ddp context match */
-#define I40E_RX_DESC_FLTSTAT_NODDP     0x1     /* no ddp due to error */
-#define I40E_RX_DESC_FLTSTAT_DDP       0x2     /* DDPed payload, post header */
-#define I40E_RX_DESC_FLTSTAT_FCPRSP    0x3     /* FCP_RSP */
-
-/* receive queue descriptor error codes for FCoE */
-#define I40E_RX_DESC_FCOE_ERROR_MASK           \
-       (I40E_RX_DESC_ERROR_L3L4E_PROT |        \
-        I40E_RX_DESC_ERROR_L3L4E_FC |          \
-        I40E_RX_DESC_ERROR_L3L4E_DMAC_ERR |    \
-        I40E_RX_DESC_ERROR_L3L4E_DMAC_WARN)
-
-/* receive queue descriptor programming error */
-#define I40E_RX_PROG_FCOE_ERROR_TBL_FULL(e)    \
-       (((e) >> I40E_RX_PROG_STATUS_DESC_FCOE_TBL_FULL_SHIFT) & 0x1)
-
-#define I40E_RX_PROG_FCOE_ERROR_CONFLICT(e)    \
-       (((e) >> I40E_RX_PROG_STATUS_DESC_FCOE_CONFLICT_SHIFT) & 0x1)
-
-#define I40E_RX_PROG_FCOE_ERROR_TBL_FULL_BIT   \
-       BIT(I40E_RX_PROG_STATUS_DESC_FCOE_TBL_FULL_SHIFT)
-#define I40E_RX_PROG_FCOE_ERROR_CONFLICT_BIT   \
-       BIT(I40E_RX_PROG_STATUS_DESC_FCOE_CONFLICT_SHIFT)
-
-#define I40E_RX_PROG_FCOE_ERROR_INVLFAIL(e)    \
-       I40E_RX_PROG_FCOE_ERROR_CONFLICT(e)
-#define I40E_RX_PROG_FCOE_ERROR_INVLFAIL_BIT   \
-       I40E_RX_PROG_FCOE_ERROR_CONFLICT_BIT
-
-/* FCoE DDP related definitions */
-#define I40E_FCOE_MIN_XID      0x0000  /* the min xid supported by fcoe_sw */
-#define I40E_FCOE_MAX_XID      0x0FFF  /* the max xid supported by fcoe_sw */
-#define I40E_FCOE_DDP_BUFFCNT_MAX      512     /* 9 bits bufcnt */
-#define I40E_FCOE_DDP_PTR_ALIGN                16
-#define I40E_FCOE_DDP_PTR_MAX  (I40E_FCOE_DDP_BUFFCNT_MAX * sizeof(dma_addr_t))
-#define I40E_FCOE_DDP_BUF_MIN  4096
-#define I40E_FCOE_DDP_MAX      2048
-#define I40E_FCOE_FILTER_CTX_QW1_PCTYPE_SHIFT  8
-
-/* supported netdev features for FCoE */
-#define I40E_FCOE_NETIF_FEATURES (NETIF_F_ALL_FCOE | \
-       NETIF_F_HW_VLAN_CTAG_TX | \
-       NETIF_F_HW_VLAN_CTAG_RX | \
-       NETIF_F_HW_VLAN_CTAG_FILTER)
-
-/* DDP context flags */
-enum i40e_fcoe_ddp_flags {
-       __I40E_FCOE_DDP_NONE = 1,
-       __I40E_FCOE_DDP_TARGET,
-       __I40E_FCOE_DDP_INITALIZED,
-       __I40E_FCOE_DDP_PROGRAMMED,
-       __I40E_FCOE_DDP_DONE,
-       __I40E_FCOE_DDP_ABORTED,
-       __I40E_FCOE_DDP_UNMAPPED,
-};
-
-/* DDP SW context struct */
-struct i40e_fcoe_ddp {
-       int len;
-       u16 xid;
-       u16 firstoff;
-       u16 lastsize;
-       u16 list_len;
-       u8 fcerr;
-       u8 prerr;
-       unsigned long flags;
-       unsigned int sgc;
-       struct scatterlist *sgl;
-       dma_addr_t udp;
-       u64 *udl;
-       struct dma_pool *pool;
-
-};
-
-struct i40e_fcoe_ddp_pool {
-       struct dma_pool *pool;
-};
-
-struct i40e_fcoe {
-       unsigned long mode;
-       atomic_t refcnt;
-       struct i40e_fcoe_ddp_pool __percpu *ddp_pool;
-       struct i40e_fcoe_ddp ddp[I40E_FCOE_DDP_MAX];
-};
-
-#endif /* _I40E_FCOE_H_ */
index a7c7b1d9b7c81851d10b0de02cde2a7fc635a128..6d4b590f851b95322f8989134740e66ee18c7bd8 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index d906692113929e412df7806fd8795922586c9f78..7b5fd33d70ae7eb315115fb119ee7e31f9091a5a 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index daa9204426d42b9cc74749800d62413bc0ddbf5a..cd40dc487b38d8ff3b4b74306060004927d25688 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index e74128db5be5480aef4eaa4761890c0325c82827..79e1396735d90eef0124f442b4a221d2a0a2d0b1 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index f6d37456f3b72f157605af23d0f70eaaa64eaf8e..536ed8e8a96fb09dbbac2e17b7de9592af4dbada 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
@@ -2718,22 +2719,6 @@ void i40e_vlan_stripping_disable(struct i40e_vsi *vsi)
        }
 }
 
-/**
- * i40e_vlan_rx_register - Setup or shutdown vlan offload
- * @netdev: network interface to be adjusted
- * @features: netdev features to test if VLAN offload is enabled or not
- **/
-static void i40e_vlan_rx_register(struct net_device *netdev, u32 features)
-{
-       struct i40e_netdev_priv *np = netdev_priv(netdev);
-       struct i40e_vsi *vsi = np->vsi;
-
-       if (features & NETIF_F_HW_VLAN_CTAG_RX)
-               i40e_vlan_stripping_enable(vsi);
-       else
-               i40e_vlan_stripping_disable(vsi);
-}
-
 /**
  * i40e_add_vlan_all_mac - Add a MAC/VLAN filter for each existing MAC address
  * @vsi: the vsi being configured
@@ -2909,7 +2894,10 @@ static void i40e_restore_vlan(struct i40e_vsi *vsi)
        if (!vsi->netdev)
                return;
 
-       i40e_vlan_rx_register(vsi->netdev, vsi->netdev->features);
+       if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
+               i40e_vlan_stripping_enable(vsi);
+       else
+               i40e_vlan_stripping_disable(vsi);
 
        for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID)
                i40e_vlan_rx_add_vid(vsi->netdev, htons(ETH_P_8021Q),
@@ -6546,6 +6534,75 @@ int i40e_up(struct i40e_vsi *vsi)
        return err;
 }
 
+/**
+ * i40e_force_link_state - Force the link status
+ * @pf: board private structure
+ * @is_up: whether the link state should be forced up or down
+ **/
+static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up)
+{
+       struct i40e_aq_get_phy_abilities_resp abilities;
+       struct i40e_aq_set_phy_config config = {0};
+       struct i40e_hw *hw = &pf->hw;
+       i40e_status err;
+       u64 mask;
+
+       /* Get the current phy config */
+       err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
+                                          NULL);
+       if (err) {
+               dev_err(&pf->pdev->dev,
+                       "failed to get phy cap., ret =  %s last_status =  %s\n",
+                       i40e_stat_str(hw, err),
+                       i40e_aq_str(hw, hw->aq.asq_last_status));
+               return err;
+       }
+
+       /* If link needs to go up, but was not forced to go down,
+        * no need for a flap
+        */
+       if (is_up && abilities.phy_type != 0)
+               return I40E_SUCCESS;
+
+       /* To force link we need to set bits for all supported PHY types,
+        * but there are now more than 32, so we need to split the bitmap
+        * across two fields.
+        */
+       mask = I40E_PHY_TYPES_BITMASK;
+       config.phy_type = is_up ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0;
+       config.phy_type_ext = is_up ? (u8)((mask >> 32) & 0xff) : 0;
+       /* Copy the old settings, except for phy_type */
+       config.abilities = abilities.abilities;
+       config.link_speed = abilities.link_speed;
+       config.eee_capability = abilities.eee_capability;
+       config.eeer = abilities.eeer_val;
+       config.low_power_ctrl = abilities.d3_lpan;
+       err = i40e_aq_set_phy_config(hw, &config, NULL);
+
+       if (err) {
+               dev_err(&pf->pdev->dev,
+                       "set phy config ret =  %s last_status =  %s\n",
+                       i40e_stat_str(&pf->hw, err),
+                       i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+               return err;
+       }
+
+       /* Update the link info */
+       err = i40e_update_link_info(hw);
+       if (err) {
+               /* Wait a little bit (on 40G cards it sometimes takes a really
+                * long time for link to come back from the atomic reset)
+                * and try once more
+                */
+               msleep(1000);
+               i40e_update_link_info(hw);
+       }
+
+       i40e_aq_set_link_restart_an(hw, true, NULL);
+
+       return I40E_SUCCESS;
+}
+
 /**
  * i40e_down - Shutdown the connection processing
  * @vsi: the VSI being stopped
@@ -6563,6 +6620,9 @@ void i40e_down(struct i40e_vsi *vsi)
        }
        i40e_vsi_disable_irq(vsi);
        i40e_vsi_stop_rings(vsi);
+       if (vsi->type == I40E_VSI_MAIN &&
+           vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED)
+               i40e_force_link_state(vsi->back, false);
        i40e_napi_disable_all(vsi);
 
        for (i = 0; i < vsi->num_queue_pairs; i++) {
@@ -7524,6 +7584,9 @@ int i40e_open(struct net_device *netdev)
 
        netif_carrier_off(netdev);
 
+       if (i40e_force_link_state(pf, true))
+               return -EAGAIN;
+
        err = i40e_vsi_open(vsi);
        if (err)
                return err;
@@ -8073,6 +8136,88 @@ u32 i40e_get_global_fd_count(struct i40e_pf *pf)
        return fcnt_prog;
 }
 
+/**
+ * i40e_reenable_fdir_sb - Restore FDir SB capability
+ * @pf: board private structure
+ **/
+static void i40e_reenable_fdir_sb(struct i40e_pf *pf)
+{
+       if (pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED) {
+               pf->flags &= ~I40E_FLAG_FD_SB_AUTO_DISABLED;
+               if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
+                   (I40E_DEBUG_FD & pf->hw.debug_mask))
+                       dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
+       }
+}
+
+/**
+ * i40e_reenable_fdir_atr - Restore FDir ATR capability
+ * @pf: board private structure
+ **/
+static void i40e_reenable_fdir_atr(struct i40e_pf *pf)
+{
+       if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) {
+               /* ATR uses the same filtering logic as SB rules. It only
+                * functions properly if the input set mask is at the default
+                * settings. It is safe to restore the default input set
+                * because there are no active TCPv4 filter rules.
+                */
+               i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP,
+                                       I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+                                       I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+               pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED;
+               if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
+                   (I40E_DEBUG_FD & pf->hw.debug_mask))
+                       dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
+       }
+}
+
+/**
+ * i40e_delete_invalid_filter - Delete an invalid FDIR filter
+ * @pf: board private structure
+ * @filter: FDir filter to remove
+ */
+static void i40e_delete_invalid_filter(struct i40e_pf *pf,
+                                      struct i40e_fdir_filter *filter)
+{
+       /* Update counters */
+       pf->fdir_pf_active_filters--;
+       pf->fd_inv = 0;
+
+       switch (filter->flow_type) {
+       case TCP_V4_FLOW:
+               pf->fd_tcp4_filter_cnt--;
+               break;
+       case UDP_V4_FLOW:
+               pf->fd_udp4_filter_cnt--;
+               break;
+       case SCTP_V4_FLOW:
+               pf->fd_sctp4_filter_cnt--;
+               break;
+       case IP_USER_FLOW:
+               switch (filter->ip4_proto) {
+               case IPPROTO_TCP:
+                       pf->fd_tcp4_filter_cnt--;
+                       break;
+               case IPPROTO_UDP:
+                       pf->fd_udp4_filter_cnt--;
+                       break;
+               case IPPROTO_SCTP:
+                       pf->fd_sctp4_filter_cnt--;
+                       break;
+               case IPPROTO_IP:
+                       pf->fd_ip4_filter_cnt--;
+                       break;
+               }
+               break;
+       }
+
+       /* Remove the filter from the list and free memory */
+       hlist_del(&filter->fdir_node);
+       kfree(filter);
+}
+
 /**
  * i40e_fdir_check_and_reenable - Function to reenabe FD ATR or SB if disabled
  * @pf: board private structure
@@ -8091,40 +8236,23 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
        fcnt_avail = pf->fdir_pf_filter_count;
        if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM)) ||
            (pf->fd_add_err == 0) ||
-           (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt)) {
-               if (pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED) {
-                       pf->flags &= ~I40E_FLAG_FD_SB_AUTO_DISABLED;
-                       if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
-                           (I40E_DEBUG_FD & pf->hw.debug_mask))
-                               dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
-               }
-       }
+           (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt))
+               i40e_reenable_fdir_sb(pf);
 
        /* We should wait for even more space before re-enabling ATR.
         * Additionally, we cannot enable ATR as long as we still have TCP SB
         * rules active.
         */
        if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) &&
-           (pf->fd_tcp4_filter_cnt == 0)) {
-               if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) {
-                       pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED;
-                       if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
-                           (I40E_DEBUG_FD & pf->hw.debug_mask))
-                               dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
-               }
-       }
+           (pf->fd_tcp4_filter_cnt == 0))
+               i40e_reenable_fdir_atr(pf);
 
        /* if hw had a problem adding a filter, delete it */
        if (pf->fd_inv > 0) {
                hlist_for_each_entry_safe(filter, node,
-                                         &pf->fdir_filter_list, fdir_node) {
-                       if (filter->fd_id == pf->fd_inv) {
-                               hlist_del(&filter->fdir_node);
-                               kfree(filter);
-                               pf->fdir_pf_active_filters--;
-                               pf->fd_inv = 0;
-                       }
-               }
+                                         &pf->fdir_filter_list, fdir_node)
+                       if (filter->fd_id == pf->fd_inv)
+                               i40e_delete_invalid_filter(pf, filter);
        }
 }
 
@@ -10467,6 +10595,9 @@ static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
        if (err)
                goto err_unwind;
 
+       if (pf->flags & I40E_FLAG_IWARP_ENABLED)
+               i40e_client_update_msix_info(pf);
+
        return 0;
 
 err_unwind:
@@ -14217,6 +14348,11 @@ static int __maybe_unused i40e_suspend(struct device *dev)
        del_timer_sync(&pf->service_timer);
        cancel_work_sync(&pf->service_task);
 
+       /* Client close must be called explicitly here because the timer
+        * has been stopped.
+        */
+       i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
+
        if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
                i40e_enable_mc_magic_wake(pf);
 
index 76a5cb04e4fe42ef2f67b6fc2301269765a7dc8f..ba9687c037950acd9dbe531f314f55c1480f4c8a 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index 80e66da6b145e07ac4faaf761b9788d67118fc40..9c3c3b0d3ac46ed2c9be2f5bfb9aae751b5ed9bc 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index 83798b7841b9770773e5069af68306560f7e564c..2ec24188d6e221ca08a239021e99bb0c1c5865a9 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
@@ -287,7 +288,7 @@ i40e_status i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw,
                struct i40e_asq_cmd_details *cmd_details);
 i40e_status i40e_aq_resume_port_tx(struct i40e_hw *hw,
                                   struct i40e_asq_cmd_details *cmd_details);
-i40e_status
+enum i40e_status_code
 i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
                             struct i40e_aqc_cloud_filters_element_bb *filters,
                             u8 filter_count);
@@ -299,7 +300,7 @@ enum i40e_status_code
 i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 vsi,
                          struct i40e_aqc_cloud_filters_element_data *filters,
                          u8 filter_count);
-i40e_status
+enum i40e_status_code
 i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
                             struct i40e_aqc_cloud_filters_element_bb *filters,
                             u8 filter_count);
index 97381238eb7c168f0f6887d039ae4d00f1e3a044..5b47dd1f75a56b09f8ca69d9dc4268f319998f6a 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index c234758dad156bd437904486d25800ca1ad6ab1c..b3e206e49cc2f907ebd2d3769a3b545511e28949 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index afb72e711d43b865ef4490174d48b52236da9f8e..10c86f63dc52d86326c4b86130baf9d4220ebc2c 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index d3e55f54a05e2007399f457e01c7424b5196c9ed..410ba13bcf21e2c0f1871cbfb9d6c275db6fa10c 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel(R) 40-10 Gigabit Ethernet Connection Network Driver
index 1ec9b1d8023da96da0dad3ccad94d9cbe1ab23b0..7ccd05bf4b06704a2d2f89d386fb855450a102c1 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
@@ -708,16 +709,22 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
 /**
  * i40e_get_tx_pending - how many tx descriptors not processed
  * @tx_ring: the ring of descriptors
+ * @in_sw: use SW variables
  *
  * Since there is no access to the ring head register
  * in XL710, we need to use our local copies
  **/
-u32 i40e_get_tx_pending(struct i40e_ring *ring)
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
 {
        u32 head, tail;
 
-       head = i40e_get_head(ring);
-       tail = readl(ring->tail);
+       if (!in_sw) {
+               head = i40e_get_head(ring);
+               tail = readl(ring->tail);
+       } else {
+               head = ring->next_to_clean;
+               tail = ring->next_to_use;
+       }
 
        if (head != tail)
                return (head < tail) ?
@@ -774,7 +781,7 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
                         */
                        smp_rmb();
                        tx_ring->tx_stats.prev_pkt_ctr =
-                           i40e_get_tx_pending(tx_ring) ? packets : -1;
+                           i40e_get_tx_pending(tx_ring, true) ? packets : -1;
                }
        }
 }
@@ -898,7 +905,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
                 * them to be written back in case we stay in NAPI.
                 * In this mode on X722 we do not enable Interrupt.
                 */
-               unsigned int j = i40e_get_tx_pending(tx_ring);
+               unsigned int j = i40e_get_tx_pending(tx_ring, false);
 
                if (budget &&
                    ((j / WB_STRIDE) == 0) && (j > 0) &&
index f75a8fe68fcf0d60a1ab242a58db9ff262f009ed..7f8220e653740888a655f1188887b759c1cbbd84 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
@@ -505,7 +506,7 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring);
 void i40e_free_rx_resources(struct i40e_ring *rx_ring);
 int i40e_napi_poll(struct napi_struct *napi, int budget);
 void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
-u32 i40e_get_tx_pending(struct i40e_ring *ring);
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
 void i40e_detect_recover_hung(struct i40e_vsi *vsi);
 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
 bool __i40e_chk_linearize(struct sk_buff *skb);
index b0eed8c0b2f25e88d822c07d8909d2c34587227e..bfb80092b3525e72404a648c521f437063d9e29b 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
@@ -1336,6 +1337,9 @@ struct i40e_hw_port_stats {
 #define I40E_SR_PCIE_ALT_MODULE_MAX_SIZE       1024
 #define I40E_SR_CONTROL_WORD_1_SHIFT           0x06
 #define I40E_SR_CONTROL_WORD_1_MASK    (0x03 << I40E_SR_CONTROL_WORD_1_SHIFT)
+#define I40E_PTR_TYPE                          BIT(15)
+#define I40E_SR_OCP_CFG_WORD0                  0x2B
+#define I40E_SR_OCP_ENABLED                    BIT(15)
 
 /* Shadow RAM related */
 #define I40E_SR_SECTOR_SIZE_IN_WORDS   0x800
index 5cca083da93c7c416722fb68c3c90b67d82b9b60..35173cbe80f7b13b7f58604f04137483ec70fbf4 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
@@ -2368,25 +2369,47 @@ static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 /**
  * i40e_check_vf_permission
  * @vf: pointer to the VF info
- * @macaddr: pointer to the MAC Address being checked
+ * @al: MAC address list from virtchnl
  *
- * Check if the VF has permission to add or delete unicast MAC address
- * filters and return error code -EPERM if not.  Then check if the
- * address filter requested is broadcast or zero and if so return
- * an invalid MAC address error code.
+ * Check that the given list of MAC addresses is allowed. Returns -EPERM or
+ * I40E_ERR_INVALID_MAC_ADDR if any address is rejected. Conditions checked:
+ *
+ * 1) broadcast and zero addresses are never valid
+ * 2) unicast addresses are not allowed if the VMM has administratively set
+ *    the VF MAC address, unless the VF is marked as privileged.
+ * 3) There is enough space to add all the addresses.
+ *
+ * Note that to guarantee consistency, it is expected this function be called
+ * while holding the mac_filter_hash_lock, as otherwise the current number of
+ * addresses might not be accurate.
  **/
-static inline int i40e_check_vf_permission(struct i40e_vf *vf, u8 *macaddr)
+static inline int i40e_check_vf_permission(struct i40e_vf *vf,
+                                          struct virtchnl_ether_addr_list *al)
 {
        struct i40e_pf *pf = vf->pf;
-       int ret = 0;
+       int i;
+
+       /* If this VF is not privileged, then we can't add more than a limited
+        * number of addresses. Check to make sure that the additions do not
+        * push us over the limit.
+        */
+       if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
+           (vf->num_mac + al->num_elements) > I40E_VC_MAX_MAC_ADDR_PER_VF) {
+               dev_err(&pf->pdev->dev,
+                       "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n");
+               return -EPERM;
+       }
+
+       for (i = 0; i < al->num_elements; i++) {
+               u8 *addr = al->list[i].addr;
+
+               if (is_broadcast_ether_addr(addr) ||
+                   is_zero_ether_addr(addr)) {
+                       dev_err(&pf->pdev->dev, "invalid VF MAC addr %pM\n",
+                               addr);
+                       return I40E_ERR_INVALID_MAC_ADDR;
+               }
 
-       if (is_broadcast_ether_addr(macaddr) ||
-                  is_zero_ether_addr(macaddr)) {
-               dev_err(&pf->pdev->dev, "invalid VF MAC addr %pM\n", macaddr);
-               ret = I40E_ERR_INVALID_MAC_ADDR;
-       } else if (vf->pf_set_mac && !is_multicast_ether_addr(macaddr) &&
-                  !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
-                  !ether_addr_equal(macaddr, vf->default_lan_addr.addr)) {
                /* If the host VMM administrator has set the VF MAC address
                 * administratively via the ndo_set_vf_mac command then deny
                 * permission to the VF to add or delete unicast MAC addresses.
@@ -2394,16 +2417,16 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, u8 *macaddr)
                 * The VF may request to set the MAC address filter already
                 * assigned to it so do not return an error in that case.
                 */
-               dev_err(&pf->pdev->dev,
-                       "VF attempting to override administratively set MAC address, reload the VF driver to resume normal operation\n");
-               ret = -EPERM;
-       } else if ((vf->num_mac >= I40E_VC_MAX_MAC_ADDR_PER_VF) &&
-                  !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
-               dev_err(&pf->pdev->dev,
-                       "VF is not trusted, switch the VF to trusted to add more functionality\n");
-               ret = -EPERM;
+               if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
+                   !is_multicast_ether_addr(addr) && vf->pf_set_mac &&
+                   !ether_addr_equal(addr, vf->default_lan_addr.addr)) {
+                       dev_err(&pf->pdev->dev,
+                               "VF attempting to override administratively set MAC address, reload the VF driver to resume normal operation\n");
+                       return -EPERM;
+               }
        }
-       return ret;
+
+       return 0;
 }
 
 /**
@@ -2430,11 +2453,6 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
                goto error_param;
        }
 
-       for (i = 0; i < al->num_elements; i++) {
-               ret = i40e_check_vf_permission(vf, al->list[i].addr);
-               if (ret)
-                       goto error_param;
-       }
        vsi = pf->vsi[vf->lan_vsi_idx];
 
        /* Lock once, because all function inside for loop accesses VSI's
@@ -2442,6 +2460,12 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
         */
        spin_lock_bh(&vsi->mac_filter_hash_lock);
 
+       ret = i40e_check_vf_permission(vf, al);
+       if (ret) {
+               spin_unlock_bh(&vsi->mac_filter_hash_lock);
+               goto error_param;
+       }
+
        /* add new addresses to the list */
        for (i = 0; i < al->num_elements; i++) {
                struct i40e_mac_filter *f;
@@ -3062,7 +3086,7 @@ static struct i40e_vsi *i40e_find_vsi_from_seid(struct i40e_vf *vf, u16 seid)
 
        for (i = 0; i < vf->num_tc ; i++) {
                vsi = i40e_find_vsi_from_id(pf, vf->ch[i].vsi_id);
-               if (vsi->seid == seid)
+               if (vsi && vsi->seid == seid)
                        return vsi;
        }
        return NULL;
@@ -3146,8 +3170,8 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg)
                dev_info(&pf->pdev->dev,
                         "VF %d: Invalid input, can't apply cloud filter\n",
                         vf->vf_id);
-                       aq_ret = I40E_ERR_PARAM;
-                       goto err;
+               aq_ret = I40E_ERR_PARAM;
+               goto err;
        }
 
        memset(&cfilter, 0, sizeof(cfilter));
@@ -3741,6 +3765,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
        int ret = 0;
        struct hlist_node *h;
        int bkt;
+       u8 i;
 
        /* validate the request */
        if (vf_id >= pf->num_alloc_vfs) {
@@ -3752,6 +3777,16 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 
        vf = &(pf->vf[vf_id]);
        vsi = pf->vsi[vf->lan_vsi_idx];
+
+       /* When the VF is resetting wait until it is done.
+        * It can take up to 200 milliseconds,
+        * but wait for up to 300 milliseconds to be safe.
+        */
+       for (i = 0; i < 15; i++) {
+               if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states))
+                       break;
+               msleep(20);
+       }
        if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
                dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n",
                        vf_id);
index 6852599b2379c9e95ac634f0aa65a9a3a6b21e16..57f727bb9e36e154d817018b5b6e32c9f1dee165 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
index a393f4a07f06dbf6f9ec18446f6d7f0c4c76ca23..1e89c5487676a06543b265f6ace4feb22ebbcbc1 100644 (file)
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
index d1aab6b8bfb1dd1a360e6e47ec560f0578b86ac9..6fd677efa9da0ce414c1dda5263af3e65c0e9d6f 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
index e0bfaa3d4a21341efc4bcddf2b23290e9492a5e6..a7137c1652567e14b76972b05c3d836b924d760a 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
index 815de8d9c3fb2a8f008e7d6e9382053a1adf0c80..439e718820495a27b819c3ebb91aabb1e55186d6 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
index 8e6a6dd9212bb0812f42378ad208ccd451e43ee4..7e0fddd8af36e83700481bfd3dba076b11471835 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
index 67bf5cebb76f06f93fa452641057c8f756c25f9d..67140cdbcd7ae91aa433f195a13af95d14fadd2c 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
index 0469e4bfd3ec2437d2fc7eebaaeb6ff199ab8537..352dd3f3eb6a3c58a1b17bbeb2ee9f5c7a9c8b66 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
index 00ed24bfce1347f0b80b077139a754e58c6da376..7432596164f41bcf69ce1482b7d77bf309f51b97 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
index a5d79877354cc6be02cd7777ffd1577b468f9cc3..ddac0e4908d3850f5f841f12bb9b4540f8883abf 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
index a90737786c3433a223330e6902e52adb13014218..8668ad6c1a6552d95fb3252ed4981a3a84d68f64 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
index 47c429931a5784e11df8e509139cd310ab264a0d..72501bd0f1a9c36753d9772f02e8711dae78009f 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
index 10febcfd7cd82655254df47d3b5cc8885faf82cc..c9c9356597583c0465a0c57b2cf391fbbda85612 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
index 5b222246e08b3ac6b2669ae0df84c3f3eb36af51..0d7993ecb99a6bce68a3c79913bd9e4fac55d842 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
index 9a5100b2b7c74fb4da000ab3182b3dcf21efe4ae..ece01dd12a3c43a2f872248986a913d47c3d9277 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel(R) 40-10 Gigabit Ethernet Virtual Function Driver
index eb8f3e327f6baf5742f0d8a1c725b736ac5827ad..12bd937861e794185295429f8d6f820ce5da94d6 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
@@ -196,7 +197,7 @@ void i40evf_detect_recover_hung(struct i40e_vsi *vsi)
                         */
                        smp_rmb();
                        tx_ring->tx_stats.prev_pkt_ctr =
-                         i40evf_get_tx_pending(tx_ring, false) ? packets : -1;
+                         i40evf_get_tx_pending(tx_ring, true) ? packets : -1;
                }
        }
 }
index 9129447d079b161c115dd2b92d2eae47bd765b2e..5790897eae2e722aafe53eb6ff4073d7799dcbcb 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
index 54951c84a4817f056c5bcc0807a799dc399b7127..449de4b0058e6593c00979da82671c77a67c26aa 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
index e46555ad712218ccf6d9b66e47c299cb31730cb0..3a7a1e77bf39f67013f5660de14c1cf411b28732 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
@@ -280,13 +281,10 @@ struct i40evf_adapter {
 
        u32 flags;
 #define I40EVF_FLAG_RX_CSUM_ENABLED            BIT(0)
-#define I40EVF_FLAG_IMIR_ENABLED               BIT(1)
-#define I40EVF_FLAG_MQ_CAPABLE                 BIT(2)
 #define I40EVF_FLAG_PF_COMMS_FAILED            BIT(3)
 #define I40EVF_FLAG_RESET_PENDING              BIT(4)
 #define I40EVF_FLAG_RESET_NEEDED               BIT(5)
 #define I40EVF_FLAG_WB_ON_ITR_CAPABLE          BIT(6)
-#define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE     BIT(7)
 #define I40EVF_FLAG_ADDR_SET_BY_PF             BIT(8)
 #define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED   BIT(9)
 #define I40EVF_FLAG_CLIENT_NEEDS_OPEN          BIT(10)
index e6793255de0b38ef36fe1663151d8f35b3369693..dc4cde274fb82872a7d3b4dc7021c9e909e40e86 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
index 4955ce3ab6a2c82783653f872e9030d1f061e5d1..5f71532be7f143cbaea948e7ece7e82bc3d19280 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
@@ -815,13 +816,11 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
        if (!macaddr)
                return NULL;
 
-       spin_lock_bh(&adapter->mac_vlan_list_lock);
-
        f = i40evf_find_filter(adapter, macaddr);
        if (!f) {
                f = kzalloc(sizeof(*f), GFP_ATOMIC);
                if (!f)
-                       goto clearout;
+                       return f;
 
                ether_addr_copy(f->macaddr, macaddr);
 
@@ -832,8 +831,6 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
                f->remove = false;
        }
 
-clearout:
-       spin_unlock_bh(&adapter->mac_vlan_list_lock);
        return f;
 }
 
@@ -868,9 +865,10 @@ static int i40evf_set_mac(struct net_device *netdev, void *p)
                adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
        }
 
+       f = i40evf_add_filter(adapter, addr->sa_data);
+
        spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
-       f = i40evf_add_filter(adapter, addr->sa_data);
        if (f) {
                ether_addr_copy(hw->mac.addr, addr->sa_data);
                ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
@@ -2493,6 +2491,7 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
        u16 addr_type = 0;
        u16 n_proto = 0;
        int i = 0;
+       struct virtchnl_filter *vf = &filter->f;
 
        if (f->dissector->used_keys &
            ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
@@ -2540,7 +2539,7 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
                        return -EINVAL;
                if (n_proto == ETH_P_IPV6) {
                        /* specify flow type as TCP IPv6 */
-                       filter->f.flow_type = VIRTCHNL_TCP_V6_FLOW;
+                       vf->flow_type = VIRTCHNL_TCP_V6_FLOW;
                }
 
                if (key->ip_proto != IPPROTO_TCP) {
@@ -2585,9 +2584,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
                            is_multicast_ether_addr(key->dst)) {
                                /* set the mask if a valid dst_mac address */
                                for (i = 0; i < ETH_ALEN; i++)
-                                       filter->f.mask.tcp_spec.dst_mac[i] |=
-                                                                       0xff;
-                               ether_addr_copy(filter->f.data.tcp_spec.dst_mac,
+                                       vf->mask.tcp_spec.dst_mac[i] |= 0xff;
+                               ether_addr_copy(vf->data.tcp_spec.dst_mac,
                                                key->dst);
                        }
 
@@ -2596,9 +2594,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
                            is_multicast_ether_addr(key->src)) {
                                /* set the mask if a valid dst_mac address */
                                for (i = 0; i < ETH_ALEN; i++)
-                                       filter->f.mask.tcp_spec.src_mac[i] |=
-                                                                       0xff;
-                               ether_addr_copy(filter->f.data.tcp_spec.src_mac,
+                                       vf->mask.tcp_spec.src_mac[i] |= 0xff;
+                               ether_addr_copy(vf->data.tcp_spec.src_mac,
                                                key->src);
                }
        }
@@ -2622,8 +2619,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
                                return I40E_ERR_CONFIG;
                        }
                }
-               filter->f.mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
-               filter->f.data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
+               vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
+               vf->data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
        }
 
        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
@@ -2670,14 +2667,12 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
                        return I40E_ERR_CONFIG;
                }
                if (key->dst) {
-                       filter->f.mask.tcp_spec.dst_ip[0] |=
-                                                       cpu_to_be32(0xffffffff);
-                       filter->f.data.tcp_spec.dst_ip[0] = key->dst;
+                       vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff);
+                       vf->data.tcp_spec.dst_ip[0] = key->dst;
                }
                if (key->src) {
-                       filter->f.mask.tcp_spec.src_ip[0] |=
-                                                       cpu_to_be32(0xffffffff);
-                       filter->f.data.tcp_spec.src_ip[0] = key->src;
+                       vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff);
+                       vf->data.tcp_spec.src_ip[0] = key->src;
                }
        }
 
@@ -2710,22 +2705,14 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
                if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
                        field_flags |= I40EVF_CLOUD_FIELD_IIP;
 
-               if (key->dst.s6_addr) {
-                       for (i = 0; i < 4; i++)
-                               filter->f.mask.tcp_spec.dst_ip[i] |=
-                                                       cpu_to_be32(0xffffffff);
-                       memcpy(&filter->f.data.tcp_spec.dst_ip,
-                              &key->dst.s6_addr32,
-                              sizeof(filter->f.data.tcp_spec.dst_ip));
-               }
-               if (key->src.s6_addr) {
-                       for (i = 0; i < 4; i++)
-                               filter->f.mask.tcp_spec.src_ip[i] |=
-                                                       cpu_to_be32(0xffffffff);
-                       memcpy(&filter->f.data.tcp_spec.src_ip,
-                              &key->src.s6_addr32,
-                              sizeof(filter->f.data.tcp_spec.src_ip));
-               }
+               for (i = 0; i < 4; i++)
+                       vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff);
+               memcpy(&vf->data.tcp_spec.dst_ip, &key->dst.s6_addr32,
+                      sizeof(vf->data.tcp_spec.dst_ip));
+               for (i = 0; i < 4; i++)
+                       vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff);
+               memcpy(&vf->data.tcp_spec.src_ip, &key->src.s6_addr32,
+                      sizeof(vf->data.tcp_spec.src_ip));
        }
        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_dissector_key_ports *key =
@@ -2757,16 +2744,16 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
                        }
                }
                if (key->dst) {
-                       filter->f.mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
-                       filter->f.data.tcp_spec.dst_port = key->dst;
+                       vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
+                       vf->data.tcp_spec.dst_port = key->dst;
                }
 
                if (key->src) {
-                       filter->f.mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
-                       filter->f.data.tcp_spec.src_port = key->dst;
+                       vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
+                       vf->data.tcp_spec.src_port = key->src;
                }
        }
-       filter->f.field_flags = field_flags;
+       vf->field_flags = field_flags;
 
        return 0;
 }
@@ -2805,14 +2792,7 @@ static int i40evf_configure_clsflower(struct i40evf_adapter *adapter,
 {
        int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid);
        struct i40evf_cloud_filter *filter = NULL;
-       int err = 0, count = 50;
-
-       while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
-                               &adapter->crit_section)) {
-               udelay(1);
-               if (--count == 0)
-                       return -EINVAL;
-       }
+       int err = -EINVAL, count = 50;
 
        if (tc < 0) {
                dev_err(&adapter->pdev->dev, "Invalid traffic class\n");
@@ -2820,10 +2800,16 @@ static int i40evf_configure_clsflower(struct i40evf_adapter *adapter,
        }
 
        filter = kzalloc(sizeof(*filter), GFP_KERNEL);
-       if (!filter) {
-               err = -ENOMEM;
-               goto clearout;
+       if (!filter)
+               return -ENOMEM;
+
+       while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
+                               &adapter->crit_section)) {
+               if (--count == 0)
+                       goto err;
+               udelay(1);
        }
+
        filter->cookie = cls_flower->cookie;
 
        /* set the mask to all zeroes to begin with */
@@ -2848,7 +2834,7 @@ static int i40evf_configure_clsflower(struct i40evf_adapter *adapter,
 err:
        if (err)
                kfree(filter);
-clearout:
+
        clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
        return err;
 }
@@ -3040,7 +3026,12 @@ static int i40evf_open(struct net_device *netdev)
        if (err)
                goto err_req_irq;
 
+       spin_lock_bh(&adapter->mac_vlan_list_lock);
+
        i40evf_add_filter(adapter, adapter->hw.mac.addr);
+
+       spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
        i40evf_configure(adapter);
 
        i40evf_up_complete(adapter);
index 6134b61e0938525bb421037d3ef7fe37a246203f..26a59890532f725ea6d4c951db00fc6295f5ec03 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
@@ -1048,24 +1049,28 @@ void i40evf_disable_channels(struct i40evf_adapter *adapter)
  * Print the cloud filter
  **/
 static void i40evf_print_cloud_filter(struct i40evf_adapter *adapter,
-                                     struct virtchnl_filter f)
+                                     struct virtchnl_filter *f)
 {
-       switch (f.flow_type) {
+       switch (f->flow_type) {
        case VIRTCHNL_TCP_V4_FLOW:
                dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n",
-                        &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac,
-                        ntohs(f.data.tcp_spec.vlan_id),
-                        &f.data.tcp_spec.dst_ip[0], &f.data.tcp_spec.src_ip[0],
-                        ntohs(f.data.tcp_spec.dst_port),
-                        ntohs(f.data.tcp_spec.src_port));
+                        &f->data.tcp_spec.dst_mac,
+                        &f->data.tcp_spec.src_mac,
+                        ntohs(f->data.tcp_spec.vlan_id),
+                        &f->data.tcp_spec.dst_ip[0],
+                        &f->data.tcp_spec.src_ip[0],
+                        ntohs(f->data.tcp_spec.dst_port),
+                        ntohs(f->data.tcp_spec.src_port));
                break;
        case VIRTCHNL_TCP_V6_FLOW:
                dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n",
-                        &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac,
-                        ntohs(f.data.tcp_spec.vlan_id),
-                        &f.data.tcp_spec.dst_ip, &f.data.tcp_spec.src_ip,
-                        ntohs(f.data.tcp_spec.dst_port),
-                        ntohs(f.data.tcp_spec.src_port));
+                        &f->data.tcp_spec.dst_mac,
+                        &f->data.tcp_spec.src_mac,
+                        ntohs(f->data.tcp_spec.vlan_id),
+                        &f->data.tcp_spec.dst_ip,
+                        &f->data.tcp_spec.src_ip,
+                        ntohs(f->data.tcp_spec.dst_port),
+                        ntohs(f->data.tcp_spec.src_port));
                break;
        }
 }
@@ -1303,7 +1308,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
                                                 i40evf_stat_str(&adapter->hw,
                                                                 v_retval));
                                        i40evf_print_cloud_filter(adapter,
-                                                                 cf->f);
+                                                                 &cf->f);
                                        list_del(&cf->list);
                                        kfree(cf);
                                        adapter->num_cloud_filters--;
@@ -1322,7 +1327,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
                                                 i40evf_stat_str(&adapter->hw,
                                                                 v_retval));
                                        i40evf_print_cloud_filter(adapter,
-                                                                 cf->f);
+                                                                 &cf->f);
                                }
                        }
                        }
index 5bcb2de75933ecad8866b6988f6c5d4cf6ad34c8..c48583e98ac1b45b2f39f31fe721a8c4c5f1dcd6 100644 (file)
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel 82575 PCI-Express Ethernet Linux driver
index c37cc8bccf47760481481b09dcb61a48de1ed2fc..dd9b6cac220d40bcfc837a8317138e9089fdf6ca 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2015 Intel Corporation.
  *
index acf06051e111cb9dd1be2386bd84ce8ca6ad220d..e53ebe97d709d743d9436f96416cf1aea2deda09 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
index 83cabff1e0ab68f025db099658549796dfc68535..98534f765e0eae25e2975cbc027fec0831d44583 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
index 6c9485ab4b574a83ca067d3adef34b389794cb2b..ff835e1e853d9baa3486a0856d81a7523c699bb1 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
index 07d48f2e33699e45ea385e148dd4771a4ac04e1f..6f548247e6d86226d228abb80064657888f8a9b1 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
index b2964a2a60b1ac2df4cb4465c47ea7c80b018353..56f015ccb2060b7a77f0046080bf0346c30bf027 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
index 5eff82678f0ba41f7e171f2101b856a594b2264b..298afa0d9159b04d4843e5f896eca3cb274873a9 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
index 90c8893c3eeda728f1f18979088d2ecd7f5431d6..04d80c765aeea291db5ae67999a1a9cd101b3329 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
index bffd58f7b2a1d8e1e49850f37dbf97916a876772..ef42f1689b3b43ec96aef4bfaab56d3324f3658f 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
index a62b08e1572e440fb15afd49b06a315ed753f943..4f0ecd28354d18a752a47cf2ddb8716f9fa179d6 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
index 3582c5cf88439713a2cf520aed8e85c1cc5c5ba4..e4596f151cd437d5a56522b87f596676b10777dd 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  * This program is free software; you can redistribute it and/or modify it
index febc9cdb739125174e143b0159feb0b529cb5ac6..dde68cd54a5307384d91b8d9774be4d7de1d9968 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
index 413025bdcb50feb68c8af9bcb398bbae37811f80..4ec61243da82e2b14e595f3ab85db6b1b4b46269 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2015 Intel Corporation.
  *
index 9b622b33bb5accb6c33f0059c98fc64e9e2f4d8e..856d2cda0643312491b20214b6a32e42552324a2 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
index 568c96842f2818b1e72de9fd0a19a2625070e363..e8fa8c6530e050460bb0ffd1a9811b03fd043e20 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
index 1c6b8d9176a8e0f39815c8fb7d8681a644db2186..8dbc399b345e54903cb0bf0704367a0ca57f8c64 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
@@ -109,6 +110,7 @@ struct vf_data_storage {
        u16 pf_qos;
        u16 tx_rate;
        bool spoofchk_enabled;
+       bool trusted;
 };
 
 /* Number of unicast MAC filters reserved for the PF in the RAR registers */
index 606e6761758f7b2bde7fdae1e7c54ae713d007fa..e77ba0d5866d0cc64aae1aae57568638dc95e751 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
index 44b6a68f1af727136271132014b1efa412ee7e32..41b306fb90f8cbb2f4dcc1c49b178235d294be81 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
index b88fae78536951cd76506a78cb19e044762fc943..c1c0bc30a16d8196f8319a21dcb1f8d13ef4ac52 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
@@ -190,6 +191,8 @@ static int igb_ndo_set_vf_vlan(struct net_device *netdev,
 static int igb_ndo_set_vf_bw(struct net_device *, int, int, int);
 static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
                                   bool setting);
+static int igb_ndo_set_vf_trust(struct net_device *netdev, int vf,
+                               bool setting);
 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
                                 struct ifla_vf_info *ivi);
 static void igb_check_vf_rate_limit(struct igb_adapter *);
@@ -774,8 +777,7 @@ u32 igb_rd32(struct e1000_hw *hw, u32 reg)
        if (!(~value) && (!reg || !(~readl(hw_addr)))) {
                struct net_device *netdev = igb->netdev;
                hw->hw_addr = NULL;
-               netif_device_detach(netdev);
-               netdev_err(netdev, "PCIe link lost, device now detached\n");
+               netdev_err(netdev, "PCIe link lost\n");
        }
 
        return value;
@@ -2527,6 +2529,7 @@ static const struct net_device_ops igb_netdev_ops = {
        .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
        .ndo_set_vf_rate        = igb_ndo_set_vf_bw,
        .ndo_set_vf_spoofchk    = igb_ndo_set_vf_spoofchk,
+       .ndo_set_vf_trust       = igb_ndo_set_vf_trust,
        .ndo_get_vf_config      = igb_ndo_get_vf_config,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = igb_netpoll,
@@ -5747,7 +5750,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
                struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
 
-               if (adapter->tstamp_config.tx_type & HWTSTAMP_TX_ON &&
+               if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
                    !test_and_set_bit_lock(__IGB_PTP_TX_IN_PROGRESS,
                                           &adapter->state)) {
                        skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
@@ -6383,6 +6386,9 @@ static int igb_vf_configure(struct igb_adapter *adapter, int vf)
        /* By default spoof check is enabled for all VFs */
        adapter->vf_data[vf].spoofchk_enabled = true;
 
+       /* By default VFs are not trusted */
+       adapter->vf_data[vf].trusted = false;
+
        return 0;
 }
 
@@ -6940,13 +6946,13 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf,
                }
                break;
        case E1000_VF_MAC_FILTER_ADD:
-               if (vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) {
+               if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) &&
+                   !vf_data->trusted) {
                        dev_warn(&pdev->dev,
                                 "VF %d requested MAC filter but is administratively denied\n",
                                 vf);
                        return -EINVAL;
                }
-
                if (!is_valid_ether_addr(addr)) {
                        dev_warn(&pdev->dev,
                                 "VF %d attempted to set invalid MAC filter\n",
@@ -6998,7 +7004,8 @@ static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
        int ret = 0;
 
        if (!info) {
-               if (vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) {
+               if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) &&
+                   !vf_data->trusted) {
                        dev_warn(&pdev->dev,
                                 "VF %d attempted to override administratively set MAC address\nReload the VF driver to resume operations\n",
                                 vf);
@@ -8934,6 +8941,22 @@ static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
        return 0;
 }
 
+static int igb_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting)
+{
+       struct igb_adapter *adapter = netdev_priv(netdev);
+
+       if (vf >= adapter->vfs_allocated_count)
+               return -EINVAL;
+       if (adapter->vf_data[vf].trusted == setting)
+               return 0;
+
+       adapter->vf_data[vf].trusted = setting;
+
+       dev_info(&adapter->pdev->dev, "VF %u is %strusted\n",
+                vf, setting ? "" : "not ");
+       return 0;
+}
+
 static int igb_ndo_get_vf_config(struct net_device *netdev,
                                 int vf, struct ifla_vf_info *ivi)
 {
@@ -8947,6 +8970,7 @@ static int igb_ndo_get_vf_config(struct net_device *netdev,
        ivi->vlan = adapter->vf_data[vf].pf_vlan;
        ivi->qos = adapter->vf_data[vf].pf_qos;
        ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled;
+       ivi->trusted = adapter->vf_data[vf].trusted;
        return 0;
 }
 
index 0746b19ec6d3765ac43b44c850c077050d9f02a0..7454b9895a651c0b3ef47f5dab6a095b5355d4db 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /* PTP Hardware Clock (PHC) driver for the Intel 82576 and 82580
  *
  * Copyright (C) 2011 Richard Cochran <richardcochran@gmail.com>
index 044b0ad5fcb948298861bdafb905b9797a7f1f3f..efe29dae384ada199110505a4a4afa816418ca33 100644 (file)
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel(R) 82576 Virtual Function Linux driver
index f1789d192e2425e2fa37ca4d0550da3afde3541c..04bcfec0641b9cef2968a4f7b10ac33135950785 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
index a127688e83e64c244e0c381e5e11f4748d2d4d42..ca39e3cccaeb738fd88c5cf3fa7b6be3e4536294 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
index bf69f01f8467754cea8ecdfd7f2f8bc68c82dc21..f5bf248e22eb0aea3cd49cb72168218dbc7611c6 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
index c9a441632e9f07ff855b2aae040db5c716000a29..9195884096f87a269acfa737a6542f5d15b47ab6 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
index 30d58c4a444ed8bec2089ce4c2c0951706b80926..479b062fe9eed3a997a5bbf45c3de32f4e9fb324 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
index 4214c1519a879c9cb70cd1791b361657a5615d1e..e2b7502f1953899d5cd8f65e6d40d8adc7c1358a 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
index 86a7c120b5740555d8e904e7678351fadc96c833..614e52409f11d0448c3aa2cc9639419203fd9da7 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
index 9577ccf4b26ad841ac623c2235ed55c835356c09..bfe8d8297b2e2daac6fd80b258735e68a557338b 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
index d213eefb61698874b87abb915b13435347f3448d..193b50026246fed1b0750db99a201fe8ce843744 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
index 0b20c5e62ffe536ee8e1b8528363e08a6da3cd78..1b42dd554dd274642266308ca9dad1272e9e1633 100644 (file)
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel PRO/10GbE Linux driver
index 1180cd59b5708cc45ddd0724cceb159757456325..92022841755f249638bbd9aa76c303776bd1aa2b 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel PRO/10GbE Linux driver
index 5680f64314b83df9c6f29021cec1a4dd47c1ace6..475297a810fe6a42cbfeec6bd58ea95c7e76fce3 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel PRO/10GbE Linux driver
index 0bd5d72e1af5b6be4873baf31431e7f5ad09a347..19f36d87ef6198a4f61cb02288e4ff4e025d3f60 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel PRO/10GbE Linux driver
index 32c1b302d791d74a46ae94c5926d6345b0409c47..24e849902d600ace0af72f50ee58f49487abac2d 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel PRO/10GbE Linux driver
index 8fc90519223149f71230d5f74b88c27c25d1afc9..b1710379192e4f96f2e7625ca415f30ffc19ad3a 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel PRO/10GbE Linux driver
index 8319465eb38df1948230cf488d40feb75bd442dc..4cd96c88cb5d1547235191ad31cc1294b3b1127d 100644 (file)
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel 10 Gigabit PCI Express Linux driver
index c1e3a0039ea5ab71c59d44af76a24d63122bacd9..4f08c712e58e1e4e03213791c0e246e505c87a2f 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
index a0ebd9ecf243b9f73fc0bd990b178359706881b0..cb0fe5fedb33b8c2482a8ef5ed3b70ef87dddcaa 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
index 4dfc81dbee4b4a145fc6ddc68c642a0b9f6602e2..66a74f4651e8d980803857159887dd68f53c2f0b 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
index 61188f343955106e96fa0b3af4e3984f9d935e4f..633be93f3dbbe8454c7cc832d2b268bec1507ef4 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
index 4d4c02366cb3afe10791a5d5848ccb4a8c7f23b0..2b311382167a8f2112cfb3b01a210a9c559010cc 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
@@ -153,6 +154,7 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
 void ixgbe_set_soft_rate_select_speed(struct ixgbe_hw *hw,
                                      ixgbe_link_speed speed);
 
+#define IXGBE_FAILED_READ_RETRIES 5
 #define IXGBE_FAILED_READ_REG 0xffffffffU
 #define IXGBE_FAILED_READ_CFG_DWORD 0xffffffffU
 #define IXGBE_FAILED_READ_CFG_WORD 0xffffU
index fc0a2dd5249956bf19ba836bad5c2d92d61b1b60..73b6362d4327d64ec5cf332303d2a8424cfd51b4 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
index f94c7e82a30b24a1d9001d1f32915014690f468f..085130626330695891b0be3ea3e06a138c7d28ff 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
index 3164f5453b8fc92b94569ab6d1b17af45329fac2..7edce607f901080d0d8c30cabc28370f7c6c01eb 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
index 90c370230e2001250752725ee7d10c0ed71de341..fa030f0abc18f2662bf58e1f2415ccd20d618da5 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
index 221f158034808bdeb29738c22047c3cd13f576b9..c0e6ab42e0e1dac088f0496fc3fbbc0a65ede993 100644 (file)
@@ -97,6 +97,7 @@ static const struct ixgbe_stats ixgbe_gstrings_stats[] = {
        {"tx_heartbeat_errors", IXGBE_NETDEV_STAT(tx_heartbeat_errors)},
        {"tx_timeout_count", IXGBE_STAT(tx_timeout_count)},
        {"tx_restart_queue", IXGBE_STAT(restart_queue)},
+       {"rx_length_errors", IXGBE_STAT(stats.rlec)},
        {"rx_long_length_errors", IXGBE_STAT(stats.roc)},
        {"rx_short_length_errors", IXGBE_STAT(stats.ruc)},
        {"tx_flow_control_xon", IXGBE_STAT(stats.lxontxc)},
@@ -3059,6 +3060,8 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir,
 
                for (i = 0; i < reta_entries; i++)
                        adapter->rss_indir_tbl[i] = indir[i];
+
+               ixgbe_store_reta(adapter);
        }
 
        /* Fill out the rss hash key */
@@ -3067,8 +3070,6 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir,
                ixgbe_store_key(adapter);
        }
 
-       ixgbe_store_reta(adapter);
-
        return 0;
 }
 
index 38385876effb0c9a3d8659afebc5f469575f55f4..cf19199015143f6c1efbb507b91df8433133f81e 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
index 93eacddb6704d4ebc7ed8b55dc2121adda7b5e31..68af127987bcbc65981ebbd7e7be5512f4b63bde 100644 (file)
@@ -423,15 +423,21 @@ static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs,
        const char aes_gcm_name[] = "rfc4106(gcm(aes))";
        int key_len;
 
-       if (xs->aead) {
-               key_data = &xs->aead->alg_key[0];
-               key_len = xs->aead->alg_key_len;
-               alg_name = xs->aead->alg_name;
-       } else {
+       if (!xs->aead) {
                netdev_err(dev, "Unsupported IPsec algorithm\n");
                return -EINVAL;
        }
 
+       if (xs->aead->alg_icv_len != IXGBE_IPSEC_AUTH_BITS) {
+               netdev_err(dev, "IPsec offload requires %d bit authentication\n",
+                          IXGBE_IPSEC_AUTH_BITS);
+               return -EINVAL;
+       }
+
+       key_data = &xs->aead->alg_key[0];
+       key_len = xs->aead->alg_key_len;
+       alg_name = xs->aead->alg_name;
+
        if (strcmp(alg_name, aes_gcm_name)) {
                netdev_err(dev, "Unsupported IPsec algorithm - please use %s\n",
                           aes_gcm_name);
@@ -718,23 +724,10 @@ static bool ixgbe_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
        return true;
 }
 
-/**
- * ixgbe_ipsec_free - called by xfrm garbage collections
- * @xs: pointer to transformer state struct
- *
- * We don't have any garbage to collect, so we shouldn't bother
- * implementing this function, but the XFRM code doesn't check for
- * existence before calling the API callback.
- **/
-static void ixgbe_ipsec_free(struct xfrm_state *xs)
-{
-}
-
 static const struct xfrmdev_ops ixgbe_xfrmdev_ops = {
        .xdo_dev_state_add = ixgbe_ipsec_add_sa,
        .xdo_dev_state_delete = ixgbe_ipsec_del_sa,
        .xdo_dev_offload_ok = ixgbe_ipsec_offload_ok,
-       .xdo_dev_state_free = ixgbe_ipsec_free,
 };
 
 /**
@@ -781,13 +774,40 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
 
        first->tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CC;
 
-       itd->flags = 0;
        if (xs->id.proto == IPPROTO_ESP) {
+
                itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
                              IXGBE_ADVTXD_TUCMD_L4T_TCP;
                if (first->protocol == htons(ETH_P_IP))
                        itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4;
-               itd->trailer_len = xs->props.trailer_len;
+
+               /* The actual trailer length is authlen (16 bytes) plus
+                * 2 bytes for the proto and the padlen values, plus
+                * padlen bytes of padding.  This ends up not the same
+                * as the static value found in xs->props.trailer_len (21).
+                *
+                * ... but if we're doing GSO, don't bother as the stack
+                * doesn't add a trailer for those.
+                */
+               if (!skb_is_gso(first->skb)) {
+                       /* The "correct" way to get the auth length would be
+                        * to use
+                        *    authlen = crypto_aead_authsize(xs->data);
+                        * but since we know we only have one size to worry
+                        * about, we can let the compiler use the constant
+                        * and save us a few CPU cycles.
+                        */
+                       const int authlen = IXGBE_IPSEC_AUTH_BITS / 8;
+                       struct sk_buff *skb = first->skb;
+                       u8 padlen;
+                       int ret;
+
+                       ret = skb_copy_bits(skb, skb->len - (authlen + 2),
+                                           &padlen, 1);
+                       if (unlikely(ret))
+                               return 0;
+                       itd->trailer_len = authlen + 2 + padlen;
+               }
        }
        if (tsa->encrypt)
                itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN;
@@ -909,8 +929,13 @@ void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter)
        ixgbe_ipsec_clear_hw_tables(adapter);
 
        adapter->netdev->xfrmdev_ops = &ixgbe_xfrmdev_ops;
-       adapter->netdev->features |= NETIF_F_HW_ESP;
-       adapter->netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
+#define IXGBE_ESP_FEATURES     (NETIF_F_HW_ESP | \
+                                NETIF_F_HW_ESP_TX_CSUM | \
+                                NETIF_F_GSO_ESP)
+
+       adapter->netdev->features |= IXGBE_ESP_FEATURES;
+       adapter->netdev->hw_enc_features |= IXGBE_ESP_FEATURES;
 
        return;
 
index da3ce7849e851e0237d12c62f7c6083c3d8aca2d..4f099f516645d072a3ba03246075f7ec5e99e54d 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
@@ -32,6 +33,7 @@
 #define IXGBE_IPSEC_MAX_RX_IP_COUNT    128
 #define IXGBE_IPSEC_BASE_RX_INDEX      0
 #define IXGBE_IPSEC_BASE_TX_INDEX      IXGBE_IPSEC_MAX_SA_COUNT
+#define IXGBE_IPSEC_AUTH_BITS          128
 
 #define IXGBE_RXTXIDX_IPS_EN           0x00000001
 #define IXGBE_RXIDX_TBL_SHIFT          1
index 4242f0213e46f80ea70679295df9fb71a46f791a..ed4cbe94c3554660a024bcfac653caef0deffadd 100644 (file)
@@ -58,7 +58,6 @@ static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter)
                return false;
 
        /* start at VMDq register offset for SR-IOV enabled setups */
-       pool = 0;
        reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
        for (i = 0, pool = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
                /* If we are greater than indices move to next pool */
index 0da5aa2c8aba53ae6c7b461aa01f803e9c215406..afadba99f7b8286e287a2a88703026319f78924b 100644 (file)
@@ -353,23 +353,32 @@ static void ixgbe_remove_adapter(struct ixgbe_hw *hw)
                ixgbe_service_event_schedule(adapter);
 }
 
-static void ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg)
+static u32 ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg)
 {
+       u8 __iomem *reg_addr;
        u32 value;
+       int i;
 
-       /* The following check not only optimizes a bit by not
-        * performing a read on the status register when the
-        * register just read was a status register read that
-        * returned IXGBE_FAILED_READ_REG. It also blocks any
-        * potential recursion.
+       reg_addr = READ_ONCE(hw->hw_addr);
+       if (ixgbe_removed(reg_addr))
+               return IXGBE_FAILED_READ_REG;
+
+       /* Register read of 0xFFFFFFFF can indicate the adapter has been
+        * removed, so perform several status register reads to determine if
+        * the adapter has been removed.
         */
-       if (reg == IXGBE_STATUS) {
-               ixgbe_remove_adapter(hw);
-               return;
+       for (i = 0; i < IXGBE_FAILED_READ_RETRIES; i++) {
+               value = readl(reg_addr + IXGBE_STATUS);
+               if (value != IXGBE_FAILED_READ_REG)
+                       break;
+               mdelay(3);
        }
-       value = ixgbe_read_reg(hw, IXGBE_STATUS);
+
        if (value == IXGBE_FAILED_READ_REG)
                ixgbe_remove_adapter(hw);
+       else
+               value = readl(reg_addr + reg);
+       return value;
 }
 
 /**
@@ -415,7 +424,7 @@ u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg)
 writes_completed:
        value = readl(reg_addr + reg);
        if (unlikely(value == IXGBE_FAILED_READ_REG))
-               ixgbe_check_remove(hw, reg);
+               value = ixgbe_check_remove(hw, reg);
        return value;
 }
 
@@ -1620,7 +1629,8 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring,
        bi->dma = dma;
        bi->page = page;
        bi->page_offset = ixgbe_rx_offset(rx_ring);
-       bi->pagecnt_bias = 1;
+       page_ref_add(page, USHRT_MAX - 1);
+       bi->pagecnt_bias = USHRT_MAX;
        rx_ring->rx_stats.alloc_rx_page++;
 
        return true;
@@ -1888,6 +1898,14 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
                                     ixgbe_rx_pg_size(rx_ring),
                                     DMA_FROM_DEVICE,
                                     IXGBE_RX_DMA_ATTR);
+       } else if (ring_uses_build_skb(rx_ring)) {
+               unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK;
+
+               dma_sync_single_range_for_cpu(rx_ring->dev,
+                                             IXGBE_CB(skb)->dma,
+                                             offset,
+                                             skb_headlen(skb),
+                                             DMA_FROM_DEVICE);
        } else {
                struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
 
@@ -2022,8 +2040,8 @@ static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer)
         * the pagecnt_bias and page count so that we fully restock the
         * number of references the driver holds.
         */
-       if (unlikely(!pagecnt_bias)) {
-               page_ref_add(page, USHRT_MAX);
+       if (unlikely(pagecnt_bias == 1)) {
+               page_ref_add(page, USHRT_MAX - 1);
                rx_buffer->pagecnt_bias = USHRT_MAX;
        }
 
@@ -7703,7 +7721,8 @@ static void ixgbe_service_task(struct work_struct *work)
 
        if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state)) {
                ixgbe_ptp_overflow_check(adapter);
-               ixgbe_ptp_rx_hang(adapter);
+               if (adapter->flags & IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER)
+                       ixgbe_ptp_rx_hang(adapter);
                ixgbe_ptp_tx_hang(adapter);
        }
 
@@ -7712,7 +7731,8 @@ static void ixgbe_service_task(struct work_struct *work)
 
 static int ixgbe_tso(struct ixgbe_ring *tx_ring,
                     struct ixgbe_tx_buffer *first,
-                    u8 *hdr_len)
+                    u8 *hdr_len,
+                    struct ixgbe_ipsec_tx_data *itd)
 {
        u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
        struct sk_buff *skb = first->skb;
@@ -7726,6 +7746,7 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
                unsigned char *hdr;
        } l4;
        u32 paylen, l4_offset;
+       u32 fceof_saidx = 0;
        int err;
 
        if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -7751,13 +7772,15 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
        if (ip.v4->version == 4) {
                unsigned char *csum_start = skb_checksum_start(skb);
                unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
+               int len = csum_start - trans_start;
 
                /* IP header will have to cancel out any data that
-                * is not a part of the outer IP header
+                * is not a part of the outer IP header, so set to
+                * a reverse csum if needed, else init check to 0.
                 */
-               ip.v4->check = csum_fold(csum_partial(trans_start,
-                                                     csum_start - trans_start,
-                                                     0));
+               ip.v4->check = (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) ?
+                                          csum_fold(csum_partial(trans_start,
+                                                                 len, 0)) : 0;
                type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4;
 
                ip.v4->tot_len = 0;
@@ -7788,12 +7811,15 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
        mss_l4len_idx = (*hdr_len - l4_offset) << IXGBE_ADVTXD_L4LEN_SHIFT;
        mss_l4len_idx |= skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT;
 
+       fceof_saidx |= itd->sa_idx;
+       type_tucmd |= itd->flags | itd->trailer_len;
+
        /* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */
        vlan_macip_lens = l4.hdr - ip.hdr;
        vlan_macip_lens |= (ip.hdr - skb->data) << IXGBE_ADVTXD_MACLEN_SHIFT;
        vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
 
-       ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0, type_tucmd,
+       ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd,
                          mss_l4len_idx);
 
        return 1;
@@ -7855,10 +7881,8 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
        vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT;
        vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
 
-       if (first->tx_flags & IXGBE_TX_FLAGS_IPSEC) {
-               fceof_saidx |= itd->sa_idx;
-               type_tucmd |= itd->flags | itd->trailer_len;
-       }
+       fceof_saidx |= itd->sa_idx;
+       type_tucmd |= itd->flags | itd->trailer_len;
 
        ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd, 0);
 }
@@ -8486,7 +8510,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
        if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
                goto out_drop;
 #endif
-       tso = ixgbe_tso(tx_ring, first, &hdr_len);
+       tso = ixgbe_tso(tx_ring, first, &hdr_len, &ipsec_tx);
        if (tso < 0)
                goto out_drop;
        else if (!tso)
@@ -9895,15 +9919,15 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
 
        /* We can only support IPV4 TSO in tunnels if we can mangle the
         * inner IP ID field, so strip TSO if MANGLEID is not supported.
+        * IPsec offload sets skb->encapsulation but still can handle
+        * the TSO, so it's the exception.
         */
-       if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
-               features &= ~NETIF_F_TSO;
-
-#ifdef CONFIG_XFRM_OFFLOAD
-       /* IPsec offload doesn't get along well with others *yet* */
-       if (skb->sp)
-               features &= ~(NETIF_F_TSO | NETIF_F_HW_CSUM);
+       if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) {
+#ifdef CONFIG_XFRM
+               if (!skb->sp)
 #endif
+                       features &= ~NETIF_F_TSO;
+       }
 
        return features;
 }
index 811cb4f64a5bd60f102886204a1070d3b40298f3..c4628b6635903f713ad1f5de012a5ff0398c39f3 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
index 538a1c5475b685c6280a1308a3e9e223fd45535a..72446644f9fa0efd45eb79c3fddd3567b8f51433 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel 10 Gigabit PCI Express Linux drive
index b0cac961df3bf3235155a11684d633299f83e179..d6a7e77348c5409047e5e5918f752f06e64c4879 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
index 27a70a52f3c9d9ee82b1f1e114eab4a3faa11e3c..008aa073a679d27f3d907015c68acb8fd824fe65 100644 (file)
@@ -831,7 +831,11 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf)
        IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), reg);
 
        /* force drop enable for all VF Rx queues */
-       ixgbe_write_qde(adapter, vf, IXGBE_QDE_ENABLE);
+       reg = IXGBE_QDE_ENABLE;
+       if (adapter->vfinfo[vf].pf_vlan)
+               reg |= IXGBE_QDE_HIDE_VLAN;
+
+       ixgbe_write_qde(adapter, vf, reg);
 
        /* enable receive for vf */
        reg = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset));
index cf67b9b18ed7fa9b96a856d05ae35271b9ec8a17..e30d1f07e8919f47addd362027987ad7c61b5fe0 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
index ca45359686d39fd98e862e298e7d2dcda547ea05..2daa81e6e9b241392f2632b86110c164fbbd67a6 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
index e21cd48491d338fc467182cdbf33c8b8b35509fb..182d640e9f7ac04a0deaf56009b6d8af96a2ad5c 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel 10 Gigabit PCI Express Linux driver
index f470d020477174f6dba278ce4439c65a5a447d6e..3123267dfba974049cc26496570cafe842eb81ac 100644 (file)
@@ -1847,9 +1847,9 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed,
                         (IXGBE_CS4227_EDC_MODE_SR << 1));
 
        if (setup_linear)
-               reg_phy_ext = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1;
+               reg_phy_ext |= (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1;
        else
-               reg_phy_ext = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
+               reg_phy_ext |= (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
 
        ret_val = hw->phy.ops.write_reg(hw, reg_slice,
                                        IXGBE_MDIO_ZERO_DEV_TYPE, reg_phy_ext);
index 4ce4c97ef5ad441c78afc8ceaf35fa34dfefb162..bb47814cfa9029a508176ec66fb51d063825875e 100644 (file)
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel 82599 Virtual Function driver
index 8617cae2f801cc279f828c4e44da886bad42749e..71c828842b11b4f110f6352095737fcf78f7991b 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 82599 Virtual Function driver
index 4400e49090b47d8e2715901ede4525dab06238c2..8e7d6c6f5c9242362b81ad98b7ee42f374433daf 100644 (file)
@@ -1,7 +1,7 @@
 /*******************************************************************************
 
   Intel 82599 Virtual Function driver
-  Copyright(c) 1999 - 2015 Intel Corporation.
+  Copyright(c) 1999 - 2018 Intel Corporation.
 
   This program is free software; you can redistribute it and/or modify it
   under the terms and conditions of the GNU General Public License,
@@ -82,6 +82,7 @@ static struct ixgbe_stats ixgbevf_gstrings_stats[] = {
 
 #define IXGBEVF_QUEUE_STATS_LEN ( \
        (((struct ixgbevf_adapter *)netdev_priv(netdev))->num_tx_queues + \
+        ((struct ixgbevf_adapter *)netdev_priv(netdev))->num_xdp_queues + \
         ((struct ixgbevf_adapter *)netdev_priv(netdev))->num_rx_queues) * \
         (sizeof(struct ixgbevf_stats) / sizeof(u64)))
 #define IXGBEVF_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbevf_gstrings_stats)
@@ -94,6 +95,13 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = {
 
 #define IXGBEVF_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN)
 
+static const char ixgbevf_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define IXGBEVF_PRIV_FLAGS_LEGACY_RX   BIT(0)
+       "legacy-rx",
+};
+
+#define IXGBEVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbevf_priv_flags_strings)
+
 static int ixgbevf_get_link_ksettings(struct net_device *netdev,
                                      struct ethtool_link_ksettings *cmd)
 {
@@ -241,6 +249,8 @@ static void ixgbevf_get_drvinfo(struct net_device *netdev,
                sizeof(drvinfo->version));
        strlcpy(drvinfo->bus_info, pci_name(adapter->pdev),
                sizeof(drvinfo->bus_info));
+
+       drvinfo->n_priv_flags = IXGBEVF_PRIV_FLAGS_STR_LEN;
 }
 
 static void ixgbevf_get_ringparam(struct net_device *netdev,
@@ -260,7 +270,7 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
        struct ixgbevf_adapter *adapter = netdev_priv(netdev);
        struct ixgbevf_ring *tx_ring = NULL, *rx_ring = NULL;
        u32 new_rx_count, new_tx_count;
-       int i, err = 0;
+       int i, j, err = 0;
 
        if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
                return -EINVAL;
@@ -284,15 +294,19 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
        if (!netif_running(adapter->netdev)) {
                for (i = 0; i < adapter->num_tx_queues; i++)
                        adapter->tx_ring[i]->count = new_tx_count;
+               for (i = 0; i < adapter->num_xdp_queues; i++)
+                       adapter->xdp_ring[i]->count = new_tx_count;
                for (i = 0; i < adapter->num_rx_queues; i++)
                        adapter->rx_ring[i]->count = new_rx_count;
                adapter->tx_ring_count = new_tx_count;
+               adapter->xdp_ring_count = new_tx_count;
                adapter->rx_ring_count = new_rx_count;
                goto clear_reset;
        }
 
        if (new_tx_count != adapter->tx_ring_count) {
-               tx_ring = vmalloc(adapter->num_tx_queues * sizeof(*tx_ring));
+               tx_ring = vmalloc((adapter->num_tx_queues +
+                                  adapter->num_xdp_queues) * sizeof(*tx_ring));
                if (!tx_ring) {
                        err = -ENOMEM;
                        goto clear_reset;
@@ -315,6 +329,24 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
                                goto clear_reset;
                        }
                }
+
+               for (j = 0; j < adapter->num_xdp_queues; i++, j++) {
+                       /* clone ring and setup updated count */
+                       tx_ring[i] = *adapter->xdp_ring[j];
+                       tx_ring[i].count = new_tx_count;
+                       err = ixgbevf_setup_tx_resources(&tx_ring[i]);
+                       if (err) {
+                               while (i) {
+                                       i--;
+                                       ixgbevf_free_tx_resources(&tx_ring[i]);
+                               }
+
+                               vfree(tx_ring);
+                               tx_ring = NULL;
+
+                               goto clear_reset;
+                       }
+               }
        }
 
        if (new_rx_count != adapter->rx_ring_count) {
@@ -327,8 +359,13 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
                for (i = 0; i < adapter->num_rx_queues; i++) {
                        /* clone ring and setup updated count */
                        rx_ring[i] = *adapter->rx_ring[i];
+
+                       /* Clear copied XDP RX-queue info */
+                       memset(&rx_ring[i].xdp_rxq, 0,
+                              sizeof(rx_ring[i].xdp_rxq));
+
                        rx_ring[i].count = new_rx_count;
-                       err = ixgbevf_setup_rx_resources(&rx_ring[i]);
+                       err = ixgbevf_setup_rx_resources(adapter, &rx_ring[i]);
                        if (err) {
                                while (i) {
                                        i--;
@@ -354,6 +391,12 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
                }
                adapter->tx_ring_count = new_tx_count;
 
+               for (j = 0; j < adapter->num_xdp_queues; i++, j++) {
+                       ixgbevf_free_tx_resources(adapter->xdp_ring[j]);
+                       *adapter->xdp_ring[j] = tx_ring[i];
+               }
+               adapter->xdp_ring_count = new_tx_count;
+
                vfree(tx_ring);
                tx_ring = NULL;
        }
@@ -376,7 +419,8 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
 clear_reset:
        /* free Tx resources if Rx error is encountered */
        if (tx_ring) {
-               for (i = 0; i < adapter->num_tx_queues; i++)
+               for (i = 0;
+                    i < adapter->num_tx_queues + adapter->num_xdp_queues; i++)
                        ixgbevf_free_tx_resources(&tx_ring[i]);
                vfree(tx_ring);
        }
@@ -392,6 +436,8 @@ static int ixgbevf_get_sset_count(struct net_device *netdev, int stringset)
                return IXGBEVF_TEST_LEN;
        case ETH_SS_STATS:
                return IXGBEVF_STATS_LEN;
+       case ETH_SS_PRIV_FLAGS:
+               return IXGBEVF_PRIV_FLAGS_STR_LEN;
        default:
                return -EINVAL;
        }
@@ -446,6 +492,23 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev,
                i += 2;
        }
 
+       /* populate XDP queue data */
+       for (j = 0; j < adapter->num_xdp_queues; j++) {
+               ring = adapter->xdp_ring[j];
+               if (!ring) {
+                       data[i++] = 0;
+                       data[i++] = 0;
+                       continue;
+               }
+
+               do {
+                       start = u64_stats_fetch_begin_irq(&ring->syncp);
+                       data[i] = ring->stats.packets;
+                       data[i + 1] = ring->stats.bytes;
+               } while (u64_stats_fetch_retry_irq(&ring->syncp, start));
+               i += 2;
+       }
+
        /* populate Rx queue data */
        for (j = 0; j < adapter->num_rx_queues; j++) {
                ring = adapter->rx_ring[j];
@@ -489,6 +552,12 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset,
                        sprintf(p, "tx_queue_%u_bytes", i);
                        p += ETH_GSTRING_LEN;
                }
+               for (i = 0; i < adapter->num_xdp_queues; i++) {
+                       sprintf(p, "xdp_queue_%u_packets", i);
+                       p += ETH_GSTRING_LEN;
+                       sprintf(p, "xdp_queue_%u_bytes", i);
+                       p += ETH_GSTRING_LEN;
+               }
                for (i = 0; i < adapter->num_rx_queues; i++) {
                        sprintf(p, "rx_queue_%u_packets", i);
                        p += ETH_GSTRING_LEN;
@@ -496,6 +565,10 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset,
                        p += ETH_GSTRING_LEN;
                }
                break;
+       case ETH_SS_PRIV_FLAGS:
+               memcpy(data, ixgbevf_priv_flags_strings,
+                      IXGBEVF_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+               break;
        }
 }
 
@@ -888,6 +961,37 @@ static int ixgbevf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
        return err;
 }
 
+static u32 ixgbevf_get_priv_flags(struct net_device *netdev)
+{
+       struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+       u32 priv_flags = 0;
+
+       if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+               priv_flags |= IXGBEVF_PRIV_FLAGS_LEGACY_RX;
+
+       return priv_flags;
+}
+
+static int ixgbevf_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+       struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+       unsigned int flags = adapter->flags;
+
+       flags &= ~IXGBEVF_FLAGS_LEGACY_RX;
+       if (priv_flags & IXGBEVF_PRIV_FLAGS_LEGACY_RX)
+               flags |= IXGBEVF_FLAGS_LEGACY_RX;
+
+       if (flags != adapter->flags) {
+               adapter->flags = flags;
+
+               /* reset interface to repopulate queues */
+               if (netif_running(netdev))
+                       ixgbevf_reinit_locked(adapter);
+       }
+
+       return 0;
+}
+
 static const struct ethtool_ops ixgbevf_ethtool_ops = {
        .get_drvinfo            = ixgbevf_get_drvinfo,
        .get_regs_len           = ixgbevf_get_regs_len,
@@ -909,6 +1013,8 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = {
        .get_rxfh_key_size      = ixgbevf_get_rxfh_key_size,
        .get_rxfh               = ixgbevf_get_rxfh,
        .get_link_ksettings     = ixgbevf_get_link_ksettings,
+       .get_priv_flags         = ixgbevf_get_priv_flags,
+       .set_priv_flags         = ixgbevf_set_priv_flags,
 };
 
 void ixgbevf_set_ethtool_ops(struct net_device *netdev)
index f6952425c87d40662022f17aa04c331ac4e1929b..447ce1d5e0e39db594d8c404033833dce013bf6d 100644 (file)
@@ -1,7 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 82599 Virtual Function driver
-  Copyright(c) 1999 - 2015 Intel Corporation.
+  Copyright(c) 1999 - 2018 Intel Corporation.
 
   This program is free software; you can redistribute it and/or modify it
   under the terms and conditions of the GNU General Public License,
@@ -34,6 +35,7 @@
 #include <linux/netdevice.h>
 #include <linux/if_vlan.h>
 #include <linux/u64_stats_sync.h>
+#include <net/xdp.h>
 
 #include "vf.h"
 
 struct ixgbevf_tx_buffer {
        union ixgbe_adv_tx_desc *next_to_watch;
        unsigned long time_stamp;
-       struct sk_buff *skb;
+       union {
+               struct sk_buff *skb;
+               /* XDP uses address ptr on irq_clean */
+               void *data;
+       };
        unsigned int bytecount;
        unsigned short gso_segs;
        __be16 protocol;
@@ -89,20 +95,25 @@ struct ixgbevf_rx_queue_stats {
 };
 
 enum ixgbevf_ring_state_t {
+       __IXGBEVF_RX_3K_BUFFER,
+       __IXGBEVF_RX_BUILD_SKB_ENABLED,
        __IXGBEVF_TX_DETECT_HANG,
        __IXGBEVF_HANG_CHECK_ARMED,
+       __IXGBEVF_TX_XDP_RING,
 };
 
-#define check_for_tx_hang(ring) \
-       test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-#define set_check_for_tx_hang(ring) \
-       set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-#define clear_check_for_tx_hang(ring) \
-       clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define ring_is_xdp(ring) \
+               test_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state)
+#define set_ring_xdp(ring) \
+               set_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state)
+#define clear_ring_xdp(ring) \
+               clear_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state)
 
 struct ixgbevf_ring {
        struct ixgbevf_ring *next;
+       struct ixgbevf_q_vector *q_vector;      /* backpointer to q_vector */
        struct net_device *netdev;
+       struct bpf_prog *xdp_prog;
        struct device *dev;
        void *desc;                     /* descriptor ring memory */
        dma_addr_t dma;                 /* phys. address of descriptor ring */
@@ -123,7 +134,7 @@ struct ixgbevf_ring {
                struct ixgbevf_tx_queue_stats tx_stats;
                struct ixgbevf_rx_queue_stats rx_stats;
        };
-
+       struct xdp_rxq_info xdp_rxq;
        u64 hw_csum_rx_error;
        u8 __iomem *tail;
        struct sk_buff *skb;
@@ -133,13 +144,14 @@ struct ixgbevf_ring {
         */
        u16 reg_idx;
        int queue_index; /* needed for multiqueue queue management */
-};
+} ____cacheline_internodealigned_in_smp;
 
 /* How many Rx Buffers do we bundle into one write to the hardware ? */
 #define IXGBEVF_RX_BUFFER_WRITE        16      /* Must be power of 2 */
 
 #define MAX_RX_QUEUES IXGBE_VF_MAX_RX_QUEUES
 #define MAX_TX_QUEUES IXGBE_VF_MAX_TX_QUEUES
+#define MAX_XDP_QUEUES IXGBE_VF_MAX_TX_QUEUES
 #define IXGBEVF_MAX_RSS_QUEUES         2
 #define IXGBEVF_82599_RETA_SIZE                128     /* 128 entries */
 #define IXGBEVF_X550_VFRETA_SIZE       64      /* 64 entries */
@@ -156,12 +168,20 @@ struct ixgbevf_ring {
 /* Supported Rx Buffer Sizes */
 #define IXGBEVF_RXBUFFER_256   256    /* Used for packet split */
 #define IXGBEVF_RXBUFFER_2048  2048
+#define IXGBEVF_RXBUFFER_3072  3072
 
 #define IXGBEVF_RX_HDR_SIZE    IXGBEVF_RXBUFFER_256
-#define IXGBEVF_RX_BUFSZ       IXGBEVF_RXBUFFER_2048
 
 #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
 
+#define IXGBEVF_SKB_PAD                (NET_SKB_PAD + NET_IP_ALIGN)
+#if (PAGE_SIZE < 8192)
+#define IXGBEVF_MAX_FRAME_BUILD_SKB \
+       (SKB_WITH_OVERHEAD(IXGBEVF_RXBUFFER_2048) - IXGBEVF_SKB_PAD)
+#else
+#define IXGBEVF_MAX_FRAME_BUILD_SKB    IXGBEVF_RXBUFFER_2048
+#endif
+
 #define IXGBE_TX_FLAGS_CSUM            BIT(0)
 #define IXGBE_TX_FLAGS_VLAN            BIT(1)
 #define IXGBE_TX_FLAGS_TSO             BIT(2)
@@ -170,6 +190,50 @@ struct ixgbevf_ring {
 #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK  0x0000e000
 #define IXGBE_TX_FLAGS_VLAN_SHIFT      16
 
+#define ring_uses_large_buffer(ring) \
+       test_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define set_ring_uses_large_buffer(ring) \
+       set_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define clear_ring_uses_large_buffer(ring) \
+       clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+
+#define ring_uses_build_skb(ring) \
+       test_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define set_ring_build_skb_enabled(ring) \
+       set_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define clear_ring_build_skb_enabled(ring) \
+       clear_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+
+static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+       if (ring_uses_large_buffer(ring))
+               return IXGBEVF_RXBUFFER_3072;
+
+       if (ring_uses_build_skb(ring))
+               return IXGBEVF_MAX_FRAME_BUILD_SKB;
+#endif
+       return IXGBEVF_RXBUFFER_2048;
+}
+
+static inline unsigned int ixgbevf_rx_pg_order(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+       if (ring_uses_large_buffer(ring))
+               return 1;
+#endif
+       return 0;
+}
+
+#define ixgbevf_rx_pg_size(_ring) (PAGE_SIZE << ixgbevf_rx_pg_order(_ring))
+
+#define check_for_tx_hang(ring) \
+       test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define set_check_for_tx_hang(ring) \
+       set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define clear_check_for_tx_hang(ring) \
+       clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+
 struct ixgbevf_ring_container {
        struct ixgbevf_ring *ring;      /* pointer to linked list of rings */
        unsigned int total_bytes;       /* total bytes processed this int */
@@ -194,7 +258,11 @@ struct ixgbevf_q_vector {
        u16 itr; /* Interrupt throttle rate written to EITR */
        struct napi_struct napi;
        struct ixgbevf_ring_container rx, tx;
+       struct rcu_head rcu;    /* to avoid race with update stats on free */
        char name[IFNAMSIZ + 9];
+
+       /* for dynamic allocation of rings associated with this q_vector */
+       struct ixgbevf_ring ring[0] ____cacheline_internodealigned_in_smp;
 #ifdef CONFIG_NET_RX_BUSY_POLL
        unsigned int state;
 #define IXGBEVF_QV_STATE_IDLE          0
@@ -284,6 +352,10 @@ struct ixgbevf_adapter {
        u32 eims_enable_mask;
        u32 eims_other;
 
+       /* XDP */
+       int num_xdp_queues;
+       struct ixgbevf_ring *xdp_ring[MAX_XDP_QUEUES];
+
        /* TX */
        int num_tx_queues;
        struct ixgbevf_ring *tx_ring[MAX_TX_QUEUES]; /* One per active queue */
@@ -304,6 +376,7 @@ struct ixgbevf_adapter {
 
        /* OS defined structs */
        struct net_device *netdev;
+       struct bpf_prog *xdp_prog;
        struct pci_dev *pdev;
 
        /* structs defined in ixgbe_vf.h */
@@ -317,6 +390,7 @@ struct ixgbevf_adapter {
        unsigned long state;
        u64 tx_busy;
        unsigned int tx_ring_count;
+       unsigned int xdp_ring_count;
        unsigned int rx_ring_count;
 
        u8 __iomem *io_addr; /* Mainly for iounmap use */
@@ -331,6 +405,8 @@ struct ixgbevf_adapter {
 
        u32 *rss_key;
        u8 rss_indir_tbl[IXGBEVF_X550_VFRETA_SIZE];
+       u32 flags;
+#define IXGBEVF_FLAGS_LEGACY_RX                BIT(1)
 };
 
 enum ixbgevf_state_t {
@@ -388,7 +464,8 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter);
 void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter);
 void ixgbevf_reset(struct ixgbevf_adapter *adapter);
 void ixgbevf_set_ethtool_ops(struct net_device *netdev);
-int ixgbevf_setup_rx_resources(struct ixgbevf_ring *);
+int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter,
+                              struct ixgbevf_ring *rx_ring);
 int ixgbevf_setup_tx_resources(struct ixgbevf_ring *);
 void ixgbevf_free_rx_resources(struct ixgbevf_ring *);
 void ixgbevf_free_tx_resources(struct ixgbevf_ring *);
index 9b3d43d2810697949ef2f9ea1c6972e562c3a98a..3d9033f26effa743fc4142a6b99a64d7a5effcfd 100644 (file)
@@ -1,7 +1,7 @@
 /*******************************************************************************
 
   Intel 82599 Virtual Function driver
-  Copyright(c) 1999 - 2015 Intel Corporation.
+  Copyright(c) 1999 - 2018 Intel Corporation.
 
   This program is free software; you can redistribute it and/or modify it
   under the terms and conditions of the GNU General Public License,
@@ -50,6 +50,9 @@
 #include <linux/if_vlan.h>
 #include <linux/prefetch.h>
 #include <net/mpls.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <linux/atomic.h>
 
 #include "ixgbevf.h"
 
@@ -130,6 +133,9 @@ static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter)
 static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter);
 static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector);
 static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter);
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer);
+static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring,
+                                 struct ixgbevf_rx_buffer *old_buff);
 
 static void ixgbevf_remove_adapter(struct ixgbe_hw *hw)
 {
@@ -318,7 +324,10 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
                total_packets += tx_buffer->gso_segs;
 
                /* free the skb */
-               napi_consume_skb(tx_buffer->skb, napi_budget);
+               if (ring_is_xdp(tx_ring))
+                       page_frag_free(tx_buffer->data);
+               else
+                       napi_consume_skb(tx_buffer->skb, napi_budget);
 
                /* unmap skb header data */
                dma_unmap_single(tx_ring->dev,
@@ -382,7 +391,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
 
                eop_desc = tx_ring->tx_buffer_info[i].next_to_watch;
 
-               pr_err("Detected Tx Unit Hang\n"
+               pr_err("Detected Tx Unit Hang%s\n"
                       "  Tx Queue             <%d>\n"
                       "  TDH, TDT             <%x>, <%x>\n"
                       "  next_to_use          <%x>\n"
@@ -392,6 +401,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
                       "  eop_desc->wb.status  <%x>\n"
                       "  time_stamp           <%lx>\n"
                       "  jiffies              <%lx>\n",
+                      ring_is_xdp(tx_ring) ? " XDP" : "",
                       tx_ring->queue_index,
                       IXGBE_READ_REG(hw, IXGBE_VFTDH(tx_ring->reg_idx)),
                       IXGBE_READ_REG(hw, IXGBE_VFTDT(tx_ring->reg_idx)),
@@ -399,7 +409,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
                       eop_desc, (eop_desc ? eop_desc->wb.status : 0),
                       tx_ring->tx_buffer_info[i].time_stamp, jiffies);
 
-               netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+               if (!ring_is_xdp(tx_ring))
+                       netif_stop_subqueue(tx_ring->netdev,
+                                           tx_ring->queue_index);
 
                /* schedule immediate reset if we believe we hung */
                ixgbevf_tx_timeout_reset(adapter);
@@ -407,6 +419,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
                return true;
        }
 
+       if (ring_is_xdp(tx_ring))
+               return !!budget;
+
 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
        if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
                     (ixgbevf_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
@@ -527,6 +542,51 @@ static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring,
        skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 }
 
+static
+struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring,
+                                               const unsigned int size)
+{
+       struct ixgbevf_rx_buffer *rx_buffer;
+
+       rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+       prefetchw(rx_buffer->page);
+
+       /* we are reusing so sync this buffer for CPU use */
+       dma_sync_single_range_for_cpu(rx_ring->dev,
+                                     rx_buffer->dma,
+                                     rx_buffer->page_offset,
+                                     size,
+                                     DMA_FROM_DEVICE);
+
+       rx_buffer->pagecnt_bias--;
+
+       return rx_buffer;
+}
+
+static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring,
+                                 struct ixgbevf_rx_buffer *rx_buffer,
+                                 struct sk_buff *skb)
+{
+       if (ixgbevf_can_reuse_rx_page(rx_buffer)) {
+               /* hand second half of page back to the ring */
+               ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
+       } else {
+               if (IS_ERR(skb))
+                       /* We are not reusing the buffer so unmap it and free
+                        * any references we are holding to it
+                        */
+                       dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+                                            ixgbevf_rx_pg_size(rx_ring),
+                                            DMA_FROM_DEVICE,
+                                            IXGBEVF_RX_DMA_ATTR);
+               __page_frag_cache_drain(rx_buffer->page,
+                                       rx_buffer->pagecnt_bias);
+       }
+
+       /* clear contents of rx_buffer */
+       rx_buffer->page = NULL;
+}
+
 /**
  * ixgbevf_is_non_eop - process handling of non-EOP buffers
  * @rx_ring: Rx ring being processed
@@ -554,32 +614,38 @@ static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring,
        return true;
 }
 
+static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring)
+{
+       return ring_uses_build_skb(rx_ring) ? IXGBEVF_SKB_PAD : 0;
+}
+
 static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
                                      struct ixgbevf_rx_buffer *bi)
 {
        struct page *page = bi->page;
-       dma_addr_t dma = bi->dma;
+       dma_addr_t dma;
 
        /* since we are recycling buffers we should seldom need to alloc */
        if (likely(page))
                return true;
 
        /* alloc new page for storage */
-       page = dev_alloc_page();
+       page = dev_alloc_pages(ixgbevf_rx_pg_order(rx_ring));
        if (unlikely(!page)) {
                rx_ring->rx_stats.alloc_rx_page_failed++;
                return false;
        }
 
        /* map page for use */
-       dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+       dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+                                ixgbevf_rx_pg_size(rx_ring),
                                 DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR);
 
        /* if mapping failed free memory back to system since
         * there isn't much point in holding memory we can't use
         */
        if (dma_mapping_error(rx_ring->dev, dma)) {
-               __free_page(page);
+               __free_pages(page, ixgbevf_rx_pg_order(rx_ring));
 
                rx_ring->rx_stats.alloc_rx_page_failed++;
                return false;
@@ -587,7 +653,7 @@ static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
 
        bi->dma = dma;
        bi->page = page;
-       bi->page_offset = 0;
+       bi->page_offset = ixgbevf_rx_offset(rx_ring);
        bi->pagecnt_bias = 1;
        rx_ring->rx_stats.alloc_rx_page++;
 
@@ -621,7 +687,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
                /* sync the buffer for use by the device */
                dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
                                                 bi->page_offset,
-                                                IXGBEVF_RX_BUFSZ,
+                                                ixgbevf_rx_bufsz(rx_ring),
                                                 DMA_FROM_DEVICE);
 
                /* Refresh the desc even if pkt_addr didn't change
@@ -685,6 +751,10 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring,
                                    union ixgbe_adv_rx_desc *rx_desc,
                                    struct sk_buff *skb)
 {
+       /* XDP packets use error pointer so abort at this point */
+       if (IS_ERR(skb))
+               return true;
+
        /* verify that the packet does not have any known errors */
        if (unlikely(ixgbevf_test_staterr(rx_desc,
                                          IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) {
@@ -734,11 +804,10 @@ static inline bool ixgbevf_page_is_reserved(struct page *page)
        return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
 }
 
-static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
-                                     struct page *page,
-                                     const unsigned int truesize)
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer)
 {
-       unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--;
+       unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+       struct page *page = rx_buffer->page;
 
        /* avoid re-using remote pages */
        if (unlikely(ixgbevf_page_is_reserved(page)))
@@ -746,17 +815,13 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
 
 #if (PAGE_SIZE < 8192)
        /* if we are only owner of page we can reuse it */
-       if (unlikely(page_ref_count(page) != pagecnt_bias))
+       if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
                return false;
-
-       /* flip page offset to other buffer */
-       rx_buffer->page_offset ^= IXGBEVF_RX_BUFSZ;
-
 #else
-       /* move offset up to the next cache line */
-       rx_buffer->page_offset += truesize;
+#define IXGBEVF_LAST_OFFSET \
+       (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBEVF_RXBUFFER_2048)
 
-       if (rx_buffer->page_offset > (PAGE_SIZE - IXGBEVF_RX_BUFSZ))
+       if (rx_buffer->page_offset > IXGBEVF_LAST_OFFSET)
                return false;
 
 #endif
@@ -765,7 +830,7 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
         * the pagecnt_bias and page count so that we fully restock the
         * number of references the driver holds.
         */
-       if (unlikely(pagecnt_bias == 1)) {
+       if (unlikely(!pagecnt_bias)) {
                page_ref_add(page, USHRT_MAX);
                rx_buffer->pagecnt_bias = USHRT_MAX;
        }
@@ -777,136 +842,268 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
  * ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff
  * @rx_ring: rx descriptor ring to transact packets on
  * @rx_buffer: buffer containing page to add
- * @rx_desc: descriptor containing length of buffer written by hardware
  * @skb: sk_buff to place the data into
+ * @size: size of buffer to be added
  *
  * This function will add the data contained in rx_buffer->page to the skb.
- * This is done either through a direct copy if the data in the buffer is
- * less than the skb header size, otherwise it will just attach the page as
- * a frag to the skb.
- *
- * The function will then update the page offset if necessary and return
- * true if the buffer can be reused by the adapter.
  **/
-static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
+static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
                                struct ixgbevf_rx_buffer *rx_buffer,
-                               u16 size,
-                               union ixgbe_adv_rx_desc *rx_desc,
-                               struct sk_buff *skb)
+                               struct sk_buff *skb,
+                               unsigned int size)
 {
-       struct page *page = rx_buffer->page;
-       unsigned char *va = page_address(page) + rx_buffer->page_offset;
 #if (PAGE_SIZE < 8192)
-       unsigned int truesize = IXGBEVF_RX_BUFSZ;
+       unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
 #else
-       unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+       unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+                               SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) :
+                               SKB_DATA_ALIGN(size);
 #endif
-       unsigned int pull_len;
+       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+                       rx_buffer->page_offset, size, truesize);
+#if (PAGE_SIZE < 8192)
+       rx_buffer->page_offset ^= truesize;
+#else
+       rx_buffer->page_offset += truesize;
+#endif
+}
 
-       if (unlikely(skb_is_nonlinear(skb)))
-               goto add_tail_frag;
+static
+struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
+                                     struct ixgbevf_rx_buffer *rx_buffer,
+                                     struct xdp_buff *xdp,
+                                     union ixgbe_adv_rx_desc *rx_desc)
+{
+       unsigned int size = xdp->data_end - xdp->data;
+#if (PAGE_SIZE < 8192)
+       unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+       unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end -
+                                              xdp->data_hard_start);
+#endif
+       unsigned int headlen;
+       struct sk_buff *skb;
 
-       if (likely(size <= IXGBEVF_RX_HDR_SIZE)) {
-               memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+       /* prefetch first cache line of first page */
+       prefetch(xdp->data);
+#if L1_CACHE_BYTES < 128
+       prefetch(xdp->data + L1_CACHE_BYTES);
+#endif
+       /* Note, we get here by enabling legacy-rx via:
+        *
+        *    ethtool --set-priv-flags <dev> legacy-rx on
+        *
+        * In this mode, we currently get 0 extra XDP headroom as
+        * opposed to having legacy-rx off, where we process XDP
+        * packets going to stack via ixgbevf_build_skb().
+        *
+        * For ixgbevf_construct_skb() mode it means that the
+        * xdp->data_meta will always point to xdp->data, since
+        * the helper cannot expand the head. Should this ever
+        * change in the future for legacy-rx mode on, then let's
+        * also add xdp->data_meta handling here.
+        */
 
-               /* page is not reserved, we can reuse buffer as is */
-               if (likely(!ixgbevf_page_is_reserved(page)))
-                       return true;
+       /* allocate a skb to store the frags */
+       skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE);
+       if (unlikely(!skb))
+               return NULL;
 
-               /* this page cannot be reused so discard it */
-               return false;
-       }
-
-       /* we need the header to contain the greater of either ETH_HLEN or
-        * 60 bytes if the skb->len is less than 60 for skb_pad.
-        */
-       pull_len = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
+       /* Determine available headroom for copy */
+       headlen = size;
+       if (headlen > IXGBEVF_RX_HDR_SIZE)
+               headlen = eth_get_headlen(xdp->data, IXGBEVF_RX_HDR_SIZE);
 
        /* align pull length to size of long to optimize memcpy performance */
-       memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
+       memcpy(__skb_put(skb, headlen), xdp->data,
+              ALIGN(headlen, sizeof(long)));
 
        /* update all of the pointers */
-       va += pull_len;
-       size -= pull_len;
+       size -= headlen;
+       if (size) {
+               skb_add_rx_frag(skb, 0, rx_buffer->page,
+                               (xdp->data + headlen) -
+                                       page_address(rx_buffer->page),
+                               size, truesize);
+#if (PAGE_SIZE < 8192)
+               rx_buffer->page_offset ^= truesize;
+#else
+               rx_buffer->page_offset += truesize;
+#endif
+       } else {
+               rx_buffer->pagecnt_bias++;
+       }
 
-add_tail_frag:
-       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-                       (unsigned long)va & ~PAGE_MASK, size, truesize);
+       return skb;
+}
+
+static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter,
+                                            u32 qmask)
+{
+       struct ixgbe_hw *hw = &adapter->hw;
 
-       return ixgbevf_can_reuse_rx_page(rx_buffer, page, truesize);
+       IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask);
 }
 
-static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
-                                              union ixgbe_adv_rx_desc *rx_desc,
-                                              struct sk_buff *skb)
+static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
+                                        struct ixgbevf_rx_buffer *rx_buffer,
+                                        struct xdp_buff *xdp,
+                                        union ixgbe_adv_rx_desc *rx_desc)
 {
-       struct ixgbevf_rx_buffer *rx_buffer;
-       struct page *page;
-       u16 size = le16_to_cpu(rx_desc->wb.upper.length);
+       unsigned int metasize = xdp->data - xdp->data_meta;
+#if (PAGE_SIZE < 8192)
+       unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+       unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+                               SKB_DATA_ALIGN(xdp->data_end -
+                                              xdp->data_hard_start);
+#endif
+       struct sk_buff *skb;
 
-       rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
-       page = rx_buffer->page;
-       prefetchw(page);
+       /* Prefetch first cache line of first page. If xdp->data_meta
+        * is unused, this points to xdp->data, otherwise, we likely
+        * have a consumer accessing first few bytes of meta data,
+        * and then actual data.
+        */
+       prefetch(xdp->data_meta);
+#if L1_CACHE_BYTES < 128
+       prefetch(xdp->data_meta + L1_CACHE_BYTES);
+#endif
 
-       /* we are reusing so sync this buffer for CPU use */
-       dma_sync_single_range_for_cpu(rx_ring->dev,
-                                     rx_buffer->dma,
-                                     rx_buffer->page_offset,
-                                     size,
-                                     DMA_FROM_DEVICE);
+       /* build an skb around the page buffer */
+       skb = build_skb(xdp->data_hard_start, truesize);
+       if (unlikely(!skb))
+               return NULL;
 
-       if (likely(!skb)) {
-               void *page_addr = page_address(page) +
-                                 rx_buffer->page_offset;
+       /* update pointers within the skb to store the data */
+       skb_reserve(skb, xdp->data - xdp->data_hard_start);
+       __skb_put(skb, xdp->data_end - xdp->data);
+       if (metasize)
+               skb_metadata_set(skb, metasize);
 
-               /* prefetch first cache line of first page */
-               prefetch(page_addr);
-#if L1_CACHE_BYTES < 128
-               prefetch(page_addr + L1_CACHE_BYTES);
+       /* update buffer offset */
+#if (PAGE_SIZE < 8192)
+       rx_buffer->page_offset ^= truesize;
+#else
+       rx_buffer->page_offset += truesize;
 #endif
 
-               /* allocate a skb to store the frags */
-               skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
-                                               IXGBEVF_RX_HDR_SIZE);
-               if (unlikely(!skb)) {
-                       rx_ring->rx_stats.alloc_rx_buff_failed++;
-                       return NULL;
-               }
+       return skb;
+}
 
-               /* we will be copying header into skb->data in
-                * pskb_may_pull so it is in our interest to prefetch
-                * it now to avoid a possible cache miss
-                */
-               prefetchw(skb->data);
-       }
+#define IXGBEVF_XDP_PASS 0
+#define IXGBEVF_XDP_CONSUMED 1
+#define IXGBEVF_XDP_TX 2
 
-       /* pull page into skb */
-       if (ixgbevf_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) {
-               /* hand second half of page back to the ring */
-               ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
-       } else {
-               /* We are not reusing the buffer so unmap it and free
-                * any references we are holding to it
-                */
-               dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
-                                    PAGE_SIZE, DMA_FROM_DEVICE,
-                                    IXGBEVF_RX_DMA_ATTR);
-               __page_frag_cache_drain(page, rx_buffer->pagecnt_bias);
-       }
+static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring,
+                                struct xdp_buff *xdp)
+{
+       struct ixgbevf_tx_buffer *tx_buffer;
+       union ixgbe_adv_tx_desc *tx_desc;
+       u32 len, cmd_type;
+       dma_addr_t dma;
+       u16 i;
 
-       /* clear contents of buffer_info */
-       rx_buffer->dma = 0;
-       rx_buffer->page = NULL;
+       len = xdp->data_end - xdp->data;
 
-       return skb;
+       if (unlikely(!ixgbevf_desc_unused(ring)))
+               return IXGBEVF_XDP_CONSUMED;
+
+       dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE);
+       if (dma_mapping_error(ring->dev, dma))
+               return IXGBEVF_XDP_CONSUMED;
+
+       /* record the location of the first descriptor for this packet */
+       tx_buffer = &ring->tx_buffer_info[ring->next_to_use];
+       tx_buffer->bytecount = len;
+       tx_buffer->gso_segs = 1;
+       tx_buffer->protocol = 0;
+
+       i = ring->next_to_use;
+       tx_desc = IXGBEVF_TX_DESC(ring, i);
+
+       dma_unmap_len_set(tx_buffer, len, len);
+       dma_unmap_addr_set(tx_buffer, dma, dma);
+       tx_buffer->data = xdp->data;
+       tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+       /* put descriptor type bits */
+       cmd_type = IXGBE_ADVTXD_DTYP_DATA |
+                  IXGBE_ADVTXD_DCMD_DEXT |
+                  IXGBE_ADVTXD_DCMD_IFCS;
+       cmd_type |= len | IXGBE_TXD_CMD;
+       tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+       tx_desc->read.olinfo_status =
+                       cpu_to_le32((len << IXGBE_ADVTXD_PAYLEN_SHIFT) |
+                                   IXGBE_ADVTXD_CC);
+
+       /* Avoid any potential race with cleanup */
+       smp_wmb();
+
+       /* set next_to_watch value indicating a packet is present */
+       i++;
+       if (i == ring->count)
+               i = 0;
+
+       tx_buffer->next_to_watch = tx_desc;
+       ring->next_to_use = i;
+
+       return IXGBEVF_XDP_TX;
 }
 
-static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter,
-                                            u32 qmask)
+static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
+                                      struct ixgbevf_ring  *rx_ring,
+                                      struct xdp_buff *xdp)
 {
-       struct ixgbe_hw *hw = &adapter->hw;
+       int result = IXGBEVF_XDP_PASS;
+       struct ixgbevf_ring *xdp_ring;
+       struct bpf_prog *xdp_prog;
+       u32 act;
 
-       IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask);
+       rcu_read_lock();
+       xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+
+       if (!xdp_prog)
+               goto xdp_out;
+
+       act = bpf_prog_run_xdp(xdp_prog, xdp);
+       switch (act) {
+       case XDP_PASS:
+               break;
+       case XDP_TX:
+               xdp_ring = adapter->xdp_ring[rx_ring->queue_index];
+               result = ixgbevf_xmit_xdp_ring(xdp_ring, xdp);
+               break;
+       default:
+               bpf_warn_invalid_xdp_action(act);
+               /* fallthrough */
+       case XDP_ABORTED:
+               trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+               /* fallthrough -- handle aborts by dropping packet */
+       case XDP_DROP:
+               result = IXGBEVF_XDP_CONSUMED;
+               break;
+       }
+xdp_out:
+       rcu_read_unlock();
+       return ERR_PTR(-result);
+}
+
+static void ixgbevf_rx_buffer_flip(struct ixgbevf_ring *rx_ring,
+                                  struct ixgbevf_rx_buffer *rx_buffer,
+                                  unsigned int size)
+{
+#if (PAGE_SIZE < 8192)
+       unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+
+       rx_buffer->page_offset ^= truesize;
+#else
+       unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+                               SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) :
+                               SKB_DATA_ALIGN(size);
+
+       rx_buffer->page_offset += truesize;
+#endif
 }
 
 static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
@@ -914,11 +1111,18 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
                                int budget)
 {
        unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+       struct ixgbevf_adapter *adapter = q_vector->adapter;
        u16 cleaned_count = ixgbevf_desc_unused(rx_ring);
        struct sk_buff *skb = rx_ring->skb;
+       bool xdp_xmit = false;
+       struct xdp_buff xdp;
+
+       xdp.rxq = &rx_ring->xdp_rxq;
 
        while (likely(total_rx_packets < budget)) {
+               struct ixgbevf_rx_buffer *rx_buffer;
                union ixgbe_adv_rx_desc *rx_desc;
+               unsigned int size;
 
                /* return some buffers to hardware, one at a time is too slow */
                if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) {
@@ -927,8 +1131,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
                }
 
                rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
-
-               if (!rx_desc->wb.upper.length)
+               size = le16_to_cpu(rx_desc->wb.upper.length);
+               if (!size)
                        break;
 
                /* This memory barrier is needed to keep us from reading
@@ -937,15 +1141,48 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
                 */
                rmb();
 
+               rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size);
+
                /* retrieve a buffer from the ring */
-               skb = ixgbevf_fetch_rx_buffer(rx_ring, rx_desc, skb);
+               if (!skb) {
+                       xdp.data = page_address(rx_buffer->page) +
+                                  rx_buffer->page_offset;
+                       xdp.data_meta = xdp.data;
+                       xdp.data_hard_start = xdp.data -
+                                             ixgbevf_rx_offset(rx_ring);
+                       xdp.data_end = xdp.data + size;
+
+                       skb = ixgbevf_run_xdp(adapter, rx_ring, &xdp);
+               }
+
+               if (IS_ERR(skb)) {
+                       if (PTR_ERR(skb) == -IXGBEVF_XDP_TX) {
+                               xdp_xmit = true;
+                               ixgbevf_rx_buffer_flip(rx_ring, rx_buffer,
+                                                      size);
+                       } else {
+                               rx_buffer->pagecnt_bias++;
+                       }
+                       total_rx_packets++;
+                       total_rx_bytes += size;
+               } else if (skb) {
+                       ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size);
+               } else if (ring_uses_build_skb(rx_ring)) {
+                       skb = ixgbevf_build_skb(rx_ring, rx_buffer,
+                                               &xdp, rx_desc);
+               } else {
+                       skb = ixgbevf_construct_skb(rx_ring, rx_buffer,
+                                                   &xdp, rx_desc);
+               }
 
                /* exit if we failed to retrieve a buffer */
                if (!skb) {
                        rx_ring->rx_stats.alloc_rx_buff_failed++;
+                       rx_buffer->pagecnt_bias++;
                        break;
                }
 
+               ixgbevf_put_rx_buffer(rx_ring, rx_buffer, skb);
                cleaned_count++;
 
                /* fetch next buffer in frame if non-eop */
@@ -987,6 +1224,17 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
        /* place incomplete frames back on ring for completion */
        rx_ring->skb = skb;
 
+       if (xdp_xmit) {
+               struct ixgbevf_ring *xdp_ring =
+                       adapter->xdp_ring[rx_ring->queue_index];
+
+               /* Force memory writes to complete before letting h/w
+                * know there are new descriptors to fetch.
+                */
+               wmb();
+               ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use);
+       }
+
        u64_stats_update_begin(&rx_ring->syncp);
        rx_ring->stats.packets += total_rx_packets;
        rx_ring->stats.bytes += total_rx_bytes;
@@ -1260,85 +1508,6 @@ static irqreturn_t ixgbevf_msix_clean_rings(int irq, void *data)
        return IRQ_HANDLED;
 }
 
-static inline void map_vector_to_rxq(struct ixgbevf_adapter *a, int v_idx,
-                                    int r_idx)
-{
-       struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx];
-
-       a->rx_ring[r_idx]->next = q_vector->rx.ring;
-       q_vector->rx.ring = a->rx_ring[r_idx];
-       q_vector->rx.count++;
-}
-
-static inline void map_vector_to_txq(struct ixgbevf_adapter *a, int v_idx,
-                                    int t_idx)
-{
-       struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx];
-
-       a->tx_ring[t_idx]->next = q_vector->tx.ring;
-       q_vector->tx.ring = a->tx_ring[t_idx];
-       q_vector->tx.count++;
-}
-
-/**
- * ixgbevf_map_rings_to_vectors - Maps descriptor rings to vectors
- * @adapter: board private structure to initialize
- *
- * This function maps descriptor rings to the queue-specific vectors
- * we were allotted through the MSI-X enabling code.  Ideally, we'd have
- * one vector per ring/queue, but on a constrained vector budget, we
- * group the rings as "efficiently" as possible.  You would add new
- * mapping configurations in here.
- **/
-static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter)
-{
-       int q_vectors;
-       int v_start = 0;
-       int rxr_idx = 0, txr_idx = 0;
-       int rxr_remaining = adapter->num_rx_queues;
-       int txr_remaining = adapter->num_tx_queues;
-       int i, j;
-       int rqpv, tqpv;
-
-       q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
-       /* The ideal configuration...
-        * We have enough vectors to map one per queue.
-        */
-       if (q_vectors == adapter->num_rx_queues + adapter->num_tx_queues) {
-               for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++)
-                       map_vector_to_rxq(adapter, v_start, rxr_idx);
-
-               for (; txr_idx < txr_remaining; v_start++, txr_idx++)
-                       map_vector_to_txq(adapter, v_start, txr_idx);
-               return 0;
-       }
-
-       /* If we don't have enough vectors for a 1-to-1
-        * mapping, we'll have to group them so there are
-        * multiple queues per vector.
-        */
-       /* Re-adjusting *qpv takes care of the remainder. */
-       for (i = v_start; i < q_vectors; i++) {
-               rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - i);
-               for (j = 0; j < rqpv; j++) {
-                       map_vector_to_rxq(adapter, i, rxr_idx);
-                       rxr_idx++;
-                       rxr_remaining--;
-               }
-       }
-       for (i = v_start; i < q_vectors; i++) {
-               tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - i);
-               for (j = 0; j < tqpv; j++) {
-                       map_vector_to_txq(adapter, i, txr_idx);
-                       txr_idx++;
-                       txr_remaining--;
-               }
-       }
-
-       return 0;
-}
-
 /**
  * ixgbevf_request_msix_irqs - Initialize MSI-X interrupts
  * @adapter: board private structure
@@ -1411,20 +1580,6 @@ static int ixgbevf_request_msix_irqs(struct ixgbevf_adapter *adapter)
        return err;
 }
 
-static inline void ixgbevf_reset_q_vectors(struct ixgbevf_adapter *adapter)
-{
-       int i, q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
-       for (i = 0; i < q_vectors; i++) {
-               struct ixgbevf_q_vector *q_vector = adapter->q_vector[i];
-
-               q_vector->rx.ring = NULL;
-               q_vector->tx.ring = NULL;
-               q_vector->rx.count = 0;
-               q_vector->tx.count = 0;
-       }
-}
-
 /**
  * ixgbevf_request_irq - initialize interrupts
  * @adapter: board private structure
@@ -1464,8 +1619,6 @@ static void ixgbevf_free_irq(struct ixgbevf_adapter *adapter)
                free_irq(adapter->msix_entries[i].vector,
                         adapter->q_vector[i]);
        }
-
-       ixgbevf_reset_q_vectors(adapter);
 }
 
 /**
@@ -1583,11 +1736,14 @@ static void ixgbevf_configure_tx(struct ixgbevf_adapter *adapter)
        /* Setup the HW Tx Head and Tail descriptor pointers */
        for (i = 0; i < adapter->num_tx_queues; i++)
                ixgbevf_configure_tx_ring(adapter, adapter->tx_ring[i]);
+       for (i = 0; i < adapter->num_xdp_queues; i++)
+               ixgbevf_configure_tx_ring(adapter, adapter->xdp_ring[i]);
 }
 
 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT        2
 
-static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
+static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter,
+                                    struct ixgbevf_ring *ring, int index)
 {
        struct ixgbe_hw *hw = &adapter->hw;
        u32 srrctl;
@@ -1595,7 +1751,10 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
        srrctl = IXGBE_SRRCTL_DROP_EN;
 
        srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT;
-       srrctl |= IXGBEVF_RX_BUFSZ >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+       if (ring_uses_large_buffer(ring))
+               srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+       else
+               srrctl |= IXGBEVF_RXBUFFER_2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
        srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
 
        IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl);
@@ -1767,10 +1926,21 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
        ring->next_to_use = 0;
        ring->next_to_alloc = 0;
 
-       ixgbevf_configure_srrctl(adapter, reg_idx);
+       ixgbevf_configure_srrctl(adapter, ring, reg_idx);
+
+       /* RXDCTL.RLPML does not work on 82599 */
+       if (adapter->hw.mac.type != ixgbe_mac_82599_vf) {
+               rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
+                           IXGBE_RXDCTL_RLPML_EN);
 
-       /* allow any size packet since we can handle overflow */
-       rxdctl &= ~IXGBE_RXDCTL_RLPML_EN;
+#if (PAGE_SIZE < 8192)
+               /* Limit the maximum frame size so we don't overrun the skb */
+               if (ring_uses_build_skb(ring) &&
+                   !ring_uses_large_buffer(ring))
+                       rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB |
+                                 IXGBE_RXDCTL_RLPML_EN;
+#endif
+       }
 
        rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME;
        IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl);
@@ -1779,6 +1949,29 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
        ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring));
 }
 
+static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter,
+                                     struct ixgbevf_ring *rx_ring)
+{
+       struct net_device *netdev = adapter->netdev;
+       unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+
+       /* set build_skb and buffer size flags */
+       clear_ring_build_skb_enabled(rx_ring);
+       clear_ring_uses_large_buffer(rx_ring);
+
+       if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+               return;
+
+       set_ring_build_skb_enabled(rx_ring);
+
+       if (PAGE_SIZE < 8192) {
+               if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB)
+                       return;
+
+               set_ring_uses_large_buffer(rx_ring);
+       }
+}
+
 /**
  * ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset
  * @adapter: board private structure
@@ -1806,8 +1999,12 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter)
        /* Setup the HW Rx Head and Tail Descriptor Pointers and
         * the Base and Length of the Rx Descriptor Ring
         */
-       for (i = 0; i < adapter->num_rx_queues; i++)
-               ixgbevf_configure_rx_ring(adapter, adapter->rx_ring[i]);
+       for (i = 0; i < adapter->num_rx_queues; i++) {
+               struct ixgbevf_ring *rx_ring = adapter->rx_ring[i];
+
+               ixgbevf_set_rx_buffer_len(adapter, rx_ring);
+               ixgbevf_configure_rx_ring(adapter, rx_ring);
+       }
 }
 
 static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev,
@@ -2136,13 +2333,13 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring)
                dma_sync_single_range_for_cpu(rx_ring->dev,
                                              rx_buffer->dma,
                                              rx_buffer->page_offset,
-                                             IXGBEVF_RX_BUFSZ,
+                                             ixgbevf_rx_bufsz(rx_ring),
                                              DMA_FROM_DEVICE);
 
                /* free resources associated with mapping */
                dma_unmap_page_attrs(rx_ring->dev,
                                     rx_buffer->dma,
-                                    PAGE_SIZE,
+                                    ixgbevf_rx_pg_size(rx_ring),
                                     DMA_FROM_DEVICE,
                                     IXGBEVF_RX_DMA_ATTR);
 
@@ -2172,7 +2369,10 @@ static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring)
                union ixgbe_adv_tx_desc *eop_desc, *tx_desc;
 
                /* Free all the Tx ring sk_buffs */
-               dev_kfree_skb_any(tx_buffer->skb);
+               if (ring_is_xdp(tx_ring))
+                       page_frag_free(tx_buffer->data);
+               else
+                       dev_kfree_skb_any(tx_buffer->skb);
 
                /* unmap skb header data */
                dma_unmap_single(tx_ring->dev,
@@ -2240,6 +2440,8 @@ static void ixgbevf_clean_all_tx_rings(struct ixgbevf_adapter *adapter)
 
        for (i = 0; i < adapter->num_tx_queues; i++)
                ixgbevf_clean_tx_ring(adapter->tx_ring[i]);
+       for (i = 0; i < adapter->num_xdp_queues; i++)
+               ixgbevf_clean_tx_ring(adapter->xdp_ring[i]);
 }
 
 void ixgbevf_down(struct ixgbevf_adapter *adapter)
@@ -2278,6 +2480,13 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter)
                                IXGBE_TXDCTL_SWFLSH);
        }
 
+       for (i = 0; i < adapter->num_xdp_queues; i++) {
+               u8 reg_idx = adapter->xdp_ring[i]->reg_idx;
+
+               IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx),
+                               IXGBE_TXDCTL_SWFLSH);
+       }
+
        if (!pci_channel_offline(adapter->pdev))
                ixgbevf_reset(adapter);
 
@@ -2375,6 +2584,7 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter)
        /* Start with base case */
        adapter->num_rx_queues = 1;
        adapter->num_tx_queues = 1;
+       adapter->num_xdp_queues = 0;
 
        spin_lock_bh(&adapter->mbx_lock);
 
@@ -2396,8 +2606,13 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter)
                case ixgbe_mbox_api_11:
                case ixgbe_mbox_api_12:
                case ixgbe_mbox_api_13:
+                       if (adapter->xdp_prog &&
+                           hw->mac.max_tx_queues == rss)
+                               rss = rss > 3 ? 2 : 1;
+
                        adapter->num_rx_queues = rss;
                        adapter->num_tx_queues = rss;
+                       adapter->num_xdp_queues = adapter->xdp_prog ? rss : 0;
                default:
                        break;
                }
@@ -2405,105 +2620,209 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter)
 }
 
 /**
- * ixgbevf_alloc_queues - Allocate memory for all rings
+ * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported
  * @adapter: board private structure to initialize
  *
- * We allocate one ring per queue at run-time since we don't know the
- * number of queues at compile-time.  The polling_netdev array is
- * intended for Multiqueue, but should work fine with a single queue.
+ * Attempt to configure the interrupts using the best available
+ * capabilities of the hardware and the kernel.
  **/
-static int ixgbevf_alloc_queues(struct ixgbevf_adapter *adapter)
+static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
 {
+       int vector, v_budget;
+
+       /* It's easy to be greedy for MSI-X vectors, but it really
+        * doesn't do us much good if we have a lot more vectors
+        * than CPUs.  So let's be conservative and only ask for
+        * (roughly) the same number of vectors as there are CPUs.
+        * The default is to use pairs of vectors.
+        */
+       v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues);
+       v_budget = min_t(int, v_budget, num_online_cpus());
+       v_budget += NON_Q_VECTORS;
+
+       adapter->msix_entries = kcalloc(v_budget,
+                                       sizeof(struct msix_entry), GFP_KERNEL);
+       if (!adapter->msix_entries)
+               return -ENOMEM;
+
+       for (vector = 0; vector < v_budget; vector++)
+               adapter->msix_entries[vector].entry = vector;
+
+       /* A failure in MSI-X entry allocation isn't fatal, but the VF driver
+        * does not support any other modes, so we will simply fail here. Note
+        * that we clean up the msix_entries pointer elsewhere.
+        */
+       return ixgbevf_acquire_msix_vectors(adapter, v_budget);
+}
+
+static void ixgbevf_add_ring(struct ixgbevf_ring *ring,
+                            struct ixgbevf_ring_container *head)
+{
+       ring->next = head->ring;
+       head->ring = ring;
+       head->count++;
+}
+
+/**
+ * ixgbevf_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
+ * @txr_count: number of Tx rings for q vector
+ * @txr_idx: index of first Tx ring to assign
+ * @xdp_count: total number of XDP rings to allocate
+ * @xdp_idx: index of first XDP ring to allocate
+ * @rxr_count: number of Rx rings for q vector
+ * @rxr_idx: index of first Rx ring to assign
+ *
+ * We allocate one q_vector.  If allocation fails we return -ENOMEM.
+ **/
+static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx,
+                                 int txr_count, int txr_idx,
+                                 int xdp_count, int xdp_idx,
+                                 int rxr_count, int rxr_idx)
+{
+       struct ixgbevf_q_vector *q_vector;
+       int reg_idx = txr_idx + xdp_idx;
        struct ixgbevf_ring *ring;
-       int rx = 0, tx = 0;
+       int ring_count, size;
+
+       ring_count = txr_count + xdp_count + rxr_count;
+       size = sizeof(*q_vector) + (sizeof(*ring) * ring_count);
+
+       /* allocate q_vector and rings */
+       q_vector = kzalloc(size, GFP_KERNEL);
+       if (!q_vector)
+               return -ENOMEM;
+
+       /* initialize NAPI */
+       netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll, 64);
 
-       for (; tx < adapter->num_tx_queues; tx++) {
-               ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-               if (!ring)
-                       goto err_allocation;
+       /* tie q_vector and adapter together */
+       adapter->q_vector[v_idx] = q_vector;
+       q_vector->adapter = adapter;
+       q_vector->v_idx = v_idx;
 
+       /* initialize pointer to rings */
+       ring = q_vector->ring;
+
+       while (txr_count) {
+               /* assign generic ring traits */
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
+
+               /* configure backlink on ring */
+               ring->q_vector = q_vector;
+
+               /* update q_vector Tx values */
+               ixgbevf_add_ring(ring, &q_vector->tx);
+
+               /* apply Tx specific ring traits */
                ring->count = adapter->tx_ring_count;
-               ring->queue_index = tx;
-               ring->reg_idx = tx;
+               ring->queue_index = txr_idx;
+               ring->reg_idx = reg_idx;
 
-               adapter->tx_ring[tx] = ring;
+               /* assign ring to adapter */
+                adapter->tx_ring[txr_idx] = ring;
+
+               /* update count and index */
+               txr_count--;
+               txr_idx++;
+               reg_idx++;
+
+               /* push pointer to next ring */
+               ring++;
        }
 
-       for (; rx < adapter->num_rx_queues; rx++) {
-               ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-               if (!ring)
-                       goto err_allocation;
+       while (xdp_count) {
+               /* assign generic ring traits */
+               ring->dev = &adapter->pdev->dev;
+               ring->netdev = adapter->netdev;
 
+               /* configure backlink on ring */
+               ring->q_vector = q_vector;
+
+               /* update q_vector Tx values */
+               ixgbevf_add_ring(ring, &q_vector->tx);
+
+               /* apply Tx specific ring traits */
+               ring->count = adapter->tx_ring_count;
+               ring->queue_index = xdp_idx;
+               ring->reg_idx = reg_idx;
+               set_ring_xdp(ring);
+
+               /* assign ring to adapter */
+               adapter->xdp_ring[xdp_idx] = ring;
+
+               /* update count and index */
+               xdp_count--;
+               xdp_idx++;
+               reg_idx++;
+
+               /* push pointer to next ring */
+               ring++;
+       }
+
+       while (rxr_count) {
+               /* assign generic ring traits */
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
 
+               /* configure backlink on ring */
+               ring->q_vector = q_vector;
+
+               /* update q_vector Rx values */
+               ixgbevf_add_ring(ring, &q_vector->rx);
+
+               /* apply Rx specific ring traits */
                ring->count = adapter->rx_ring_count;
-               ring->queue_index = rx;
-               ring->reg_idx = rx;
+               ring->queue_index = rxr_idx;
+               ring->reg_idx = rxr_idx;
 
-               adapter->rx_ring[rx] = ring;
-       }
+               /* assign ring to adapter */
+               adapter->rx_ring[rxr_idx] = ring;
 
-       return 0;
+               /* update count and index */
+               rxr_count--;
+               rxr_idx++;
 
-err_allocation:
-       while (tx) {
-               kfree(adapter->tx_ring[--tx]);
-               adapter->tx_ring[tx] = NULL;
+               /* push pointer to next ring */
+               ring++;
        }
 
-       while (rx) {
-               kfree(adapter->rx_ring[--rx]);
-               adapter->rx_ring[rx] = NULL;
-       }
-       return -ENOMEM;
+       return 0;
 }
 
 /**
- * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported
+ * ixgbevf_free_q_vector - Free memory allocated for specific interrupt vector
  * @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
  *
- * Attempt to configure the interrupts using the best available
- * capabilities of the hardware and the kernel.
+ * This function frees the memory allocated to the q_vector.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
  **/
-static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
+static void ixgbevf_free_q_vector(struct ixgbevf_adapter *adapter, int v_idx)
 {
-       struct net_device *netdev = adapter->netdev;
-       int err;
-       int vector, v_budget;
-
-       /* It's easy to be greedy for MSI-X vectors, but it really
-        * doesn't do us much good if we have a lot more vectors
-        * than CPU's.  So let's be conservative and only ask for
-        * (roughly) the same number of vectors as there are CPU's.
-        * The default is to use pairs of vectors.
-        */
-       v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues);
-       v_budget = min_t(int, v_budget, num_online_cpus());
-       v_budget += NON_Q_VECTORS;
-
-       /* A failure in MSI-X entry allocation isn't fatal, but it does
-        * mean we disable MSI-X capabilities of the adapter.
-        */
-       adapter->msix_entries = kcalloc(v_budget,
-                                       sizeof(struct msix_entry), GFP_KERNEL);
-       if (!adapter->msix_entries)
-               return -ENOMEM;
+       struct ixgbevf_q_vector *q_vector = adapter->q_vector[v_idx];
+       struct ixgbevf_ring *ring;
 
-       for (vector = 0; vector < v_budget; vector++)
-               adapter->msix_entries[vector].entry = vector;
+       ixgbevf_for_each_ring(ring, q_vector->tx) {
+               if (ring_is_xdp(ring))
+                       adapter->xdp_ring[ring->queue_index] = NULL;
+               else
+                       adapter->tx_ring[ring->queue_index] = NULL;
+       }
 
-       err = ixgbevf_acquire_msix_vectors(adapter, v_budget);
-       if (err)
-               return err;
+       ixgbevf_for_each_ring(ring, q_vector->rx)
+               adapter->rx_ring[ring->queue_index] = NULL;
 
-       err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
-       if (err)
-               return err;
+       adapter->q_vector[v_idx] = NULL;
+       netif_napi_del(&q_vector->napi);
 
-       return netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+       /* ixgbevf_get_stats() might access the rings on this vector,
+        * we must wait a grace period before freeing it.
+        */
+       kfree_rcu(q_vector, rcu);
 }
 
 /**
@@ -2515,35 +2834,58 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
  **/
 static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter)
 {
-       int q_idx, num_q_vectors;
-       struct ixgbevf_q_vector *q_vector;
+       int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+       int rxr_remaining = adapter->num_rx_queues;
+       int txr_remaining = adapter->num_tx_queues;
+       int xdp_remaining = adapter->num_xdp_queues;
+       int rxr_idx = 0, txr_idx = 0, xdp_idx = 0, v_idx = 0;
+       int err;
+
+       if (q_vectors >= (rxr_remaining + txr_remaining + xdp_remaining)) {
+               for (; rxr_remaining; v_idx++, q_vectors--) {
+                       int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
 
-       num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+                       err = ixgbevf_alloc_q_vector(adapter, v_idx,
+                                                    0, 0, 0, 0, rqpv, rxr_idx);
+                       if (err)
+                               goto err_out;
 
-       for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
-               q_vector = kzalloc(sizeof(struct ixgbevf_q_vector), GFP_KERNEL);
-               if (!q_vector)
+                       /* update counts and index */
+                       rxr_remaining -= rqpv;
+                       rxr_idx += rqpv;
+               }
+       }
+
+       for (; q_vectors; v_idx++, q_vectors--) {
+               int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
+               int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors);
+               int xqpv = DIV_ROUND_UP(xdp_remaining, q_vectors);
+
+               err = ixgbevf_alloc_q_vector(adapter, v_idx,
+                                            tqpv, txr_idx,
+                                            xqpv, xdp_idx,
+                                            rqpv, rxr_idx);
+
+               if (err)
                        goto err_out;
-               q_vector->adapter = adapter;
-               q_vector->v_idx = q_idx;
-               netif_napi_add(adapter->netdev, &q_vector->napi,
-                              ixgbevf_poll, 64);
-               adapter->q_vector[q_idx] = q_vector;
+
+               /* update counts and index */
+               rxr_remaining -= rqpv;
+               rxr_idx += rqpv;
+               txr_remaining -= tqpv;
+               txr_idx += tqpv;
+               xdp_remaining -= xqpv;
+               xdp_idx += xqpv;
        }
 
        return 0;
 
 err_out:
-       while (q_idx) {
-               q_idx--;
-               q_vector = adapter->q_vector[q_idx];
-#ifdef CONFIG_NET_RX_BUSY_POLL
-               napi_hash_del(&q_vector->napi);
-#endif
-               netif_napi_del(&q_vector->napi);
-               kfree(q_vector);
-               adapter->q_vector[q_idx] = NULL;
+       while (v_idx) {
+               v_idx--;
+               ixgbevf_free_q_vector(adapter, v_idx);
        }
+
        return -ENOMEM;
 }
 
@@ -2557,17 +2899,11 @@ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter)
  **/
 static void ixgbevf_free_q_vectors(struct ixgbevf_adapter *adapter)
 {
-       int q_idx, num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
-       for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
-               struct ixgbevf_q_vector *q_vector = adapter->q_vector[q_idx];
+       int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
 
-               adapter->q_vector[q_idx] = NULL;
-#ifdef CONFIG_NET_RX_BUSY_POLL
-               napi_hash_del(&q_vector->napi);
-#endif
-               netif_napi_del(&q_vector->napi);
-               kfree(q_vector);
+       while (q_vectors) {
+               q_vectors--;
+               ixgbevf_free_q_vector(adapter, q_vectors);
        }
 }
 
@@ -2611,21 +2947,14 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter)
                goto err_alloc_q_vectors;
        }
 
-       err = ixgbevf_alloc_queues(adapter);
-       if (err) {
-               pr_err("Unable to allocate memory for queues\n");
-               goto err_alloc_queues;
-       }
-
-       hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n",
-              (adapter->num_rx_queues > 1) ? "Enabled" :
-              "Disabled", adapter->num_rx_queues, adapter->num_tx_queues);
+       hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u XDP Queue count %u\n",
+              (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled",
+              adapter->num_rx_queues, adapter->num_tx_queues,
+              adapter->num_xdp_queues);
 
        set_bit(__IXGBEVF_DOWN, &adapter->state);
 
        return 0;
-err_alloc_queues:
-       ixgbevf_free_q_vectors(adapter);
 err_alloc_q_vectors:
        ixgbevf_reset_interrupt_capability(adapter);
 err_set_interrupt:
@@ -2641,18 +2970,8 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter)
  **/
 static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter)
 {
-       int i;
-
-       for (i = 0; i < adapter->num_tx_queues; i++) {
-               kfree(adapter->tx_ring[i]);
-               adapter->tx_ring[i] = NULL;
-       }
-       for (i = 0; i < adapter->num_rx_queues; i++) {
-               kfree(adapter->rx_ring[i]);
-               adapter->rx_ring[i] = NULL;
-       }
-
        adapter->num_tx_queues = 0;
+       adapter->num_xdp_queues = 0;
        adapter->num_rx_queues = 0;
 
        ixgbevf_free_q_vectors(adapter);
@@ -2860,6 +3179,8 @@ static void ixgbevf_check_hang_subtask(struct ixgbevf_adapter *adapter)
        if (netif_carrier_ok(adapter->netdev)) {
                for (i = 0; i < adapter->num_tx_queues; i++)
                        set_check_for_tx_hang(adapter->tx_ring[i]);
+               for (i = 0; i < adapter->num_xdp_queues; i++)
+                       set_check_for_tx_hang(adapter->xdp_ring[i]);
        }
 
        /* get one bit for every active Tx/Rx interrupt vector */
@@ -3031,6 +3352,9 @@ static void ixgbevf_free_all_tx_resources(struct ixgbevf_adapter *adapter)
        for (i = 0; i < adapter->num_tx_queues; i++)
                if (adapter->tx_ring[i]->desc)
                        ixgbevf_free_tx_resources(adapter->tx_ring[i]);
+       for (i = 0; i < adapter->num_xdp_queues; i++)
+               if (adapter->xdp_ring[i]->desc)
+                       ixgbevf_free_tx_resources(adapter->xdp_ring[i]);
 }
 
 /**
@@ -3081,26 +3405,44 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring)
  **/
 static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter)
 {
-       int i, err = 0;
+       int i, j = 0, err = 0;
 
        for (i = 0; i < adapter->num_tx_queues; i++) {
                err = ixgbevf_setup_tx_resources(adapter->tx_ring[i]);
                if (!err)
                        continue;
                hw_dbg(&adapter->hw, "Allocation for Tx Queue %u failed\n", i);
+               goto err_setup_tx;
+       }
+
+       for (j = 0; j < adapter->num_xdp_queues; j++) {
+               err = ixgbevf_setup_tx_resources(adapter->xdp_ring[j]);
+               if (!err)
+                       continue;
+               hw_dbg(&adapter->hw, "Allocation for XDP Queue %u failed\n", j);
                break;
        }
 
+       return 0;
+err_setup_tx:
+       /* rewind the index freeing the rings as we go */
+       while (j--)
+               ixgbevf_free_tx_resources(adapter->xdp_ring[j]);
+       while (i--)
+               ixgbevf_free_tx_resources(adapter->tx_ring[i]);
+
        return err;
 }
 
 /**
  * ixgbevf_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @adapter: board private structure
  * @rx_ring: Rx descriptor ring (for a specific queue) to setup
  *
  * Returns 0 on success, negative on failure
  **/
-int ixgbevf_setup_rx_resources(struct ixgbevf_ring *rx_ring)
+int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter,
+                              struct ixgbevf_ring *rx_ring)
 {
        int size;
 
@@ -3121,6 +3463,13 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_ring *rx_ring)
        if (!rx_ring->desc)
                goto err;
 
+       /* XDP RX-queue info */
+       if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev,
+                            rx_ring->queue_index) < 0)
+               goto err;
+
+       rx_ring->xdp_prog = adapter->xdp_prog;
+
        return 0;
 err:
        vfree(rx_ring->rx_buffer_info);
@@ -3144,12 +3493,18 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter)
        int i, err = 0;
 
        for (i = 0; i < adapter->num_rx_queues; i++) {
-               err = ixgbevf_setup_rx_resources(adapter->rx_ring[i]);
+               err = ixgbevf_setup_rx_resources(adapter, adapter->rx_ring[i]);
                if (!err)
                        continue;
                hw_dbg(&adapter->hw, "Allocation for Rx Queue %u failed\n", i);
-               break;
+               goto err_setup_rx;
        }
+
+       return 0;
+err_setup_rx:
+       /* rewind the index freeing the rings as we go */
+       while (i--)
+               ixgbevf_free_rx_resources(adapter->rx_ring[i]);
        return err;
 }
 
@@ -3163,6 +3518,8 @@ void ixgbevf_free_rx_resources(struct ixgbevf_ring *rx_ring)
 {
        ixgbevf_clean_rx_ring(rx_ring);
 
+       rx_ring->xdp_prog = NULL;
+       xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
        vfree(rx_ring->rx_buffer_info);
        rx_ring->rx_buffer_info = NULL;
 
@@ -3244,28 +3601,31 @@ int ixgbevf_open(struct net_device *netdev)
 
        ixgbevf_configure(adapter);
 
-       /* Map the Tx/Rx rings to the vectors we were allotted.
-        * if request_irq will be called in this function map_rings
-        * must be called *before* up_complete
-        */
-       ixgbevf_map_rings_to_vectors(adapter);
-
        err = ixgbevf_request_irq(adapter);
        if (err)
                goto err_req_irq;
 
+       /* Notify the stack of the actual queue counts. */
+       err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
+       if (err)
+               goto err_set_queues;
+
+       err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+       if (err)
+               goto err_set_queues;
+
        ixgbevf_up_complete(adapter);
 
        return 0;
 
+err_set_queues:
+       ixgbevf_free_irq(adapter);
 err_req_irq:
-       ixgbevf_down(adapter);
-err_setup_rx:
        ixgbevf_free_all_rx_resources(adapter);
-err_setup_tx:
+err_setup_rx:
        ixgbevf_free_all_tx_resources(adapter);
+err_setup_tx:
        ixgbevf_reset(adapter);
-
 err_setup_reset:
 
        return err;
@@ -3707,11 +4067,10 @@ static int ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size)
        return __ixgbevf_maybe_stop_tx(tx_ring, size);
 }
 
-static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
+                                  struct ixgbevf_ring *tx_ring)
 {
-       struct ixgbevf_adapter *adapter = netdev_priv(netdev);
        struct ixgbevf_tx_buffer *first;
-       struct ixgbevf_ring *tx_ring;
        int tso;
        u32 tx_flags = 0;
        u16 count = TXD_USE_COUNT(skb_headlen(skb));
@@ -3726,8 +4085,6 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
                return NETDEV_TX_OK;
        }
 
-       tx_ring = adapter->tx_ring[skb->queue_mapping];
-
        /* need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD,
         *       + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD,
         *       + 2 desc gap to keep tail from touching head,
@@ -3780,6 +4137,29 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
        return NETDEV_TX_OK;
 }
 
+static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+       struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+       struct ixgbevf_ring *tx_ring;
+
+       if (skb->len <= 0) {
+               dev_kfree_skb_any(skb);
+               return NETDEV_TX_OK;
+       }
+
+       /* The minimum packet size for olinfo paylen is 17 so pad the skb
+        * in order to meet this minimum size requirement.
+        */
+       if (skb->len < 17) {
+               if (skb_padto(skb, 17))
+                       return NETDEV_TX_OK;
+               skb->len = 17;
+       }
+
+       tx_ring = adapter->tx_ring[skb->queue_mapping];
+       return ixgbevf_xmit_frame_ring(skb, tx_ring);
+}
+
 /**
  * ixgbevf_set_mac - Change the Ethernet Address of the NIC
  * @netdev: network interface device structure
@@ -3826,6 +4206,12 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
        int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
        int ret;
 
+       /* prevent MTU being changed to a size unsupported by XDP */
+       if (adapter->xdp_prog) {
+               dev_warn(&adapter->pdev->dev, "MTU cannot be changed while XDP program is loaded\n");
+               return -EPERM;
+       }
+
        spin_lock_bh(&adapter->mbx_lock);
        /* notify the PF of our intent to use this size of frame */
        ret = hw->mac.ops.set_rlpml(hw, max_frame);
@@ -3839,6 +4225,9 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
        /* must set new MTU before calling down or up */
        netdev->mtu = new_mtu;
 
+       if (netif_running(netdev))
+               ixgbevf_reinit_locked(adapter);
+
        return 0;
 }
 
@@ -3917,17 +4306,11 @@ static int ixgbevf_resume(struct pci_dev *pdev)
 
        rtnl_lock();
        err = ixgbevf_init_interrupt_scheme(adapter);
+       if (!err && netif_running(netdev))
+               err = ixgbevf_open(netdev);
        rtnl_unlock();
-       if (err) {
-               dev_err(&pdev->dev, "Cannot initialize interrupts\n");
+       if (err)
                return err;
-       }
-
-       if (netif_running(netdev)) {
-               err = ixgbevf_open(netdev);
-               if (err)
-                       return err;
-       }
 
        netif_device_attach(netdev);
 
@@ -3940,6 +4323,23 @@ static void ixgbevf_shutdown(struct pci_dev *pdev)
        ixgbevf_suspend(pdev, PMSG_SUSPEND);
 }
 
+static void ixgbevf_get_tx_ring_stats(struct rtnl_link_stats64 *stats,
+                                     const struct ixgbevf_ring *ring)
+{
+       u64 bytes, packets;
+       unsigned int start;
+
+       if (ring) {
+               do {
+                       start = u64_stats_fetch_begin_irq(&ring->syncp);
+                       bytes = ring->stats.bytes;
+                       packets = ring->stats.packets;
+               } while (u64_stats_fetch_retry_irq(&ring->syncp, start));
+               stats->tx_bytes += bytes;
+               stats->tx_packets += packets;
+       }
+}
+
 static void ixgbevf_get_stats(struct net_device *netdev,
                              struct rtnl_link_stats64 *stats)
 {
@@ -3953,6 +4353,7 @@ static void ixgbevf_get_stats(struct net_device *netdev,
 
        stats->multicast = adapter->stats.vfmprc - adapter->stats.base_vfmprc;
 
+       rcu_read_lock();
        for (i = 0; i < adapter->num_rx_queues; i++) {
                ring = adapter->rx_ring[i];
                do {
@@ -3966,14 +4367,14 @@ static void ixgbevf_get_stats(struct net_device *netdev,
 
        for (i = 0; i < adapter->num_tx_queues; i++) {
                ring = adapter->tx_ring[i];
-               do {
-                       start = u64_stats_fetch_begin_irq(&ring->syncp);
-                       bytes = ring->stats.bytes;
-                       packets = ring->stats.packets;
-               } while (u64_stats_fetch_retry_irq(&ring->syncp, start));
-               stats->tx_bytes += bytes;
-               stats->tx_packets += packets;
+               ixgbevf_get_tx_ring_stats(stats, ring);
+       }
+
+       for (i = 0; i < adapter->num_xdp_queues; i++) {
+               ring = adapter->xdp_ring[i];
+               ixgbevf_get_tx_ring_stats(stats, ring);
        }
+       rcu_read_unlock();
 }
 
 #define IXGBEVF_MAX_MAC_HDR_LEN                127
@@ -4010,6 +4411,64 @@ ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev,
        return features;
 }
 
+static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
+{
+       int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+       struct ixgbevf_adapter *adapter = netdev_priv(dev);
+       struct bpf_prog *old_prog;
+
+       /* verify ixgbevf ring attributes are sufficient for XDP */
+       for (i = 0; i < adapter->num_rx_queues; i++) {
+               struct ixgbevf_ring *ring = adapter->rx_ring[i];
+
+               if (frame_size > ixgbevf_rx_bufsz(ring))
+                       return -EINVAL;
+       }
+
+       old_prog = xchg(&adapter->xdp_prog, prog);
+
+       /* If transitioning XDP modes reconfigure rings */
+       if (!!prog != !!old_prog) {
+               /* Hardware has to reinitialize queues and interrupts to
+                * match packet buffer alignment. Unfortunately, the
+                * hardware is not flexible enough to do this dynamically.
+                */
+               if (netif_running(dev))
+                       ixgbevf_close(dev);
+
+               ixgbevf_clear_interrupt_scheme(adapter);
+               ixgbevf_init_interrupt_scheme(adapter);
+
+               if (netif_running(dev))
+                       ixgbevf_open(dev);
+       } else {
+               for (i = 0; i < adapter->num_rx_queues; i++)
+                       xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog);
+       }
+
+       if (old_prog)
+               bpf_prog_put(old_prog);
+
+       return 0;
+}
+
+static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+       struct ixgbevf_adapter *adapter = netdev_priv(dev);
+
+       switch (xdp->command) {
+       case XDP_SETUP_PROG:
+               return ixgbevf_xdp_setup(dev, xdp->prog);
+       case XDP_QUERY_PROG:
+               xdp->prog_attached = !!(adapter->xdp_prog);
+               xdp->prog_id = adapter->xdp_prog ?
+                              adapter->xdp_prog->aux->id : 0;
+               return 0;
+       default:
+               return -EINVAL;
+       }
+}
+
 static const struct net_device_ops ixgbevf_netdev_ops = {
        .ndo_open               = ixgbevf_open,
        .ndo_stop               = ixgbevf_close,
@@ -4026,6 +4485,7 @@ static const struct net_device_ops ixgbevf_netdev_ops = {
        .ndo_poll_controller    = ixgbevf_netpoll,
 #endif
        .ndo_features_check     = ixgbevf_features_check,
+       .ndo_bpf                = ixgbevf_xdp,
 };
 
 static void ixgbevf_assign_netdev_ops(struct net_device *dev)
index bc0442acae787ff2c1c67d24be9cf8577afb4371..5ec947fe3d09bdc5aef630009f8c145aab4f477c 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 82599 Virtual Function driver
index 2764fd16261ffef7e97608347b77941b2a15c5d5..278f73980501f0f5a201333cd899900bac44536b 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 82599 Virtual Function driver
index c651fefcc3d22b78e3ec1123394cf5604c94e43e..194fbdaa4519945ed0b22529a2761df0b3f9fc9d 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 82599 Virtual Function driver
index 5a1668cdb461c7437cf510b5592f7ab54134b0f7..f8bc3d4a39ff8066a1cbba35a537799b4b3a58b8 100644 (file)
@@ -44,6 +44,7 @@
 #define MVPP2_RX_ATTR_FIFO_SIZE_REG(port)      (0x20 + 4 * (port))
 #define MVPP2_RX_MIN_PKT_SIZE_REG              0x60
 #define MVPP2_RX_FIFO_INIT_REG                 0x64
+#define MVPP22_TX_FIFO_THRESH_REG(port)                (0x8840 + 4 * (port))
 #define MVPP22_TX_FIFO_SIZE_REG(port)          (0x8860 + 4 * (port))
 
 /* RX DMA Top Registers */
 #define     MVPP2_RXQ_PACKET_OFFSET_MASK       0x70000000
 #define     MVPP2_RXQ_DISABLE_MASK             BIT(31)
 
+/* Top Registers */
+#define MVPP2_MH_REG(port)                     (0x5040 + 4 * (port))
+#define MVPP2_DSA_EXTENDED                     BIT(5)
+
 /* Parser Registers */
 #define MVPP2_PRS_INIT_LOOKUP_REG              0x1000
 #define     MVPP2_PRS_PORT_LU_MAX              0xf
 #define MVPP2_BM_BPPI_READ_PTR_REG(pool)       (0x6100 + ((pool) * 4))
 #define MVPP2_BM_BPPI_PTRS_NUM_REG(pool)       (0x6140 + ((pool) * 4))
 #define     MVPP2_BM_BPPI_PTR_NUM_MASK         0x7ff
+#define MVPP22_BM_POOL_PTRS_NUM_MASK           0xfff8
 #define     MVPP2_BM_BPPI_PREFETCH_FULL_MASK   BIT(16)
 #define MVPP2_BM_POOL_CTRL_REG(pool)           (0x6200 + ((pool) * 4))
 #define     MVPP2_BM_START_MASK                        BIT(0)
 #define MVPP2_ETH_TYPE_LEN             2
 #define MVPP2_PPPOE_HDR_SIZE           8
 #define MVPP2_VLAN_TAG_LEN             4
+#define MVPP2_VLAN_TAG_EDSA_LEN                8
 
 /* Lbtd 802.3 type */
 #define MVPP2_IP_LBDT_TYPE             0xfffa
 /* TX FIFO constants */
 #define MVPP22_TX_FIFO_DATA_SIZE_10KB          0xa
 #define MVPP22_TX_FIFO_DATA_SIZE_3KB           0x3
+#define MVPP2_TX_FIFO_THRESHOLD_MIN            256
+#define MVPP2_TX_FIFO_THRESHOLD_10KB   \
+       (MVPP22_TX_FIFO_DATA_SIZE_10KB * 1024 - MVPP2_TX_FIFO_THRESHOLD_MIN)
+#define MVPP2_TX_FIFO_THRESHOLD_3KB    \
+       (MVPP22_TX_FIFO_DATA_SIZE_3KB * 1024 - MVPP2_TX_FIFO_THRESHOLD_MIN)
 
 /* RX buffer constants */
 #define MVPP2_SKB_SHINFO_SIZE \
@@ -589,6 +601,9 @@ enum mvpp2_tag_type {
 #define MVPP2_PRS_TCAM_PROTO_MASK      0xff
 #define MVPP2_PRS_TCAM_PROTO_MASK_L    0x3f
 #define MVPP2_PRS_DBL_VLANS_MAX                100
+#define MVPP2_PRS_CAST_MASK            BIT(0)
+#define MVPP2_PRS_MCAST_VAL            BIT(0)
+#define MVPP2_PRS_UCAST_VAL            0x0
 
 /* Tcam structure:
  * - lookup ID - 4 bits
@@ -609,35 +624,81 @@ enum mvpp2_tag_type {
 #define MVPP2_PRS_TCAM_LU_BYTE                 20
 #define MVPP2_PRS_TCAM_EN_OFFS(offs)           ((offs) + 2)
 #define MVPP2_PRS_TCAM_INV_WORD                        5
+
+#define MVPP2_PRS_VID_TCAM_BYTE         2
+
+/* TCAM range for unicast and multicast filtering. We have 25 entries per port,
+ * with 4 dedicated to UC filtering and the rest to multicast filtering.
+ * Additionally we reserve one entry for the broadcast address, and one for
+ * each port's own address.
+ */
+#define MVPP2_PRS_MAC_UC_MC_FILT_MAX   25
+#define MVPP2_PRS_MAC_RANGE_SIZE       80
+
+/* Number of entries per port dedicated to UC and MC filtering */
+#define MVPP2_PRS_MAC_UC_FILT_MAX      4
+#define MVPP2_PRS_MAC_MC_FILT_MAX      (MVPP2_PRS_MAC_UC_MC_FILT_MAX - \
+                                        MVPP2_PRS_MAC_UC_FILT_MAX)
+
+/* There is a TCAM range reserved for VLAN filtering entries, range size is 33
+ * 10 VLAN ID filter entries per port
+ * 1 default VLAN filter entry per port
+ * It is assumed that there are 3 ports for filter, not including loopback port
+ */
+#define MVPP2_PRS_VLAN_FILT_MAX                11
+#define MVPP2_PRS_VLAN_FILT_RANGE_SIZE 33
+
+#define MVPP2_PRS_VLAN_FILT_MAX_ENTRY   (MVPP2_PRS_VLAN_FILT_MAX - 2)
+#define MVPP2_PRS_VLAN_FILT_DFLT_ENTRY  (MVPP2_PRS_VLAN_FILT_MAX - 1)
+
 /* Tcam entries ID */
 #define MVPP2_PE_DROP_ALL              0
 #define MVPP2_PE_FIRST_FREE_TID                1
-#define MVPP2_PE_LAST_FREE_TID         (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+
+/* MAC filtering range */
+#define MVPP2_PE_MAC_RANGE_END         (MVPP2_PE_VID_FILT_RANGE_START - 1)
+#define MVPP2_PE_MAC_RANGE_START       (MVPP2_PE_MAC_RANGE_END - \
+                                               MVPP2_PRS_MAC_RANGE_SIZE + 1)
+/* VLAN filtering range */
+#define MVPP2_PE_VID_FILT_RANGE_END     (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+#define MVPP2_PE_VID_FILT_RANGE_START   (MVPP2_PE_VID_FILT_RANGE_END - \
+                                        MVPP2_PRS_VLAN_FILT_RANGE_SIZE + 1)
+#define MVPP2_PE_LAST_FREE_TID          (MVPP2_PE_VID_FILT_RANGE_START - 1)
 #define MVPP2_PE_IP6_EXT_PROTO_UN      (MVPP2_PRS_TCAM_SRAM_SIZE - 30)
-#define MVPP2_PE_MAC_MC_IP6            (MVPP2_PRS_TCAM_SRAM_SIZE - 29)
-#define MVPP2_PE_IP6_ADDR_UN           (MVPP2_PRS_TCAM_SRAM_SIZE - 28)
-#define MVPP2_PE_IP4_ADDR_UN           (MVPP2_PRS_TCAM_SRAM_SIZE - 27)
-#define MVPP2_PE_LAST_DEFAULT_FLOW     (MVPP2_PRS_TCAM_SRAM_SIZE - 26)
-#define MVPP2_PE_FIRST_DEFAULT_FLOW    (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
-#define MVPP2_PE_EDSA_TAGGED           (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
-#define MVPP2_PE_EDSA_UNTAGGED         (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
-#define MVPP2_PE_DSA_TAGGED            (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
-#define MVPP2_PE_DSA_UNTAGGED          (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
-#define MVPP2_PE_ETYPE_EDSA_TAGGED     (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
-#define MVPP2_PE_ETYPE_EDSA_UNTAGGED   (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
-#define MVPP2_PE_ETYPE_DSA_TAGGED      (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
-#define MVPP2_PE_ETYPE_DSA_UNTAGGED    (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
-#define MVPP2_PE_MH_DEFAULT            (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
-#define MVPP2_PE_DSA_DEFAULT           (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
-#define MVPP2_PE_IP6_PROTO_UN          (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
-#define MVPP2_PE_IP4_PROTO_UN          (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
-#define MVPP2_PE_ETH_TYPE_UN           (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
-#define MVPP2_PE_VLAN_DBL              (MVPP2_PRS_TCAM_SRAM_SIZE - 5)
-#define MVPP2_PE_VLAN_NONE             (MVPP2_PRS_TCAM_SRAM_SIZE - 4)
-#define MVPP2_PE_MAC_MC_ALL            (MVPP2_PRS_TCAM_SRAM_SIZE - 3)
-#define MVPP2_PE_MAC_PROMISCUOUS       (MVPP2_PRS_TCAM_SRAM_SIZE - 2)
+#define MVPP2_PE_IP6_ADDR_UN           (MVPP2_PRS_TCAM_SRAM_SIZE - 29)
+#define MVPP2_PE_IP4_ADDR_UN           (MVPP2_PRS_TCAM_SRAM_SIZE - 28)
+#define MVPP2_PE_LAST_DEFAULT_FLOW     (MVPP2_PRS_TCAM_SRAM_SIZE - 27)
+#define MVPP2_PE_FIRST_DEFAULT_FLOW    (MVPP2_PRS_TCAM_SRAM_SIZE - 22)
+#define MVPP2_PE_EDSA_TAGGED           (MVPP2_PRS_TCAM_SRAM_SIZE - 21)
+#define MVPP2_PE_EDSA_UNTAGGED         (MVPP2_PRS_TCAM_SRAM_SIZE - 20)
+#define MVPP2_PE_DSA_TAGGED            (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
+#define MVPP2_PE_DSA_UNTAGGED          (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
+#define MVPP2_PE_ETYPE_EDSA_TAGGED     (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
+#define MVPP2_PE_ETYPE_EDSA_UNTAGGED   (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
+#define MVPP2_PE_ETYPE_DSA_TAGGED      (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
+#define MVPP2_PE_ETYPE_DSA_UNTAGGED    (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
+#define MVPP2_PE_MH_DEFAULT            (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
+#define MVPP2_PE_DSA_DEFAULT           (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
+#define MVPP2_PE_IP6_PROTO_UN          (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
+#define MVPP2_PE_IP4_PROTO_UN          (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
+#define MVPP2_PE_ETH_TYPE_UN           (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
+#define MVPP2_PE_VID_FLTR_DEFAULT      (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
+#define MVPP2_PE_VID_EDSA_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
+#define MVPP2_PE_VLAN_DBL              (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
+#define MVPP2_PE_VLAN_NONE             (MVPP2_PRS_TCAM_SRAM_SIZE - 5)
+/* reserved */
+#define MVPP2_PE_MAC_MC_PROMISCUOUS    (MVPP2_PRS_TCAM_SRAM_SIZE - 3)
+#define MVPP2_PE_MAC_UC_PROMISCUOUS    (MVPP2_PRS_TCAM_SRAM_SIZE - 2)
 #define MVPP2_PE_MAC_NON_PROMISCUOUS   (MVPP2_PRS_TCAM_SRAM_SIZE - 1)
 
+#define MVPP2_PRS_VID_PORT_FIRST(port) (MVPP2_PE_VID_FILT_RANGE_START + \
+                                        ((port) * MVPP2_PRS_VLAN_FILT_MAX))
+#define MVPP2_PRS_VID_PORT_LAST(port)  (MVPP2_PRS_VID_PORT_FIRST(port) \
+                                        + MVPP2_PRS_VLAN_FILT_MAX_ENTRY)
+/* Index of default vid filter for given port */
+#define MVPP2_PRS_VID_PORT_DFLT(port)  (MVPP2_PRS_VID_PORT_FIRST(port) \
+                                        + MVPP2_PRS_VLAN_FILT_DFLT_ENTRY)
+
 /* Sram structure
  * The fields are represented by MVPP2_PRS_TCAM_DATA_REG(3)->(0).
  */
@@ -725,6 +786,7 @@ enum mvpp2_tag_type {
 #define MVPP2_PRS_IPV6_EXT_AH_L4_AI_BIT                BIT(4)
 #define MVPP2_PRS_SINGLE_VLAN_AI               0
 #define MVPP2_PRS_DBL_VLAN_AI_BIT              BIT(7)
+#define MVPP2_PRS_EDSA_VID_AI_BIT              BIT(0)
 
 /* DSA/EDSA type */
 #define MVPP2_PRS_TAGGED               true
@@ -747,6 +809,7 @@ enum mvpp2_prs_lookup {
        MVPP2_PRS_LU_MAC,
        MVPP2_PRS_LU_DSA,
        MVPP2_PRS_LU_VLAN,
+       MVPP2_PRS_LU_VID,
        MVPP2_PRS_LU_L2,
        MVPP2_PRS_LU_PPPOE,
        MVPP2_PRS_LU_IP4,
@@ -755,6 +818,12 @@ enum mvpp2_prs_lookup {
        MVPP2_PRS_LU_LAST,
 };
 
+/* L2 cast enum */
+enum mvpp2_prs_l2_cast {
+       MVPP2_PRS_L2_UNI_CAST,
+       MVPP2_PRS_L2_MULTI_CAST,
+};
+
 /* L3 cast enum */
 enum mvpp2_prs_l3_cast {
        MVPP2_PRS_L3_UNI_CAST,
@@ -772,23 +841,26 @@ enum mvpp2_prs_l3_cast {
 #define MVPP22_RSS_TABLE_ENTRIES       32
 
 /* BM constants */
-#define MVPP2_BM_POOLS_NUM             8
+#define MVPP2_BM_JUMBO_BUF_NUM         512
 #define MVPP2_BM_LONG_BUF_NUM          1024
 #define MVPP2_BM_SHORT_BUF_NUM         2048
 #define MVPP2_BM_POOL_SIZE_MAX         (16*1024 - MVPP2_BM_POOL_PTR_ALIGN/4)
 #define MVPP2_BM_POOL_PTR_ALIGN                128
-#define MVPP2_BM_SWF_LONG_POOL(port)   ((port > 2) ? 2 : port)
-#define MVPP2_BM_SWF_SHORT_POOL                3
 
 /* BM cookie (32 bits) definition */
 #define MVPP2_BM_COOKIE_POOL_OFFS      8
 #define MVPP2_BM_COOKIE_CPU_OFFS       24
 
+#define MVPP2_BM_SHORT_FRAME_SIZE              512
+#define MVPP2_BM_LONG_FRAME_SIZE               2048
+#define MVPP2_BM_JUMBO_FRAME_SIZE              10240
 /* BM short pool packet size
  * These value assure that for SWF the total number
  * of bytes allocated for each buffer will be 512
  */
-#define MVPP2_BM_SHORT_PKT_SIZE                MVPP2_RX_MAX_PKT_SIZE(512)
+#define MVPP2_BM_SHORT_PKT_SIZE        MVPP2_RX_MAX_PKT_SIZE(MVPP2_BM_SHORT_FRAME_SIZE)
+#define MVPP2_BM_LONG_PKT_SIZE MVPP2_RX_MAX_PKT_SIZE(MVPP2_BM_LONG_FRAME_SIZE)
+#define MVPP2_BM_JUMBO_PKT_SIZE        MVPP2_RX_MAX_PKT_SIZE(MVPP2_BM_JUMBO_FRAME_SIZE)
 
 #define MVPP21_ADDR_SPACE_SZ           0
 #define MVPP22_ADDR_SPACE_SZ           SZ_64K
@@ -796,12 +868,18 @@ enum mvpp2_prs_l3_cast {
 #define MVPP2_MAX_THREADS              8
 #define MVPP2_MAX_QVECS                        MVPP2_MAX_THREADS
 
-enum mvpp2_bm_type {
-       MVPP2_BM_FREE,
-       MVPP2_BM_SWF_LONG,
-       MVPP2_BM_SWF_SHORT
+enum mvpp2_bm_pool_log_num {
+       MVPP2_BM_SHORT,
+       MVPP2_BM_LONG,
+       MVPP2_BM_JUMBO,
+       MVPP2_BM_POOLS_NUM
 };
 
+static struct {
+       int pkt_size;
+       int buf_num;
+} mvpp2_pools[MVPP2_BM_POOLS_NUM];
+
 /* GMAC MIB Counters register definitions */
 #define MVPP21_MIB_COUNTERS_OFFSET             0x1000
 #define MVPP21_MIB_COUNTERS_PORT_SZ            0x400
@@ -1230,7 +1308,6 @@ struct mvpp2_cls_lookup_entry {
 struct mvpp2_bm_pool {
        /* Pool number in the range 0-7 */
        int id;
-       enum mvpp2_bm_type type;
 
        /* Buffer Pointers Pool External (BPPE) size */
        int size;
@@ -1662,6 +1739,14 @@ static void mvpp2_prs_match_etype(struct mvpp2_prs_entry *pe, int offset,
        mvpp2_prs_tcam_data_byte_set(pe, offset + 1, ethertype & 0xff, 0xff);
 }
 
+/* Set vid in tcam sw entry */
+static void mvpp2_prs_match_vid(struct mvpp2_prs_entry *pe, int offset,
+                               unsigned short vid)
+{
+       mvpp2_prs_tcam_data_byte_set(pe, offset + 0, (vid & 0xf00) >> 8, 0xf);
+       mvpp2_prs_tcam_data_byte_set(pe, offset + 1, vid & 0xff, 0xff);
+}
+
 /* Set bits in sram sw entry */
 static void mvpp2_prs_sram_bits_set(struct mvpp2_prs_entry *pe, int bit_num,
                                    int val)
@@ -1914,78 +1999,43 @@ static void mvpp2_prs_mac_drop_all_set(struct mvpp2 *priv, int port, bool add)
        mvpp2_prs_hw_write(priv, &pe);
 }
 
-/* Set port to promiscuous mode */
-static void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port, bool add)
+/* Set port to unicast or multicast promiscuous mode */
+static void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port,
+                                     enum mvpp2_prs_l2_cast l2_cast, bool add)
 {
        struct mvpp2_prs_entry pe;
+       unsigned char cast_match;
+       unsigned int ri;
+       int tid;
 
-       /* Promiscuous mode - Accept unknown packets */
-
-       if (priv->prs_shadow[MVPP2_PE_MAC_PROMISCUOUS].valid) {
-               /* Entry exist - update port only */
-               pe.index = MVPP2_PE_MAC_PROMISCUOUS;
-               mvpp2_prs_hw_read(priv, &pe);
+       if (l2_cast == MVPP2_PRS_L2_UNI_CAST) {
+               cast_match = MVPP2_PRS_UCAST_VAL;
+               tid = MVPP2_PE_MAC_UC_PROMISCUOUS;
+               ri = MVPP2_PRS_RI_L2_UCAST;
        } else {
-               /* Entry doesn't exist - create new */
-               memset(&pe, 0, sizeof(pe));
-               mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC);
-               pe.index = MVPP2_PE_MAC_PROMISCUOUS;
-
-               /* Continue - set next lookup */
-               mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_DSA);
-
-               /* Set result info bits */
-               mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L2_UCAST,
-                                        MVPP2_PRS_RI_L2_CAST_MASK);
-
-               /* Shift to ethertype */
-               mvpp2_prs_sram_shift_set(&pe, 2 * ETH_ALEN,
-                                        MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
-
-               /* Mask all ports */
-               mvpp2_prs_tcam_port_map_set(&pe, 0);
-
-               /* Update shadow table */
-               mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC);
+               cast_match = MVPP2_PRS_MCAST_VAL;
+               tid = MVPP2_PE_MAC_MC_PROMISCUOUS;
+               ri = MVPP2_PRS_RI_L2_MCAST;
        }
 
-       /* Update port mask */
-       mvpp2_prs_tcam_port_set(&pe, port, add);
-
-       mvpp2_prs_hw_write(priv, &pe);
-}
-
-/* Accept multicast */
-static void mvpp2_prs_mac_multi_set(struct mvpp2 *priv, int port, int index,
-                                   bool add)
-{
-       struct mvpp2_prs_entry pe;
-       unsigned char da_mc;
-
-       /* Ethernet multicast address first byte is
-        * 0x01 for IPv4 and 0x33 for IPv6
-        */
-       da_mc = (index == MVPP2_PE_MAC_MC_ALL) ? 0x01 : 0x33;
-
-       if (priv->prs_shadow[index].valid) {
-               /* Entry exist - update port only */
-               pe.index = index;
+       /* promiscuous mode - Accept unknown unicast or multicast packets */
+       if (priv->prs_shadow[tid].valid) {
+               pe.index = tid;
                mvpp2_prs_hw_read(priv, &pe);
        } else {
-               /* Entry doesn't exist - create new */
                memset(&pe, 0, sizeof(pe));
                mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC);
-               pe.index = index;
+               pe.index = tid;
 
                /* Continue - set next lookup */
                mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_DSA);
 
                /* Set result info bits */
-               mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L2_MCAST,
-                                        MVPP2_PRS_RI_L2_CAST_MASK);
+               mvpp2_prs_sram_ri_update(&pe, ri, MVPP2_PRS_RI_L2_CAST_MASK);
 
-               /* Update tcam entry data first byte */
-               mvpp2_prs_tcam_data_byte_set(&pe, 0, da_mc, 0xff);
+               /* Match UC or MC addresses */
+               mvpp2_prs_tcam_data_byte_set(&pe, 0, cast_match,
+                                            MVPP2_PRS_CAST_MASK);
 
                /* Shift to ethertype */
                mvpp2_prs_sram_shift_set(&pe, 2 * ETH_ALEN,
@@ -2029,24 +2079,30 @@ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add,
                mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_DSA);
                pe.index = tid;
 
-               /* Shift 4 bytes if DSA tag or 8 bytes in case of EDSA tag*/
-               mvpp2_prs_sram_shift_set(&pe, shift,
-                                        MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
-
                /* Update shadow table */
                mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_DSA);
 
                if (tagged) {
                        /* Set tagged bit in DSA tag */
                        mvpp2_prs_tcam_data_byte_set(&pe, 0,
-                                                    MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
-                                                    MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
-                       /* Clear all ai bits for next iteration */
-                       mvpp2_prs_sram_ai_update(&pe, 0,
-                                                MVPP2_PRS_SRAM_AI_MASK);
-                       /* If packet is tagged continue check vlans */
-                       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VLAN);
+                                            MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
+                                            MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
+
+                       /* Set ai bits for next iteration */
+                       if (extend)
+                               mvpp2_prs_sram_ai_update(&pe, 1,
+                                                       MVPP2_PRS_SRAM_AI_MASK);
+                       else
+                               mvpp2_prs_sram_ai_update(&pe, 0,
+                                                       MVPP2_PRS_SRAM_AI_MASK);
+
+                       /* If packet is tagged continue check vid filtering */
+                       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
                } else {
+                       /* Shift 4 bytes for DSA tag or 8 bytes for EDSA tag*/
+                       mvpp2_prs_sram_shift_set(&pe, shift,
+                                       MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
                        /* Set result info bits to 'no vlans' */
                        mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_NONE,
                                                 MVPP2_PRS_RI_VLAN_MASK);
@@ -2231,10 +2287,9 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai,
 
                mvpp2_prs_match_etype(pe, 0, tpid);
 
-               mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_L2);
-               /* Shift 4 bytes - skip 1 vlan tag */
-               mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
-                                        MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+               /* VLAN tag detected, proceed with VID filtering */
+               mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VID);
+
                /* Clear all ai bits for next iteration */
                mvpp2_prs_sram_ai_update(pe, 0, MVPP2_PRS_SRAM_AI_MASK);
 
@@ -2375,8 +2430,8 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1,
                mvpp2_prs_match_etype(pe, 4, tpid2);
 
                mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VLAN);
-               /* Shift 8 bytes - skip 2 vlan tags */
-               mvpp2_prs_sram_shift_set(pe, 2 * MVPP2_VLAN_TAG_LEN,
+               /* Shift 4 bytes - skip outer vlan tag */
+               mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
                                         MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
                mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_DOUBLE,
                                         MVPP2_PRS_RI_VLAN_MASK);
@@ -2694,11 +2749,10 @@ static void mvpp2_prs_mac_init(struct mvpp2 *priv)
        mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC);
        mvpp2_prs_hw_write(priv, &pe);
 
-       /* place holders only - no ports */
+       /* Create dummy entries for drop all and promiscuous modes */
        mvpp2_prs_mac_drop_all_set(priv, 0, false);
-       mvpp2_prs_mac_promisc_set(priv, 0, false);
-       mvpp2_prs_mac_multi_set(priv, 0, MVPP2_PE_MAC_MC_ALL, false);
-       mvpp2_prs_mac_multi_set(priv, 0, MVPP2_PE_MAC_MC_IP6, false);
+       mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_UNI_CAST, false);
+       mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_MULTI_CAST, false);
 }
 
 /* Set default entries for various types of dsa packets */
@@ -2755,6 +2809,62 @@ static void mvpp2_prs_dsa_init(struct mvpp2 *priv)
        mvpp2_prs_hw_write(priv, &pe);
 }
 
+/* Write one default VID lookup entry at tcam position @index.
+ *
+ * The entry matches ai value @ai_bits under the EDSA VID ai mask, shifts the
+ * parser by @shift bytes, clears the ai bits and continues with the L2
+ * lookup. It is enabled for all ports.
+ */
+static void mvpp2_prs_vid_dflt_entry_set(struct mvpp2 *priv,
+                                        unsigned int index,
+                                        unsigned int ai_bits, int shift)
+{
+       struct mvpp2_prs_entry pe;
+
+       memset(&pe, 0, sizeof(pe));
+
+       pe.index = index;
+       mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+       mvpp2_prs_tcam_ai_update(&pe, ai_bits, MVPP2_PRS_EDSA_VID_AI_BIT);
+
+       /* Skip the VLAN header */
+       mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+       /* Clear all ai bits for next iteration */
+       mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+       /* Unmask all ports */
+       mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+       /* Update shadow table and hw entry */
+       mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+       mvpp2_prs_hw_write(priv, &pe);
+}
+
+/* Initialize parser entries for VID filtering */
+static void mvpp2_prs_vid_init(struct mvpp2 *priv)
+{
+       /* Default vid entry - offset of 4 bytes */
+       mvpp2_prs_vid_dflt_entry_set(priv, MVPP2_PE_VID_FLTR_DEFAULT, 0,
+                                    MVPP2_VLAN_TAG_LEN);
+
+       /* Default vid entry for extended DSA - offset of 8 bytes */
+       mvpp2_prs_vid_dflt_entry_set(priv, MVPP2_PE_VID_EDSA_FLTR_DEFAULT,
+                                    MVPP2_PRS_EDSA_VID_AI_BIT,
+                                    MVPP2_VLAN_TAG_EDSA_LEN);
+}
+
 /* Match basic ethertypes */
 static int mvpp2_prs_etype_init(struct mvpp2 *priv)
 {
@@ -3023,7 +3133,8 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv)
        mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN);
        pe.index = MVPP2_PE_VLAN_DBL;
 
-       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
+
        /* Clear ai for next iterations */
        mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
        mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_DOUBLE,
@@ -3386,6 +3497,192 @@ static int mvpp2_prs_ip6_init(struct mvpp2 *priv)
        return 0;
 }
 
+/* Look up the VID filter entry whose tcam data matches <vid, mask>.
+ *
+ * Every valid MVPP2_PRS_LU_VID shadow entry in the VID filter range is read
+ * back from hardware; the 12-bit VID and its enable mask are rebuilt from
+ * tcam data bytes 2 and 3. Returns the tcam index of the first match, or 0
+ * when nothing matches. Note that @pmap is not consulted by this search.
+ */
+static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid,
+                                   u16 mask)
+{
+       unsigned char data_hi, data_lo, en_hi, en_lo;
+       struct mvpp2_prs_entry pe;
+       int tid;
+
+       for (tid = MVPP2_PE_VID_FILT_RANGE_START;
+            tid <= MVPP2_PE_VID_FILT_RANGE_END; tid++) {
+               u16 found_vid, found_mask;
+
+               /* Only valid VID lookup entries are of interest */
+               if (!priv->prs_shadow[tid].valid ||
+                   priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID)
+                       continue;
+
+               pe.index = tid;
+               mvpp2_prs_hw_read(priv, &pe);
+
+               mvpp2_prs_tcam_data_byte_get(&pe, 2, &data_hi, &en_hi);
+               mvpp2_prs_tcam_data_byte_get(&pe, 3, &data_lo, &en_lo);
+
+               found_vid = ((data_hi & 0xf) << 8) + data_lo;
+               found_mask = ((en_hi & 0xf) << 8) + en_lo;
+
+               if (found_vid == vid && found_mask == mask)
+                       return tid;
+       }
+
+       return 0;
+}
+
+/* Write the parser entry that accepts @vid on @port.
+ *
+ * If an entry for this VID already exists it is read back from hardware and
+ * updated, otherwise the first free tcam slot in the port's VID range is
+ * claimed. Returns 0 on success, or the negative value handed back by
+ * mvpp2_prs_tcam_first_free() when there is no room for a new filter.
+ */
+static int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid)
+{
+       unsigned int vid_start = MVPP2_PE_VID_FILT_RANGE_START +
+                                port->id * MVPP2_PRS_VLAN_FILT_MAX;
+       unsigned int mask = 0xfff, reg_val, shift;
+       struct mvpp2 *priv = port->priv;
+       struct mvpp2_prs_entry pe;
+       int tid;
+
+       /* Start from a known state on both paths below */
+       memset(&pe, 0, sizeof(pe));
+
+       /* Scan TCAM and see if entry with this <vid,port> already exist */
+       tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, mask);
+
+       reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+       if (reg_val & MVPP2_DSA_EXTENDED)
+               shift = MVPP2_VLAN_TAG_EDSA_LEN;
+       else
+               shift = MVPP2_VLAN_TAG_LEN;
+
+       /* No such entry */
+       if (!tid) {
+               /* Go through all entries from first to last in vlan range */
+               tid = mvpp2_prs_tcam_first_free(priv, vid_start,
+                                               vid_start +
+                                               MVPP2_PRS_VLAN_FILT_MAX_ENTRY);
+
+               /* There isn't room for a new VID filter */
+               if (tid < 0)
+                       return tid;
+
+               mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+               pe.index = tid;
+
+               /* Mask all ports */
+               mvpp2_prs_tcam_port_map_set(&pe, 0);
+       } else {
+               /* The index selects which entry gets read back; without it
+                * the read, and the write further down, would use whatever
+                * happened to be in pe.index.
+                */
+               pe.index = tid;
+               mvpp2_prs_hw_read(priv, &pe);
+       }
+
+       /* Enable the current port */
+       mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+       /* Continue - set next lookup */
+       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+       /* Skip VLAN header - Set offset to 4 or 8 bytes */
+       mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+       /* Set match on VID */
+       mvpp2_prs_match_vid(&pe, MVPP2_PRS_VID_TCAM_BYTE, vid);
+
+       /* Clear all ai bits for next iteration */
+       mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+       /* Update shadow table */
+       mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+       mvpp2_prs_hw_write(priv, &pe);
+
+       return 0;
+}
+
+/* Remove the parser entry used for filtering @vid on @port.
+ *
+ * Does nothing when no entry matching the VID is found.
+ */
+static void mvpp2_prs_vid_entry_remove(struct mvpp2_port *port, u16 vid)
+{
+       struct mvpp2 *priv = port->priv;
+       int tid;
+
+       /* Scan TCAM and see if entry with this <vid,port> already exist */
+       tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, 0xfff);
+
+       /* No such entry: mvpp2_prs_vid_range_find() returns 0 in that case */
+       if (!tid)
+               return;
+
+       /* Invalidate the entry in hardware and in the shadow table */
+       mvpp2_prs_hw_inv(priv, tid);
+       priv->prs_shadow[tid].valid = false;
+}
+
+/* Remove all existing VID filters on this port.
+ *
+ * Walks the port's own tcam range and invalidates every valid slot, both in
+ * hardware and in the shadow table.
+ */
+static void mvpp2_prs_vid_remove_all(struct mvpp2_port *port)
+{
+       struct mvpp2 *priv = port->priv;
+       int tid;
+
+       for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id);
+            tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) {
+               if (!priv->prs_shadow[tid].valid)
+                       continue;
+
+               /* tid is a tcam index, not a VLAN id, so it cannot be handed
+                * to mvpp2_prs_vid_entry_remove(); invalidate the slot
+                * directly instead.
+                */
+               mvpp2_prs_hw_inv(priv, tid);
+               priv->prs_shadow[tid].valid = false;
+       }
+}
+
+/* Invalidate this port's VID filtering guard entry, both in hardware and in
+ * the shadow table.
+ */
+static void mvpp2_prs_vid_disable_filtering(struct mvpp2_port *port)
+{
+       struct mvpp2 *priv = port->priv;
+       unsigned int guard_tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+
+       mvpp2_prs_hw_inv(priv, guard_tid);
+       priv->prs_shadow[guard_tid].valid = false;
+}
+
+/* Install the per-port guard entry: VLAN packets on this port that match no
+ * registered VID get the drop bit set in their result info.
+ *
+ * Nothing is done when the guard entry is already marked valid in the
+ * shadow table.
+ */
+static void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port)
+{
+       unsigned int guard_tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+       struct mvpp2 *priv = port->priv;
+       struct mvpp2_prs_entry pe;
+       unsigned int tag_len;
+
+       if (priv->prs_shadow[guard_tid].valid)
+               return;
+
+       memset(&pe, 0, sizeof(pe));
+       pe.index = guard_tid;
+
+       /* The tag to skip is 4 or 8 bytes, depending on extended DSA */
+       if (mvpp2_read(priv, MVPP2_MH_REG(port->id)) & MVPP2_DSA_EXTENDED)
+               tag_len = MVPP2_VLAN_TAG_EDSA_LEN;
+       else
+               tag_len = MVPP2_VLAN_TAG_LEN;
+
+       mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+       /* Start with all ports masked, then enable this port only */
+       mvpp2_prs_tcam_port_map_set(&pe, 0);
+       mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+       /* Continue with the L2 lookup once the VLAN header is skipped */
+       mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+       mvpp2_prs_sram_shift_set(&pe, tag_len,
+                                MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+       /* Drop VLAN packets that don't belong to any VIDs on this port */
+       mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK,
+                                MVPP2_PRS_RI_DROP_MASK);
+
+       /* Clear all ai bits for next iteration */
+       mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+       /* Update shadow table and hw entry */
+       mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+       mvpp2_prs_hw_write(priv, &pe);
+}
+
 /* Parser default initialization */
 static int mvpp2_prs_default_init(struct platform_device *pdev,
                                  struct mvpp2 *priv)
@@ -3429,6 +3726,8 @@ static int mvpp2_prs_default_init(struct platform_device *pdev,
 
        mvpp2_prs_dsa_init(priv);
 
+       mvpp2_prs_vid_init(priv);
+
        err = mvpp2_prs_etype_init(priv);
        if (err)
                return err;
@@ -3485,8 +3784,8 @@ mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da,
        mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC);
 
        /* Go through the all entires with MVPP2_PRS_LU_MAC */
-       for (tid = MVPP2_PE_FIRST_FREE_TID;
-            tid <= MVPP2_PE_LAST_FREE_TID; tid++) {
+       for (tid = MVPP2_PE_MAC_RANGE_START;
+            tid <= MVPP2_PE_MAC_RANGE_END; tid++) {
                unsigned int entry_pmap;
 
                if (!priv->prs_shadow[tid].valid ||
@@ -3508,16 +3807,17 @@ mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da,
 }
 
 /* Update parser's mac da entry */
-static int mvpp2_prs_mac_da_accept(struct mvpp2 *priv, int port,
-                                  const u8 *da, bool add)
+static int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da,
+                                  bool add)
 {
-       struct mvpp2_prs_entry *pe;
-       unsigned int pmap, len, ri;
        unsigned char mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+       struct mvpp2 *priv = port->priv;
+       unsigned int pmap, len, ri;
+       struct mvpp2_prs_entry *pe;
        int tid;
 
        /* Scan TCAM and see if entry with this <MAC DA, port> already exist */
-       pe = mvpp2_prs_mac_da_range_find(priv, (1 << port), da, mask,
+       pe = mvpp2_prs_mac_da_range_find(priv, BIT(port->id), da, mask,
                                         MVPP2_PRS_UDF_MAC_DEF);
 
        /* No such entry */
@@ -3526,18 +3826,10 @@ static int mvpp2_prs_mac_da_accept(struct mvpp2 *priv, int port,
                        return 0;
 
                /* Create new TCAM entry */
-               /* Find first range mac entry*/
-               for (tid = MVPP2_PE_FIRST_FREE_TID;
-                    tid <= MVPP2_PE_LAST_FREE_TID; tid++)
-                       if (priv->prs_shadow[tid].valid &&
-                           (priv->prs_shadow[tid].lu == MVPP2_PRS_LU_MAC) &&
-                           (priv->prs_shadow[tid].udf ==
-                                                      MVPP2_PRS_UDF_MAC_RANGE))
-                               break;
-
                /* Go through the all entries from first to last */
-               tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
-                                               tid - 1);
+               tid = mvpp2_prs_tcam_first_free(priv,
+                                               MVPP2_PE_MAC_RANGE_START,
+                                               MVPP2_PE_MAC_RANGE_END);
                if (tid < 0)
                        return tid;
 
@@ -3552,7 +3844,7 @@ static int mvpp2_prs_mac_da_accept(struct mvpp2 *priv, int port,
        }
 
        /* Update port mask */
-       mvpp2_prs_tcam_port_set(pe, port, add);
+       mvpp2_prs_tcam_port_set(pe, port->id, add);
 
        /* Invalidate the entry if no ports are left enabled */
        pmap = mvpp2_prs_tcam_port_map_get(pe);
@@ -3576,12 +3868,16 @@ static int mvpp2_prs_mac_da_accept(struct mvpp2 *priv, int port,
                mvpp2_prs_tcam_data_byte_set(pe, len, da[len], 0xff);
 
        /* Set result info bits */
-       if (is_broadcast_ether_addr(da))
+       if (is_broadcast_ether_addr(da)) {
                ri = MVPP2_PRS_RI_L2_BCAST;
-       else if (is_multicast_ether_addr(da))
+       } else if (is_multicast_ether_addr(da)) {
                ri = MVPP2_PRS_RI_L2_MCAST;
-       else
-               ri = MVPP2_PRS_RI_L2_UCAST | MVPP2_PRS_RI_MAC_ME_MASK;
+       } else {
+               ri = MVPP2_PRS_RI_L2_UCAST;
+
+               if (ether_addr_equal(da, port->dev->dev_addr))
+                       ri |= MVPP2_PRS_RI_MAC_ME_MASK;
+       }
 
        mvpp2_prs_sram_ri_update(pe, ri, MVPP2_PRS_RI_L2_CAST_MASK |
                                 MVPP2_PRS_RI_MAC_ME_MASK);
@@ -3608,13 +3904,12 @@ static int mvpp2_prs_update_mac_da(struct net_device *dev, const u8 *da)
        int err;
 
        /* Remove old parser entry */
-       err = mvpp2_prs_mac_da_accept(port->priv, port->id, dev->dev_addr,
-                                     false);
+       err = mvpp2_prs_mac_da_accept(port, dev->dev_addr, false);
        if (err)
                return err;
 
        /* Add new parser entry */
-       err = mvpp2_prs_mac_da_accept(port->priv, port->id, da, true);
+       err = mvpp2_prs_mac_da_accept(port, da, true);
        if (err)
                return err;
 
@@ -3624,14 +3919,15 @@ static int mvpp2_prs_update_mac_da(struct net_device *dev, const u8 *da)
        return 0;
 }
 
-/* Delete all port's multicast simple (not range) entries */
-static void mvpp2_prs_mcast_del_all(struct mvpp2 *priv, int port)
+static void mvpp2_prs_mac_del_all(struct mvpp2_port *port)
 {
+       struct mvpp2 *priv = port->priv;
        struct mvpp2_prs_entry pe;
+       unsigned long pmap;
        int index, tid;
 
-       for (tid = MVPP2_PE_FIRST_FREE_TID;
-            tid <= MVPP2_PE_LAST_FREE_TID; tid++) {
+       for (tid = MVPP2_PE_MAC_RANGE_START;
+            tid <= MVPP2_PE_MAC_RANGE_END; tid++) {
                unsigned char da[ETH_ALEN], da_mask[ETH_ALEN];
 
                if (!priv->prs_shadow[tid].valid ||
@@ -3639,18 +3935,29 @@ static void mvpp2_prs_mcast_del_all(struct mvpp2 *priv, int port)
                    (priv->prs_shadow[tid].udf != MVPP2_PRS_UDF_MAC_DEF))
                        continue;
 
-               /* Only simple mac entries */
                pe.index = tid;
                mvpp2_prs_hw_read(priv, &pe);
 
+               pmap = mvpp2_prs_tcam_port_map_get(&pe);
+
+               /* We only want entries active on this port */
+               if (!test_bit(port->id, &pmap))
+                       continue;
+
                /* Read mac addr from entry */
                for (index = 0; index < ETH_ALEN; index++)
                        mvpp2_prs_tcam_data_byte_get(&pe, index, &da[index],
                                                     &da_mask[index]);
 
-               if (is_multicast_ether_addr(da) && !is_broadcast_ether_addr(da))
-                       /* Delete this entry */
-                       mvpp2_prs_mac_da_accept(priv, port, da, false);
+               /* Special cases : Don't remove broadcast and port's own
+                * address
+                */
+               if (is_broadcast_ether_addr(da) ||
+                   ether_addr_equal(da, port->dev->dev_addr))
+                       continue;
+
+               /* Remove entry from TCAM */
+               mvpp2_prs_mac_da_accept(port, da, false);
        }
 }
 
@@ -3901,7 +4208,6 @@ static int mvpp2_bm_pool_create(struct platform_device *pdev,
        val |= MVPP2_BM_START_MASK;
        mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id), val);
 
-       bm_pool->type = MVPP2_BM_FREE;
        bm_pool->size = size;
        bm_pool->pkt_size = 0;
        bm_pool->buf_num = 0;
@@ -3954,11 +4260,17 @@ static void mvpp2_bm_bufs_get_addrs(struct device *dev, struct mvpp2 *priv,
 
 /* Free all buffers from the pool */
 static void mvpp2_bm_bufs_free(struct device *dev, struct mvpp2 *priv,
-                              struct mvpp2_bm_pool *bm_pool)
+                              struct mvpp2_bm_pool *bm_pool, int buf_num)
 {
        int i;
 
-       for (i = 0; i < bm_pool->buf_num; i++) {
+       if (buf_num > bm_pool->buf_num) {
+               WARN(1, "Pool does not have so many bufs pool(%d) bufs(%d)\n",
+                    bm_pool->id, buf_num);
+               buf_num = bm_pool->buf_num;
+       }
+
+       for (i = 0; i < buf_num; i++) {
                dma_addr_t buf_dma_addr;
                phys_addr_t buf_phys_addr;
                void *data;
@@ -3980,16 +4292,39 @@ static void mvpp2_bm_bufs_free(struct device *dev, struct mvpp2 *priv,
        bm_pool->buf_num -= i;
 }
 
+/* Return the number of buffers the hardware counters report for a BM pool.
+ *
+ * The pool pointer count and the BPPI pointer count are summed; a non-zero
+ * sum is bumped by one.
+ */
+static int mvpp2_check_hw_buf_num(struct mvpp2 *priv,
+                                 struct mvpp2_bm_pool *bm_pool)
+{
+       u32 pool_ptrs, bppi_ptrs;
+       int buf_num;
+
+       pool_ptrs = mvpp2_read(priv, MVPP2_BM_POOL_PTRS_NUM_REG(bm_pool->id)) &
+                   MVPP22_BM_POOL_PTRS_NUM_MASK;
+       bppi_ptrs = mvpp2_read(priv, MVPP2_BM_BPPI_PTRS_NUM_REG(bm_pool->id)) &
+                   MVPP2_BM_BPPI_PTR_NUM_MASK;
+       buf_num = pool_ptrs + bppi_ptrs;
+
+       /* HW has one buffer ready which is not reflected in the counters */
+       return buf_num ? buf_num + 1 : 0;
+}
+
 /* Cleanup pool */
 static int mvpp2_bm_pool_destroy(struct platform_device *pdev,
                                 struct mvpp2 *priv,
                                 struct mvpp2_bm_pool *bm_pool)
 {
+       int buf_num;
        u32 val;
 
-       mvpp2_bm_bufs_free(&pdev->dev, priv, bm_pool);
-       if (bm_pool->buf_num) {
-               WARN(1, "cannot free all buffers in pool %d\n", bm_pool->id);
+       buf_num = mvpp2_check_hw_buf_num(priv, bm_pool);
+       mvpp2_bm_bufs_free(&pdev->dev, priv, bm_pool, buf_num);
+
+       /* Check buffer counters after free */
+       buf_num = mvpp2_check_hw_buf_num(priv, bm_pool);
+       if (buf_num) {
+               WARN(1, "cannot free all buffers in pool %d, buf_num left %d\n",
+                    bm_pool->id, bm_pool->buf_num);
                return 0;
        }
 
@@ -4051,6 +4386,21 @@ static int mvpp2_bm_init(struct platform_device *pdev, struct mvpp2 *priv)
        return 0;
 }
 
+/* Fill the mvpp2_pools table with the packet size and buffer count of the
+ * short, long and jumbo pools.
+ */
+static void mvpp2_setup_bm_pool(void)
+{
+       /* Packet size of each pool */
+       mvpp2_pools[MVPP2_BM_SHORT].pkt_size = MVPP2_BM_SHORT_PKT_SIZE;
+       mvpp2_pools[MVPP2_BM_LONG].pkt_size  = MVPP2_BM_LONG_PKT_SIZE;
+       mvpp2_pools[MVPP2_BM_JUMBO].pkt_size = MVPP2_BM_JUMBO_PKT_SIZE;
+
+       /* Number of buffers of each pool */
+       mvpp2_pools[MVPP2_BM_SHORT].buf_num = MVPP2_BM_SHORT_BUF_NUM;
+       mvpp2_pools[MVPP2_BM_LONG].buf_num  = MVPP2_BM_LONG_BUF_NUM;
+       mvpp2_pools[MVPP2_BM_JUMBO].buf_num = MVPP2_BM_JUMBO_BUF_NUM;
+}
+
 /* Attach long pool to rxq */
 static void mvpp2_rxq_long_pool_set(struct mvpp2_port *port,
                                    int lrxq, int long_pool)
@@ -4189,13 +4539,11 @@ static int mvpp2_bm_bufs_add(struct mvpp2_port *port,
        bm_pool->buf_num += i;
 
        netdev_dbg(port->dev,
-                  "%s pool %d: pkt_size=%4d, buf_size=%4d, total_size=%4d\n",
-                  bm_pool->type == MVPP2_BM_SWF_SHORT ? "short" : " long",
+                  "pool %d: pkt_size=%4d, buf_size=%4d, total_size=%4d\n",
                   bm_pool->id, bm_pool->pkt_size, buf_size, total_size);
 
        netdev_dbg(port->dev,
-                  "%s pool %d: %d of %d buffers added\n",
-                  bm_pool->type == MVPP2_BM_SWF_SHORT ? "short" : " long",
+                  "pool %d: %d of %d buffers added\n",
                   bm_pool->id, i, buf_num);
        return i;
 }
@@ -4204,25 +4552,20 @@ static int mvpp2_bm_bufs_add(struct mvpp2_port *port,
  * pool pointer on success
  */
 static struct mvpp2_bm_pool *
-mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type,
-                 int pkt_size)
+mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned pool, int pkt_size)
 {
        struct mvpp2_bm_pool *new_pool = &port->priv->bm_pools[pool];
        int num;
 
-       if (new_pool->type != MVPP2_BM_FREE && new_pool->type != type) {
-               netdev_err(port->dev, "mixing pool types is forbidden\n");
+       if (pool >= MVPP2_BM_POOLS_NUM) {
+               netdev_err(port->dev, "Invalid pool %d\n", pool);
                return NULL;
        }
 
-       if (new_pool->type == MVPP2_BM_FREE)
-               new_pool->type = type;
-
        /* Allocate buffers in case BM pool is used as long pool, but packet
         * size doesn't match MTU or BM pool hasn't being used yet
         */
-       if (((type == MVPP2_BM_SWF_LONG) && (pkt_size > new_pool->pkt_size)) ||
-           (new_pool->pkt_size == 0)) {
+       if (new_pool->pkt_size == 0) {
                int pkts_num;
 
                /* Set default buffer number or free all the buffers in case
@@ -4230,12 +4573,10 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type,
                 */
                pkts_num = new_pool->buf_num;
                if (pkts_num == 0)
-                       pkts_num = type == MVPP2_BM_SWF_LONG ?
-                                  MVPP2_BM_LONG_BUF_NUM :
-                                  MVPP2_BM_SHORT_BUF_NUM;
+                       pkts_num = mvpp2_pools[pool].buf_num;
                else
                        mvpp2_bm_bufs_free(port->dev->dev.parent,
-                                          port->priv, new_pool);
+                                          port->priv, new_pool, pkts_num);
 
                new_pool->pkt_size = pkt_size;
                new_pool->frag_size =
@@ -4261,16 +4602,28 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type,
 static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
 {
        int rxq;
+       enum mvpp2_bm_pool_log_num long_log_pool, short_log_pool;
+
+       /* If port pkt_size is higher than 1518B:
+        * HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool
+        * else: HW Long pool - SW Long pool, HW Short pool - SW Short pool
+        */
+       if (port->pkt_size > MVPP2_BM_LONG_PKT_SIZE) {
+               long_log_pool = MVPP2_BM_JUMBO;
+               short_log_pool = MVPP2_BM_LONG;
+       } else {
+               long_log_pool = MVPP2_BM_LONG;
+               short_log_pool = MVPP2_BM_SHORT;
+       }
 
        if (!port->pool_long) {
                port->pool_long =
-                      mvpp2_bm_pool_use(port, MVPP2_BM_SWF_LONG_POOL(port->id),
-                                        MVPP2_BM_SWF_LONG,
-                                        port->pkt_size);
+                       mvpp2_bm_pool_use(port, long_log_pool,
+                                         mvpp2_pools[long_log_pool].pkt_size);
                if (!port->pool_long)
                        return -ENOMEM;
 
-               port->pool_long->port_map |= (1 << port->id);
+               port->pool_long->port_map |= BIT(port->id);
 
                for (rxq = 0; rxq < port->nrxqs; rxq++)
                        mvpp2_rxq_long_pool_set(port, rxq, port->pool_long->id);
@@ -4278,13 +4631,12 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
 
        if (!port->pool_short) {
                port->pool_short =
-                       mvpp2_bm_pool_use(port, MVPP2_BM_SWF_SHORT_POOL,
-                                         MVPP2_BM_SWF_SHORT,
-                                         MVPP2_BM_SHORT_PKT_SIZE);
+                       mvpp2_bm_pool_use(port, short_log_pool,
+                                         mvpp2_pools[short_log_pool].pkt_size);
                if (!port->pool_short)
                        return -ENOMEM;
 
-               port->pool_short->port_map |= (1 << port->id);
+               port->pool_short->port_map |= BIT(port->id);
 
                for (rxq = 0; rxq < port->nrxqs; rxq++)
                        mvpp2_rxq_short_pool_set(port, rxq,
@@ -4297,30 +4649,49 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
 static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu)
 {
        struct mvpp2_port *port = netdev_priv(dev);
-       struct mvpp2_bm_pool *port_pool = port->pool_long;
-       int num, pkts_num = port_pool->buf_num;
+       enum mvpp2_bm_pool_log_num new_long_pool;
        int pkt_size = MVPP2_RX_PKT_SIZE(mtu);
 
-       /* Update BM pool with new buffer size */
-       mvpp2_bm_bufs_free(dev->dev.parent, port->priv, port_pool);
-       if (port_pool->buf_num) {
-               WARN(1, "cannot free all buffers in pool %d\n", port_pool->id);
-               return -EIO;
-       }
-
-       port_pool->pkt_size = pkt_size;
-       port_pool->frag_size = SKB_DATA_ALIGN(MVPP2_RX_BUF_SIZE(pkt_size)) +
-               MVPP2_SKB_SHINFO_SIZE;
-       num = mvpp2_bm_bufs_add(port, port_pool, pkts_num);
-       if (num != pkts_num) {
-               WARN(1, "pool %d: %d of %d allocated\n",
-                    port_pool->id, num, pkts_num);
-               return -EIO;
+       /* If port MTU is higher than 1518B:
+        * HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool
+        * else: HW Long pool - SW Long pool, HW Short pool - SW Short pool
+        */
+       if (pkt_size > MVPP2_BM_LONG_PKT_SIZE)
+               new_long_pool = MVPP2_BM_JUMBO;
+       else
+               new_long_pool = MVPP2_BM_LONG;
+
+       if (new_long_pool != port->pool_long->id) {
+               /* Remove port from old short & long pool */
+               port->pool_long = mvpp2_bm_pool_use(port, port->pool_long->id,
+                                                   port->pool_long->pkt_size);
+               port->pool_long->port_map &= ~BIT(port->id);
+               port->pool_long = NULL;
+
+               port->pool_short = mvpp2_bm_pool_use(port, port->pool_short->id,
+                                                    port->pool_short->pkt_size);
+               port->pool_short->port_map &= ~BIT(port->id);
+               port->pool_short = NULL;
+
+               port->pkt_size =  pkt_size;
+
+               /* Add port to new short & long pool */
+               mvpp2_swf_bm_pool_init(port);
+
+               /* Update L4 checksum when jumbo enable/disable on port */
+               if (new_long_pool == MVPP2_BM_JUMBO && port->id != 0) {
+                       dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+                       dev->hw_features &= ~(NETIF_F_IP_CSUM |
+                                             NETIF_F_IPV6_CSUM);
+               } else {
+                       dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+                       dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+               }
        }
 
-       mvpp2_bm_pool_bufsize_set(port->priv, port_pool,
-                                 MVPP2_RX_BUF_SIZE(port_pool->pkt_size));
        dev->mtu = mtu;
+       dev->wanted_features = dev->features;
+
        netdev_update_features(dev);
        return 0;
 }
@@ -7007,15 +7378,14 @@ static int mvpp2_open(struct net_device *dev)
                        0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
        int err;
 
-       err = mvpp2_prs_mac_da_accept(port->priv, port->id, mac_bcast, true);
+       err = mvpp2_prs_mac_da_accept(port, mac_bcast, true);
        if (err) {
                netdev_err(dev, "mvpp2_prs_mac_da_accept BC failed\n");
                return err;
        }
-       err = mvpp2_prs_mac_da_accept(port->priv, port->id,
-                                     dev->dev_addr, true);
+       err = mvpp2_prs_mac_da_accept(port, dev->dev_addr, true);
        if (err) {
-               netdev_err(dev, "mvpp2_prs_mac_da_accept MC failed\n");
+               netdev_err(dev, "mvpp2_prs_mac_da_accept own addr failed\n");
                return err;
        }
        err = mvpp2_prs_tag_mode_set(port->priv, port->id, MVPP2_TAG_TYPE_MH);
@@ -7129,30 +7499,64 @@ static int mvpp2_stop(struct net_device *dev)
        return 0;
 }
 
+/* Add a parser accept entry on @port for every address in @list.
+ *
+ * Stops at the first address that fails and returns that error; returns 0
+ * when every address was accepted or the list is empty.
+ */
+static int mvpp2_prs_mac_da_accept_list(struct mvpp2_port *port,
+                                       struct netdev_hw_addr_list *list)
+{
+       struct netdev_hw_addr *entry;
+       int err = 0;
+
+       netdev_hw_addr_list_for_each(entry, list) {
+               err = mvpp2_prs_mac_da_accept(port, entry->addr, true);
+               if (err)
+                       break;
+       }
+
+       return err;
+}
+
+/* Switch unicast and multicast promiscuous mode on or off for @port.
+ *
+ * VID filtering is enabled only when promiscuous mode is being turned off
+ * and the device has NETIF_F_HW_VLAN_CTAG_FILTER set; in every other case
+ * it is disabled.
+ */
+static void mvpp2_set_rx_promisc(struct mvpp2_port *port, bool enable)
+{
+       bool vlan_filter = !!(port->dev->features &
+                             NETIF_F_HW_VLAN_CTAG_FILTER);
+       struct mvpp2 *priv = port->priv;
+
+       if (enable || !vlan_filter)
+               mvpp2_prs_vid_disable_filtering(port);
+       else
+               mvpp2_prs_vid_enable_filtering(port);
+
+       mvpp2_prs_mac_promisc_set(priv, port->id, MVPP2_PRS_L2_UNI_CAST,
+                                 enable);
+       mvpp2_prs_mac_promisc_set(priv, port->id, MVPP2_PRS_L2_MULTI_CAST,
+                                 enable);
+}
+
 static void mvpp2_set_rx_mode(struct net_device *dev)
 {
        struct mvpp2_port *port = netdev_priv(dev);
-       struct mvpp2 *priv = port->priv;
-       struct netdev_hw_addr *ha;
-       int id = port->id;
-       bool allmulti = dev->flags & IFF_ALLMULTI;
-
-retry:
-       mvpp2_prs_mac_promisc_set(priv, id, dev->flags & IFF_PROMISC);
-       mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_ALL, allmulti);
-       mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_IP6, allmulti);
-
-       /* Remove all port->id's mcast enries */
-       mvpp2_prs_mcast_del_all(priv, id);
-
-       if (!allmulti) {
-               netdev_for_each_mc_addr(ha, dev) {
-                       if (mvpp2_prs_mac_da_accept(priv, id, ha->addr, true)) {
-                               allmulti = true;
-                               goto retry;
-                       }
-               }
+
+       /* Clear the whole UC and MC list */
+       mvpp2_prs_mac_del_all(port);
+
+       if (dev->flags & IFF_PROMISC) {
+               mvpp2_set_rx_promisc(port, true);
+               return;
+       }
+
+       mvpp2_set_rx_promisc(port, false);
+
+       if (netdev_uc_count(dev) > MVPP2_PRS_MAC_UC_FILT_MAX ||
+           mvpp2_prs_mac_da_accept_list(port, &dev->uc))
+               mvpp2_prs_mac_promisc_set(port->priv, port->id,
+                                         MVPP2_PRS_L2_UNI_CAST, true);
+
+       if (dev->flags & IFF_ALLMULTI) {
+               mvpp2_prs_mac_promisc_set(port->priv, port->id,
+                                         MVPP2_PRS_L2_MULTI_CAST, true);
+               return;
        }
+
+       if (netdev_mc_count(dev) > MVPP2_PRS_MAC_MC_FILT_MAX ||
+           mvpp2_prs_mac_da_accept_list(port, &dev->mc))
+               mvpp2_prs_mac_promisc_set(port->priv, port->id,
+                                         MVPP2_PRS_L2_MULTI_CAST, true);
 }
 
 static int mvpp2_set_mac_address(struct net_device *dev, void *p)
@@ -7292,6 +7696,48 @@ static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        return ret;
 }
 
+static int mvpp2_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+       int ret;
+
+       ret = mvpp2_prs_vid_entry_add(port, vid);
+       if (ret)
+               netdev_err(dev, "rx-vlan-filter offloading cannot accept more than %d VIDs per port\n",
+                          MVPP2_PRS_VLAN_FILT_MAX - 1);
+       return ret;
+}
+
+static int mvpp2_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+
+       mvpp2_prs_vid_entry_remove(port, vid);
+       return 0;
+}
+
+static int mvpp2_set_features(struct net_device *dev,
+                             netdev_features_t features)
+{
+       netdev_features_t changed = dev->features ^ features;
+       struct mvpp2_port *port = netdev_priv(dev);
+
+       if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) {
+               if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+                       mvpp2_prs_vid_enable_filtering(port);
+               } else {
+                       /* Invalidate all registered VID filters for this
+                        * port
+                        */
+                       mvpp2_prs_vid_remove_all(port);
+
+                       mvpp2_prs_vid_disable_filtering(port);
+               }
+       }
+
+       return 0;
+}
+
 /* Ethtool methods */
 
 /* Set interrupt coalescing for ethtools */
@@ -7433,6 +7879,9 @@ static const struct net_device_ops mvpp2_netdev_ops = {
        .ndo_change_mtu         = mvpp2_change_mtu,
        .ndo_get_stats64        = mvpp2_get_stats64,
        .ndo_do_ioctl           = mvpp2_ioctl,
+       .ndo_vlan_rx_add_vid    = mvpp2_vlan_rx_add_vid,
+       .ndo_vlan_rx_kill_vid   = mvpp2_vlan_rx_kill_vid,
+       .ndo_set_features       = mvpp2_set_features,
 };
 
 static const struct ethtool_ops mvpp2_eth_tool_ops = {
@@ -7943,16 +8392,25 @@ static int mvpp2_port_probe(struct platform_device *pdev,
                }
        }
 
-       features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
+       features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+                  NETIF_F_TSO;
        dev->features = features | NETIF_F_RXCSUM;
-       dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
+       dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO |
+                           NETIF_F_HW_VLAN_CTAG_FILTER;
+
+       if (port->pool_long->id == MVPP2_BM_JUMBO && port->id != 0) {
+               dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+               dev->hw_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+       }
+
        dev->vlan_features |= features;
        dev->gso_max_segs = MVPP2_MAX_TSO_SEGS;
+       dev->priv_flags |= IFF_UNICAST_FLT;
 
-       /* MTU range: 68 - 9676 */
+       /* MTU range: 68 - 9704 */
        dev->min_mtu = ETH_MIN_MTU;
-       /* 9676 == 9700 - 20 and rounding to 8 */
-       dev->max_mtu = 9676;
+       /* 9704 == 9728 - 20 and rounding to 8 */
+       dev->max_mtu = MVPP2_BM_JUMBO_PKT_SIZE;
 
        err = register_netdev(dev);
        if (err < 0) {
@@ -8083,14 +8541,25 @@ static void mvpp22_rx_fifo_init(struct mvpp2 *priv)
        mvpp2_write(priv, MVPP2_RX_FIFO_INIT_REG, 0x1);
 }
 
-/* Initialize Tx FIFO's */
+/* Initialize Tx FIFOs: the total FIFO size is 19kB on PPv2.2 and 10G
+ * interfaces must have a Tx FIFO size of 10kB. As only port 0 can do 10G,
+ * configure its Tx FIFO size to 10kB and the other ports' Tx FIFO size to 3kB.
+ */
 static void mvpp22_tx_fifo_init(struct mvpp2 *priv)
 {
-       int port;
+       int port, size, thrs;
 
-       for (port = 0; port < MVPP2_MAX_PORTS; port++)
-               mvpp2_write(priv, MVPP22_TX_FIFO_SIZE_REG(port),
-                           MVPP22_TX_FIFO_DATA_SIZE_3KB);
+       for (port = 0; port < MVPP2_MAX_PORTS; port++) {
+               if (port == 0) {
+                       size = MVPP22_TX_FIFO_DATA_SIZE_10KB;
+                       thrs = MVPP2_TX_FIFO_THRESHOLD_10KB;
+               } else {
+                       size = MVPP22_TX_FIFO_DATA_SIZE_3KB;
+                       thrs = MVPP2_TX_FIFO_THRESHOLD_3KB;
+               }
+               mvpp2_write(priv, MVPP22_TX_FIFO_SIZE_REG(port), size);
+               mvpp2_write(priv, MVPP22_TX_FIFO_THRESH_REG(port), thrs);
+       }
 }
 
 static void mvpp2_axi_init(struct mvpp2 *priv)
@@ -8284,6 +8753,8 @@ static int mvpp2_probe(struct platform_device *pdev)
                        priv->sysctrl_base = NULL;
        }
 
+       mvpp2_setup_bm_pool();
+
        for (i = 0; i < MVPP2_MAX_THREADS; i++) {
                u32 addr_space_sz;
 
index ebc1f566a4d953ab623c5fb4ca6dc018375280e3..9a7a2f05ab35af8a6806861dc83ad3f48e7bdd53 100644 (file)
@@ -199,6 +199,10 @@ static const char main_strings[][ETH_GSTRING_LEN] = {
        "rx_xdp_drop",
        "rx_xdp_tx",
        "rx_xdp_tx_full",
+
+       /* phy statistics */
+       "rx_packets_phy", "rx_bytes_phy",
+       "tx_packets_phy", "tx_bytes_phy",
 };
 
 static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= {
@@ -411,6 +415,10 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
                if (bitmap_iterator_test(&it))
                        data[index++] = ((unsigned long *)&priv->xdp_stats)[i];
 
+       for (i = 0; i < NUM_PHY_STATS; i++, bitmap_iterator_inc(&it))
+               if (bitmap_iterator_test(&it))
+                       data[index++] = ((unsigned long *)&priv->phy_stats)[i];
+
        for (i = 0; i < priv->tx_ring_num[TX]; i++) {
                data[index++] = priv->tx_ring[TX][i]->packets;
                data[index++] = priv->tx_ring[TX][i]->bytes;
@@ -490,6 +498,12 @@ static void mlx4_en_get_strings(struct net_device *dev,
                                strcpy(data + (index++) * ETH_GSTRING_LEN,
                                       main_strings[strings]);
 
+               for (i = 0; i < NUM_PHY_STATS; i++, strings++,
+                    bitmap_iterator_inc(&it))
+                       if (bitmap_iterator_test(&it))
+                               strcpy(data + (index++) * ETH_GSTRING_LEN,
+                                      main_strings[strings]);
+
                for (i = 0; i < priv->tx_ring_num[TX]; i++) {
                        sprintf(data + (index++) * ETH_GSTRING_LEN,
                                "tx%d_packets", i);
index 8fc51bc290038237824e56c477e82fb1f97b637e..e0adac4a9a191f923e68d896293c04dce5b67fc7 100644 (file)
@@ -3256,6 +3256,10 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
 
        bitmap_set(stats_bitmap->bitmap, last_i, NUM_XDP_STATS);
        last_i += NUM_XDP_STATS;
+
+       if (!mlx4_is_slave(dev))
+               bitmap_set(stats_bitmap->bitmap, last_i, NUM_PHY_STATS);
+       last_i += NUM_PHY_STATS;
 }
 
 int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
@@ -3630,10 +3634,6 @@ int mlx4_en_reset_config(struct net_device *dev,
                mlx4_en_stop_port(dev, 1);
        }
 
-       en_warn(priv, "Changing device configuration rx filter(%x) rx vlan(%x)\n",
-               ts_config.rx_filter,
-               !!(features & NETIF_F_HW_VLAN_CTAG_RX));
-
        mlx4_en_safe_replace_resources(priv, tmp);
 
        if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) {
index 1fa4849a6f560f2c3e15dddc13c03bb59031a5b7..0158b88bea5b6d835a7524ad41949834e0526a23 100644 (file)
@@ -275,19 +275,31 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
                priv->port_stats.xmit_more         += READ_ONCE(ring->xmit_more);
        }
 
-       if (mlx4_is_master(mdev->dev)) {
-               stats->rx_packets = en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
-                                                  &mlx4_en_stats->RTOT_prio_1,
-                                                  NUM_PRIORITIES);
-               stats->tx_packets = en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
-                                                  &mlx4_en_stats->TTOT_prio_1,
-                                                  NUM_PRIORITIES);
-               stats->rx_bytes = en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
-                                                &mlx4_en_stats->ROCT_prio_1,
-                                                NUM_PRIORITIES);
-               stats->tx_bytes = en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
-                                                &mlx4_en_stats->TOCT_prio_1,
-                                                NUM_PRIORITIES);
+       if (!mlx4_is_slave(mdev->dev)) {
+               struct mlx4_en_phy_stats *p_stats = &priv->phy_stats;
+
+               p_stats->rx_packets_phy =
+                       en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
+                                      &mlx4_en_stats->RTOT_prio_1,
+                                      NUM_PRIORITIES);
+               p_stats->tx_packets_phy =
+                       en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
+                                      &mlx4_en_stats->TTOT_prio_1,
+                                      NUM_PRIORITIES);
+               p_stats->rx_bytes_phy =
+                       en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
+                                      &mlx4_en_stats->ROCT_prio_1,
+                                      NUM_PRIORITIES);
+               p_stats->tx_bytes_phy =
+                       en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
+                                      &mlx4_en_stats->TOCT_prio_1,
+                                      NUM_PRIORITIES);
+               if (mlx4_is_master(mdev->dev)) {
+                       stats->rx_packets = p_stats->rx_packets_phy;
+                       stats->tx_packets = p_stats->tx_packets_phy;
+                       stats->rx_bytes = p_stats->rx_bytes_phy;
+                       stats->tx_bytes = p_stats->tx_bytes_phy;
+               }
        }
 
        /* net device stats */
index b4d144e6751450a78a27139feec4dc8f128f4fcf..05787efef492b1c0c6ce540ef73647fad91ce282 100644 (file)
@@ -291,13 +291,10 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 
        tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
                                        sizeof(struct mlx4_en_rx_alloc));
-       ring->rx_info = vzalloc_node(tmp, node);
+       ring->rx_info = kvzalloc_node(tmp, GFP_KERNEL, node);
        if (!ring->rx_info) {
-               ring->rx_info = vzalloc(tmp);
-               if (!ring->rx_info) {
-                       err = -ENOMEM;
-                       goto err_xdp_info;
-               }
+               err = -ENOMEM;
+               goto err_xdp_info;
        }
 
        en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
@@ -318,7 +315,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
        return 0;
 
 err_info:
-       vfree(ring->rx_info);
+       kvfree(ring->rx_info);
        ring->rx_info = NULL;
 err_xdp_info:
        xdp_rxq_info_unreg(&ring->xdp_rxq);
@@ -447,7 +444,7 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
                bpf_prog_put(old_prog);
        xdp_rxq_info_unreg(&ring->xdp_rxq);
        mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
-       vfree(ring->rx_info);
+       kvfree(ring->rx_info);
        ring->rx_info = NULL;
        kfree(ring);
        *pring = NULL;
@@ -649,6 +646,12 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
        return get_fixed_ipv4_csum(hw_checksum, skb, hdr);
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV6)
+#else
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4)
+#endif
+
 int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -662,12 +665,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
        int polled = 0;
        int index;
 
-       if (unlikely(!priv->port_up))
+       if (unlikely(!priv->port_up || budget <= 0))
                return 0;
 
-       if (unlikely(budget <= 0))
-               return polled;
-
        ring = priv->rx_ring[cq_ring];
 
        /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
@@ -838,12 +838,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                                ring->csum_ok++;
                        } else {
                                if (!(priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP &&
-                                     (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
-#if IS_ENABLED(CONFIG_IPV6)
-                                                                MLX4_CQE_STATUS_IPV6))))
-#else
-                                                                0))))
-#endif
+                                     (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IP_ANY))))
                                        goto csum_none;
                                if (check_csum(cqe, skb, va, dev->features))
                                        goto csum_none;
index f470ae37d9374a3c519c8c43729ef5000e9d85bb..f7c81133594f341cfb0a6e2b1dbb1cc54c3d72ac 100644 (file)
@@ -608,6 +608,7 @@ struct mlx4_en_priv {
        struct mlx4_en_flow_stats_tx tx_flowstats;
        struct mlx4_en_port_stats port_stats;
        struct mlx4_en_xdp_stats xdp_stats;
+       struct mlx4_en_phy_stats phy_stats;
        struct mlx4_en_stats_bitmap stats_bitmap;
        struct list_head mc_list;
        struct list_head curr_list;
index aab28eb27a30dc75714eedb58e86c16f7a501ea2..86b6051da8ecb10ced1f9e4bc6ad5ef08bab5693 100644 (file)
@@ -63,6 +63,14 @@ struct mlx4_en_xdp_stats {
 #define NUM_XDP_STATS          3
 };
 
+struct mlx4_en_phy_stats {
+       unsigned long rx_packets_phy;
+       unsigned long rx_bytes_phy;
+       unsigned long tx_packets_phy;
+       unsigned long tx_bytes_phy;
+#define NUM_PHY_STATS          4
+};
+
 #define NUM_MAIN_STATS 21
 
 #define MLX4_NUM_PRIORITIES    8
@@ -116,7 +124,7 @@ enum {
 
 #define NUM_ALL_STATS  (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \
                         NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS + \
-                        NUM_XDP_STATS)
+                        NUM_XDP_STATS + NUM_PHY_STATS)
 
 #define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \
                                  sizeof(((struct net_device_stats *)0)->n))
index 53e69edaedde2354e35dd7f64959e13b03228ddc..9f1b1939716a32ed28a66ce7d8cd8f5c05f6f806 100644 (file)
 #include "mlx5_core.h"
 #include "fpga/ipsec.h"
 
-void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
-                                  struct mlx5_accel_ipsec_sa *cmd)
-{
-       if (!MLX5_IPSEC_DEV(mdev))
-               return ERR_PTR(-EOPNOTSUPP);
-
-       return mlx5_fpga_ipsec_sa_cmd_exec(mdev, cmd);
-}
-
-int mlx5_accel_ipsec_sa_cmd_wait(void *ctx)
-{
-       return mlx5_fpga_ipsec_sa_cmd_wait(ctx);
-}
-
 u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev)
 {
        return mlx5_fpga_ipsec_device_caps(mdev);
 }
+EXPORT_SYMBOL_GPL(mlx5_accel_ipsec_device_caps);
 
 unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev)
 {
@@ -67,6 +54,21 @@ int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
        return mlx5_fpga_ipsec_counters_read(mdev, counters, count);
 }
 
+void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+                                      struct mlx5_accel_esp_xfrm *xfrm,
+                                      const __be32 saddr[4],
+                                      const __be32 daddr[4],
+                                      const __be32 spi, bool is_ipv6)
+{
+       return mlx5_fpga_ipsec_create_sa_ctx(mdev, xfrm, saddr, daddr,
+                                            spi, is_ipv6);
+}
+
+void mlx5_accel_esp_free_hw_context(void *context)
+{
+       mlx5_fpga_ipsec_delete_sa_ctx(context);
+}
+
 int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
 {
        return mlx5_fpga_ipsec_init(mdev);
@@ -76,3 +78,32 @@ void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
 {
        mlx5_fpga_ipsec_cleanup(mdev);
 }
+
+struct mlx5_accel_esp_xfrm *
+mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
+                          const struct mlx5_accel_esp_xfrm_attrs *attrs,
+                          u32 flags)
+{
+       struct mlx5_accel_esp_xfrm *xfrm;
+
+       xfrm = mlx5_fpga_esp_create_xfrm(mdev, attrs, flags);
+       if (IS_ERR(xfrm))
+               return xfrm;
+
+       xfrm->mdev = mdev;
+       return xfrm;
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_create_xfrm);
+
+void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+       mlx5_fpga_esp_destroy_xfrm(xfrm);
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_destroy_xfrm);
+
+int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+                              const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+       return mlx5_fpga_esp_modify_xfrm(xfrm, attrs);
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm);
index d6e20fea95544fb302c6bb141550e62a9c8cb7b9..024dbd22a89b907b2ecd3878059bb2449928881c 100644 (file)
 #ifndef __MLX5_ACCEL_IPSEC_H__
 #define __MLX5_ACCEL_IPSEC_H__
 
-#ifdef CONFIG_MLX5_ACCEL
-
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/accel.h>
 
-enum {
-       MLX5_ACCEL_IPSEC_DEVICE = BIT(1),
-       MLX5_ACCEL_IPSEC_IPV6 = BIT(2),
-       MLX5_ACCEL_IPSEC_ESP = BIT(3),
-       MLX5_ACCEL_IPSEC_LSO = BIT(4),
-};
-
-#define MLX5_IPSEC_SADB_IP_AH       BIT(7)
-#define MLX5_IPSEC_SADB_IP_ESP      BIT(6)
-#define MLX5_IPSEC_SADB_SA_VALID    BIT(5)
-#define MLX5_IPSEC_SADB_SPI_EN      BIT(4)
-#define MLX5_IPSEC_SADB_DIR_SX      BIT(3)
-#define MLX5_IPSEC_SADB_IPV6        BIT(2)
-
-enum {
-       MLX5_IPSEC_CMD_ADD_SA = 0,
-       MLX5_IPSEC_CMD_DEL_SA = 1,
-};
-
-enum mlx5_accel_ipsec_enc_mode {
-       MLX5_IPSEC_SADB_MODE_NONE = 0,
-       MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128 = 1,
-       MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128 = 3,
-};
+#ifdef CONFIG_MLX5_ACCEL
 
 #define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
-                             MLX5_ACCEL_IPSEC_DEVICE)
-
-struct mlx5_accel_ipsec_sa {
-       __be32 cmd;
-       u8 key_enc[32];
-       u8 key_auth[32];
-       __be32 sip[4];
-       __be32 dip[4];
-       union {
-               struct {
-                       __be32 reserved;
-                       u8 salt_iv[8];
-                       __be32 salt;
-               } __packed gcm;
-               struct {
-                       u8 salt[16];
-               } __packed cbc;
-       };
-       __be32 spi;
-       __be32 sw_sa_handle;
-       __be16 tfclen;
-       u8 enc_mode;
-       u8 sip_masklen;
-       u8 dip_masklen;
-       u8 flags;
-       u8 reserved[2];
-} __packed;
-
-/**
- * mlx5_accel_ipsec_sa_cmd_exec - Execute an IPSec SADB command
- * @mdev: mlx5 device
- * @cmd: command to execute
- * May be called from atomic context. Returns context pointer, or error
- * Caller must eventually call mlx5_accel_ipsec_sa_cmd_wait from non-atomic
- * context, to cleanup the context pointer
- */
-void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
-                                  struct mlx5_accel_ipsec_sa *cmd);
-
-/**
- * mlx5_accel_ipsec_sa_cmd_wait - Wait for command execution completion
- * @context: Context pointer returned from call to mlx5_accel_ipsec_sa_cmd_exec
- * Sleeps (killable) until command execution is complete.
- * Returns the command result, or -EINTR if killed
- */
-int mlx5_accel_ipsec_sa_cmd_wait(void *context);
-
-u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev);
+                             MLX5_ACCEL_IPSEC_CAP_DEVICE)
 
 unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev);
 int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
                                   unsigned int count);
 
+void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+                                      struct mlx5_accel_esp_xfrm *xfrm,
+                                      const __be32 saddr[4],
+                                      const __be32 daddr[4],
+                                      const __be32 spi, bool is_ipv6);
+void mlx5_accel_esp_free_hw_context(void *context);
+
 int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
 void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
 
@@ -124,6 +60,20 @@ void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
 
 #define MLX5_IPSEC_DEV(mdev) false
 
+static inline void *
+mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+                                struct mlx5_accel_esp_xfrm *xfrm,
+                                const __be32 saddr[4],
+                                const __be32 daddr[4],
+                                const __be32 spi, bool is_ipv6)
+{
+       return NULL;
+}
+
+static inline void mlx5_accel_esp_free_hw_context(void *context)
+{
+}
+
 static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
 {
        return 0;
index 669ed16938b3437675de6116e4557584e96aafd1..a4179122a2796bafc87a429e7ad54abbfc347ed9 100644 (file)
@@ -109,8 +109,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
        cq->cons_index = 0;
        cq->arm_sn     = 0;
        cq->eq         = eq;
-       refcount_set(&cq->refcount, 0);
-       mlx5_cq_hold(cq);
+       refcount_set(&cq->refcount, 1);
        init_completion(&cq->free);
        if (!cq->comp)
                cq->comp = mlx5_add_cq_to_tasklet;
index 17b723218b0c0d891a44379e0f69ad804d43c154..b994b80d5714ad142a3af4711c65d62b7b1d5a6c 100644 (file)
@@ -337,6 +337,14 @@ void mlx5_unregister_interface(struct mlx5_interface *intf)
 }
 EXPORT_SYMBOL(mlx5_unregister_interface);
 
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
+{
+       mutex_lock(&mlx5_intf_mutex);
+       mlx5_remove_dev_by_protocol(mdev, protocol);
+       mlx5_add_dev_by_protocol(mdev, protocol);
+       mutex_unlock(&mlx5_intf_mutex);
+}
+
 void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
 {
        struct mlx5_priv *priv = &mdev->priv;
index fd509160c8f6cc2f1ee81ae2594feb8952e37a8e..d93ff567b40d95ef5faebafae64730ce690fde8e 100644 (file)
@@ -246,6 +246,9 @@ const char *parse_fs_dst(struct trace_seq *p,
        case MLX5_FLOW_DESTINATION_TYPE_COUNTER:
                trace_seq_printf(p, "counter_id=%u\n", counter_id);
                break;
+       case MLX5_FLOW_DESTINATION_TYPE_PORT:
+               trace_seq_printf(p, "port\n");
+               break;
        }
 
        trace_seq_putc(p, 0);
index 80eef4163f52e61d7f511cdd1aaba9dd6898fcd8..a6ba57fbb4146a6af9eebba079333b88a297702e 100644 (file)
@@ -163,9 +163,9 @@ TRACE_EVENT(mlx5_fs_set_fte,
                           fs_get_obj(__entry->fg, fte->node.parent);
                           __entry->group_index = __entry->fg->id;
                           __entry->index = fte->index;
-                          __entry->action = fte->action;
+                          __entry->action = fte->action.action;
                           __entry->mask_enable = __entry->fg->mask.match_criteria_enable;
-                          __entry->flow_tag = fte->flow_tag;
+                          __entry->flow_tag = fte->action.flow_tag;
                           memcpy(__entry->mask_outer,
                                  MLX5_ADDR_OF(fte_match_param,
                                               &__entry->fg->mask.match_criteria,
index bac5103efad3d4e25bc9b0c8d615de387ab87916..cf58c963790478559b4ef9bdd13a8eccf07445aa 100644 (file)
 #include <linux/module.h>
 
 #include "en.h"
-#include "accel/ipsec.h"
 #include "en_accel/ipsec.h"
 #include "en_accel/ipsec_rxtx.h"
 
-struct mlx5e_ipsec_sa_entry {
-       struct hlist_node hlist; /* Item in SADB_RX hashtable */
-       unsigned int handle; /* Handle in SADB_RX */
-       struct xfrm_state *x;
-       struct mlx5e_ipsec *ipsec;
-       void *context;
-};
+
+static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
+{
+       struct mlx5e_ipsec_sa_entry *sa;
+
+       if (!x)
+               return NULL;
+
+       sa = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+       if (!sa)
+               return NULL;
+
+       WARN_ON(sa->x != x);
+       return sa;
+}
 
 struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
                                              unsigned int handle)
@@ -74,18 +81,16 @@ static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry)
        unsigned long flags;
        int ret;
 
-       spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
        ret = ida_simple_get(&ipsec->halloc, 1, 0, GFP_KERNEL);
        if (ret < 0)
-               goto out;
+               return ret;
 
+       spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
        sa_entry->handle = ret;
        hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle);
-       ret = 0;
-
-out:
        spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
-       return ret;
+
+       return 0;
 }
 
 static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry)
@@ -101,87 +106,99 @@ static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry)
 static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
        struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
-       unsigned long flags;
 
-       /* Wait for the hash_del_rcu call in sadb_rx_del to affect data path */
-       synchronize_rcu();
-       spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+       /* xfrm already does an RCU sync between the del and free callbacks */
+
        ida_simple_remove(&ipsec->halloc, sa_entry->handle);
-       spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
 }
 
-static enum mlx5_accel_ipsec_enc_mode mlx5e_ipsec_enc_mode(struct xfrm_state *x)
+static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
-       unsigned int key_len = (x->aead->alg_key_len + 7) / 8 - 4;
-
-       switch (key_len) {
-       case 16:
-               return MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128;
-       case 32:
-               return MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128;
-       default:
-               netdev_warn(x->xso.dev, "Bad key len: %d for alg %s\n",
-                           key_len, x->aead->alg_name);
-               return -1;
+       struct xfrm_replay_state_esn *replay_esn;
+       u32 seq_bottom;
+       u8 overlap;
+       u32 *esn;
+
+       if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) {
+               sa_entry->esn_state.trigger = 0;
+               return false;
+       }
+
+       replay_esn = sa_entry->x->replay_esn;
+       seq_bottom = replay_esn->seq - replay_esn->replay_window + 1;
+       overlap = sa_entry->esn_state.overlap;
+
+       sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x,
+                                                   htonl(seq_bottom));
+       esn = &sa_entry->esn_state.esn;
+
+       sa_entry->esn_state.trigger = 1;
+       if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
+               ++(*esn);
+               sa_entry->esn_state.overlap = 0;
+               return true;
+       } else if (unlikely(!overlap &&
+                           (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) {
+               sa_entry->esn_state.overlap = 1;
+               return true;
        }
+
+       return false;
 }
 
-static void mlx5e_ipsec_build_hw_sa(u32 op, struct mlx5e_ipsec_sa_entry *sa_entry,
-                                   struct mlx5_accel_ipsec_sa *hw_sa)
+static void
+mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
+                                  struct mlx5_accel_esp_xfrm_attrs *attrs)
 {
        struct xfrm_state *x = sa_entry->x;
+       struct aes_gcm_keymat *aes_gcm = &attrs->keymat.aes_gcm;
        struct aead_geniv_ctx *geniv_ctx;
-       unsigned int crypto_data_len;
        struct crypto_aead *aead;
-       unsigned int key_len;
+       unsigned int crypto_data_len, key_len;
        int ivsize;
 
-       memset(hw_sa, 0, sizeof(*hw_sa));
-
-       if (op == MLX5_IPSEC_CMD_ADD_SA) {
-               crypto_data_len = (x->aead->alg_key_len + 7) / 8;
-               key_len = crypto_data_len - 4; /* 4 bytes salt at end */
-               aead = x->data;
-               geniv_ctx = crypto_aead_ctx(aead);
-               ivsize = crypto_aead_ivsize(aead);
-
-               memcpy(&hw_sa->key_enc, x->aead->alg_key, key_len);
-               /* Duplicate 128 bit key twice according to HW layout */
-               if (key_len == 16)
-                       memcpy(&hw_sa->key_enc[16], x->aead->alg_key, key_len);
-               memcpy(&hw_sa->gcm.salt_iv, geniv_ctx->salt, ivsize);
-               hw_sa->gcm.salt = *((__be32 *)(x->aead->alg_key + key_len));
-       }
+       memset(attrs, 0, sizeof(*attrs));
 
-       hw_sa->cmd = htonl(op);
-       hw_sa->flags |= MLX5_IPSEC_SADB_SA_VALID | MLX5_IPSEC_SADB_SPI_EN;
-       if (x->props.family == AF_INET) {
-               hw_sa->sip[3] = x->props.saddr.a4;
-               hw_sa->dip[3] = x->id.daddr.a4;
-               hw_sa->sip_masklen = 32;
-               hw_sa->dip_masklen = 32;
-       } else {
-               memcpy(hw_sa->sip, x->props.saddr.a6, sizeof(hw_sa->sip));
-               memcpy(hw_sa->dip, x->id.daddr.a6, sizeof(hw_sa->dip));
-               hw_sa->sip_masklen = 128;
-               hw_sa->dip_masklen = 128;
-               hw_sa->flags |= MLX5_IPSEC_SADB_IPV6;
-       }
-       hw_sa->spi = x->id.spi;
-       hw_sa->sw_sa_handle = htonl(sa_entry->handle);
-       switch (x->id.proto) {
-       case IPPROTO_ESP:
-               hw_sa->flags |= MLX5_IPSEC_SADB_IP_ESP;
-               break;
-       case IPPROTO_AH:
-               hw_sa->flags |= MLX5_IPSEC_SADB_IP_AH;
-               break;
-       default:
-               break;
+       /* key */
+       crypto_data_len = (x->aead->alg_key_len + 7) / 8;
+       key_len = crypto_data_len - 4; /* 4 bytes salt at end */
+
+       memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len);
+       aes_gcm->key_len = key_len * 8;
+
+       /* salt and seq_iv */
+       aead = x->data;
+       geniv_ctx = crypto_aead_ctx(aead);
+       ivsize = crypto_aead_ivsize(aead);
+       memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize);
+       memcpy(&aes_gcm->salt, x->aead->alg_key + key_len,
+              sizeof(aes_gcm->salt));
+
+       /* icv len */
+       aes_gcm->icv_len = x->aead->alg_icv_len;
+
+       /* esn */
+       if (sa_entry->esn_state.trigger) {
+               attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
+               attrs->esn = sa_entry->esn_state.esn;
+               if (sa_entry->esn_state.overlap)
+                       attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
        }
-       hw_sa->enc_mode = mlx5e_ipsec_enc_mode(x);
-       if (!(x->xso.flags & XFRM_OFFLOAD_INBOUND))
-               hw_sa->flags |= MLX5_IPSEC_SADB_DIR_SX;
+
+       /* rx handle */
+       attrs->sa_handle = sa_entry->handle;
+
+       /* algo type */
+       attrs->keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
+
+       /* action */
+       attrs->action = (!(x->xso.flags & XFRM_OFFLOAD_INBOUND)) ?
+                       MLX5_ACCEL_ESP_ACTION_ENCRYPT :
+                       MLX5_ACCEL_ESP_ACTION_DECRYPT;
+       /* flags */
+       attrs->flags |= (x->props.mode == XFRM_MODE_TRANSPORT) ?
+                       MLX5_ACCEL_ESP_FLAGS_TRANSPORT :
+                       MLX5_ACCEL_ESP_FLAGS_TUNNEL;
 }
 
 static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
@@ -203,7 +220,9 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
                netdev_info(netdev, "Cannot offload compressed xfrm states\n");
                return -EINVAL;
        }
-       if (x->props.flags & XFRM_STATE_ESN) {
+       if (x->props.flags & XFRM_STATE_ESN &&
+           !(mlx5_accel_ipsec_device_caps(priv->mdev) &
+           MLX5_ACCEL_IPSEC_CAP_ESN)) {
                netdev_info(netdev, "Cannot offload ESN xfrm states\n");
                return -EINVAL;
        }
@@ -251,7 +270,8 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
                return -EINVAL;
        }
        if (x->props.family == AF_INET6 &&
-           !(mlx5_accel_ipsec_device_caps(priv->mdev) & MLX5_ACCEL_IPSEC_IPV6)) {
+           !(mlx5_accel_ipsec_device_caps(priv->mdev) &
+            MLX5_ACCEL_IPSEC_CAP_IPV6)) {
                netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n");
                return -EINVAL;
        }
@@ -262,9 +282,10 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
 {
        struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
        struct net_device *netdev = x->xso.dev;
-       struct mlx5_accel_ipsec_sa hw_sa;
+       struct mlx5_accel_esp_xfrm_attrs attrs;
        struct mlx5e_priv *priv;
-       void *context;
+       __be32 saddr[4] = {0}, daddr[4] = {0}, spi;
+       bool is_ipv6 = false;
        int err;
 
        priv = netdev_priv(netdev);
@@ -291,22 +312,49 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
                        netdev_info(netdev, "Failed adding to SADB_RX: %d\n", err);
                        goto err_entry;
                }
+       } else {
+               sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ?
+                               mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv;
        }
 
-       mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_ADD_SA, sa_entry, &hw_sa);
-       context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa);
-       if (IS_ERR(context)) {
-               err = PTR_ERR(context);
+       /* check esn */
+       mlx5e_ipsec_update_esn_state(sa_entry);
+
+       /* create xfrm */
+       mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs);
+       sa_entry->xfrm =
+               mlx5_accel_esp_create_xfrm(priv->mdev, &attrs,
+                                          MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA);
+       if (IS_ERR(sa_entry->xfrm)) {
+               err = PTR_ERR(sa_entry->xfrm);
                goto err_sadb_rx;
        }
 
-       err = mlx5_accel_ipsec_sa_cmd_wait(context);
-       if (err)
-               goto err_sadb_rx;
+       /* create hw context */
+       if (x->props.family == AF_INET) {
+               saddr[3] = x->props.saddr.a4;
+               daddr[3] = x->id.daddr.a4;
+       } else {
+               memcpy(saddr, x->props.saddr.a6, sizeof(saddr));
+               memcpy(daddr, x->id.daddr.a6, sizeof(daddr));
+               is_ipv6 = true;
+       }
+       spi = x->id.spi;
+       sa_entry->hw_context =
+                       mlx5_accel_esp_create_hw_context(priv->mdev,
+                                                        sa_entry->xfrm,
+                                                        saddr, daddr, spi,
+                                                        is_ipv6);
+       if (IS_ERR(sa_entry->hw_context)) {
+               err = PTR_ERR(sa_entry->hw_context);
+               goto err_xfrm;
+       }
 
        x->xso.offload_handle = (unsigned long)sa_entry;
        goto out;
 
+err_xfrm:
+       mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
 err_sadb_rx:
        if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
                mlx5e_ipsec_sadb_rx_del(sa_entry);
@@ -320,43 +368,26 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
 
 static void mlx5e_xfrm_del_state(struct xfrm_state *x)
 {
-       struct mlx5e_ipsec_sa_entry *sa_entry;
-       struct mlx5_accel_ipsec_sa hw_sa;
-       void *context;
+       struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
 
-       if (!x->xso.offload_handle)
+       if (!sa_entry)
                return;
 
-       sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
-       WARN_ON(sa_entry->x != x);
-
        if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
                mlx5e_ipsec_sadb_rx_del(sa_entry);
-
-       mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_DEL_SA, sa_entry, &hw_sa);
-       context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa);
-       if (IS_ERR(context))
-               return;
-
-       sa_entry->context = context;
 }
 
 static void mlx5e_xfrm_free_state(struct xfrm_state *x)
 {
-       struct mlx5e_ipsec_sa_entry *sa_entry;
-       int res;
+       struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
 
-       if (!x->xso.offload_handle)
+       if (!sa_entry)
                return;
 
-       sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
-       WARN_ON(sa_entry->x != x);
-
-       res = mlx5_accel_ipsec_sa_cmd_wait(sa_entry->context);
-       sa_entry->context = NULL;
-       if (res) {
-               /* Leftover object will leak */
-               return;
+       if (sa_entry->hw_context) {
+               flush_workqueue(sa_entry->ipsec->wq);
+               mlx5_accel_esp_free_hw_context(sa_entry->hw_context);
+               mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
        }
 
        if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
@@ -383,6 +414,14 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv)
        ida_init(&ipsec->halloc);
        ipsec->en_priv = priv;
        ipsec->en_priv->ipsec = ipsec;
+       ipsec->no_trailer = !!(mlx5_accel_ipsec_device_caps(priv->mdev) &
+                              MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER);
+       ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0,
+                                           priv->netdev->name);
+       if (!ipsec->wq) {
+               kfree(ipsec);
+               return -ENOMEM;
+       }
        netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
        return 0;
 }
@@ -394,6 +433,9 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
        if (!ipsec)
                return;
 
+       drain_workqueue(ipsec->wq);
+       destroy_workqueue(ipsec->wq);
+
        ida_destroy(&ipsec->halloc);
        kfree(ipsec);
        priv->ipsec = NULL;
@@ -414,11 +456,58 @@ static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
        return true;
 }
 
+/* One deferred xfrm-modify request for an offloaded SA.
+ *
+ * Allocated and queued on ipsec->wq by mlx5e_xfrm_advance_esn_state();
+ * processed and freed by _update_xfrm_state().
+ *
+ * work:     work item whose handler is _update_xfrm_state()
+ * attrs:    attributes filled by mlx5e_ipsec_build_accel_xfrm_attrs() at
+ *           queue time and later handed to mlx5_accel_esp_modify_xfrm()
+ * sa_entry: SA entry whose ->xfrm the handler modifies
+ */
+struct mlx5e_ipsec_modify_state_work {
+       struct work_struct              work;
+       struct mlx5_accel_esp_xfrm_attrs attrs;
+       struct mlx5e_ipsec_sa_entry     *sa_entry;
+};
+
+/* Work handler for a mlx5e_ipsec_modify_state_work request.
+ *
+ * @work: the work_struct embedded in the request being processed.
+ *
+ * Passes the attributes captured in the request to
+ * mlx5_accel_esp_modify_xfrm() for the SA's xfrm object. A failure is
+ * only logged, together with its error code; the request is freed in
+ * every case because nothing else releases it once it has been queued.
+ */
+static void _update_xfrm_state(struct work_struct *work)
+{
+       struct mlx5e_ipsec_modify_state_work *modify_work =
+               container_of(work, struct mlx5e_ipsec_modify_state_work, work);
+       struct mlx5e_ipsec_sa_entry *sa_entry = modify_work->sa_entry;
+       int ret;
+
+       ret = mlx5_accel_esp_modify_xfrm(sa_entry->xfrm,
+                                        &modify_work->attrs);
+       if (ret)
+               netdev_warn(sa_entry->ipsec->en_priv->netdev,
+                           "Failed to modify offloaded xfrm state: %d\n",
+                           ret);
+
+       kfree(modify_work);
+}
+
+/* xdo_dev_state_advance_esn callback.
+ *
+ * Refreshes the ESN state of the SA entry attached to @x. When
+ * mlx5e_ipsec_update_esn_state() reports that an update is needed, a
+ * request carrying freshly built attributes is queued on the ipsec
+ * workqueue for _update_xfrm_state() to apply.
+ *
+ * Returns without doing anything when @x has no SA entry, when no update
+ * is needed, or when the GFP_ATOMIC allocation of the request fails.
+ */
+static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
+{
+       struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+       struct mlx5e_ipsec_modify_state_work *req;
+
+       /* No SA entry, or the ESN state needs no update */
+       if (!sa_entry || !mlx5e_ipsec_update_esn_state(sa_entry))
+               return;
+
+       req = kzalloc(sizeof(*req), GFP_ATOMIC);
+       if (!req)
+               return;
+
+       mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &req->attrs);
+       req->sa_entry = sa_entry;
+
+       /* The handler frees req after applying it */
+       INIT_WORK(&req->work, _update_xfrm_state);
+       WARN_ON(!queue_work(sa_entry->ipsec->wq, &req->work));
+}
+
 static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
        .xdo_dev_state_add      = mlx5e_xfrm_add_state,
        .xdo_dev_state_delete   = mlx5e_xfrm_del_state,
        .xdo_dev_state_free     = mlx5e_xfrm_free_state,
        .xdo_dev_offload_ok     = mlx5e_ipsec_offload_ok,
+       .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
 };
 
 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
@@ -429,7 +518,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
        if (!priv->ipsec)
                return;
 
-       if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_ESP) ||
+       if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) ||
            !MLX5_CAP_ETH(mdev, swp)) {
                mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
                return;
@@ -448,7 +537,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
        netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
        netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
 
-       if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_LSO) ||
+       if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_LSO) ||
            !MLX5_CAP_ETH(mdev, swp_lso)) {
                mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
                return;
index 56e00baf16cc44eb3e1709dc5ee1fcbec367f6e5..1198fc1eba4c8a869f2cd232aefe04974f212b0d 100644 (file)
 #include <net/xfrm.h>
 #include <linux/idr.h>
 
+#include "accel/ipsec.h"
+
 #define MLX5E_IPSEC_SADB_RX_BITS 10
+#define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L
+
 #define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
 #define MLX5E_METADATA_ETHER_LEN 8
 
@@ -77,10 +81,30 @@ struct mlx5e_ipsec_stats {
 struct mlx5e_ipsec {
        struct mlx5e_priv *en_priv;
        DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS);
+       bool no_trailer;
        spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */
        struct ida halloc;
        struct mlx5e_ipsec_sw_stats sw_stats;
        struct mlx5e_ipsec_stats stats;
+       struct workqueue_struct *wq;
+};
+
+/* ESN bookkeeping kept in each SA entry (mlx5e_ipsec_sa_entry.esn_state).
+ *
+ * esn:     value copied to attrs->esn when trigger is set
+ * trigger: when set, MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED is added to the
+ *          accel attribute flags
+ * overlap: when set together with trigger,
+ *          MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP is added as well
+ */
+struct mlx5e_ipsec_esn_state {
+       u32 esn;
+       u8 trigger: 1;
+       u8 overlap: 1;
+};
+
+struct mlx5e_ipsec_sa_entry {
+       struct hlist_node hlist; /* Item in SADB_RX hashtable */
+       struct mlx5e_ipsec_esn_state esn_state;
+       unsigned int handle; /* Handle in SADB_RX */
+       struct xfrm_state *x;
+       struct mlx5e_ipsec *ipsec;
+       struct mlx5_accel_esp_xfrm *xfrm;
+       void *hw_context;
+       void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x,
+                         struct xfrm_offload *xo);
 };
 
 void mlx5e_ipsec_build_inverse_table(void);
index 6a7c8b04447ebb4eae923cd455e1190982417bee..c245d8e78509f4c791a4099238eeba0a027948ff 100644 (file)
 enum {
        MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11,
        MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12,
+       MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO = 0x17,
 };
 
 struct mlx5e_ipsec_rx_metadata {
-       unsigned char   reserved;
+       unsigned char   nexthdr;
        __be32          sa_handle;
 } __packed;
 
@@ -175,7 +176,30 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
        }
 }
 
-static void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_offload *xo)
+/* set_iv_op variant selected for states with XFRM_STATE_ESN.
+ *
+ * Builds a 64-bit big-endian sequence number from xo->seq.low (low 32
+ * bits) and a high word derived from xo->seq.hi, and stores its 8 bytes
+ * into @skb at skb_transport_offset() + sizeof(struct ip_esp_hdr).
+ */
+void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
+                           struct xfrm_offload *xo)
+{
+       struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
+       __u32 oseq = replay_esn->oseq;
+       int iv_offset;
+       __be64 seqno;
+       u32 seq_hi;
+
+       /* oseq is unsigned, so oseq - gso_segs wraps around when gso_segs
+        * exceeds oseq. The branch is taken for a GSO skb when oseq lies
+        * below the midpoint while the difference lies above it; in that
+        * case the high word used is xo->seq.hi - 1.
+        * NOTE(review): presumably this covers a GSO burst whose first
+        * segment was numbered before the low 32 bits rolled over - confirm.
+        */
+       if (unlikely(skb_is_gso(skb) && oseq < MLX5E_IPSEC_ESN_SCOPE_MID &&
+                    MLX5E_IPSEC_ESN_SCOPE_MID < (oseq - skb_shinfo(skb)->gso_segs))) {
+               seq_hi = xo->seq.hi - 1;
+       } else {
+               seq_hi = xo->seq.hi;
+       }
+
+       /* Place the SN in the IV field */
+       seqno = cpu_to_be64(xo->seq.low + ((u64)seq_hi << 32));
+       iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr);
+       skb_store_bits(skb, iv_offset, &seqno, 8);
+}
+
+void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
+                       struct xfrm_offload *xo)
 {
        int iv_offset;
        __be64 seqno;
@@ -227,6 +251,7 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
        struct mlx5e_priv *priv = netdev_priv(netdev);
        struct xfrm_offload *xo = xfrm_offload(skb);
        struct mlx5e_ipsec_metadata *mdata;
+       struct mlx5e_ipsec_sa_entry *sa_entry;
        struct xfrm_state *x;
 
        if (!xo)
@@ -261,7 +286,8 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
                goto drop;
        }
        mlx5e_ipsec_set_swp(skb, &wqe->eth, x->props.mode, xo);
-       mlx5e_ipsec_set_iv(skb, xo);
+       sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+       sa_entry->set_iv_op(skb, x, xo);
        mlx5e_ipsec_set_metadata(skb, mdata, xo);
 
        return skb;
@@ -301,10 +327,17 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
        switch (mdata->syndrome) {
        case MLX5E_IPSEC_RX_SYNDROME_DECRYPTED:
                xo->status = CRYPTO_SUCCESS;
+               if (likely(priv->ipsec->no_trailer)) {
+                       xo->flags |= XFRM_ESP_NO_TRAILER;
+                       xo->proto = mdata->content.rx.nexthdr;
+               }
                break;
        case MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED:
                xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED;
                break;
+       case MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO:
+               xo->status = CRYPTO_INVALID_PROTOCOL;
+               break;
        default:
                atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome);
                return NULL;
index e37ae2598dbbec33826c759a24f4484921468825..2bfbbef1b054a9435344c790c97a580a6dec8950 100644 (file)
@@ -37,6 +37,7 @@
 #ifdef CONFIG_MLX5_EN_IPSEC
 
 #include <linux/skbuff.h>
+#include <net/xfrm.h>
 #include "en.h"
 
 struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
@@ -46,6 +47,10 @@ void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 void mlx5e_ipsec_inverse_table_init(void);
 bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
                               netdev_features_t features);
+void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
+                           struct xfrm_offload *xo);
+void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
+                       struct xfrm_offload *xo);
 struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
                                          struct mlx5e_tx_wqe *wqe,
                                          struct sk_buff *skb);
index 363d8dcb7f174e4b084f93a9379218753a4e41d8..ea4b255380a2f9274ef57430483d67c113ef70dc 100644 (file)
@@ -1156,6 +1156,15 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
        kfree(ppriv); /* mlx5e_rep_priv */
 }
 
+/* get_proto_dev callback registered for REP_ETH representors: returns the
+ * netdev pointer held in the representor's mlx5e_rep_priv.
+ */
+static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+       struct mlx5e_rep_priv *rep_priv = mlx5e_rep_to_rep_priv(rep);
+
+       return rep_priv->netdev;
+}
+
 static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
@@ -1168,6 +1177,7 @@ static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
 
                rep_if.load = mlx5e_vport_rep_load;
                rep_if.unload = mlx5e_vport_rep_unload;
+               rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
                mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
        }
 }
@@ -1195,6 +1205,7 @@ void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
 
        rep_if.load = mlx5e_nic_rep_load;
        rep_if.unload = mlx5e_nic_rep_unload;
+       rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
        rep_if.priv = rpriv;
        INIT_LIST_HEAD(&rpriv->vport_sqs_list);
        mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/
index fa86a1466718037f89a5bdb4b2d67e9bbe2b341e..7c33df2034f07d69a04b6a1d6365500d1dd70261 100644 (file)
@@ -675,6 +675,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
        struct mlx5_flow_destination dest[2] = {};
        struct mlx5_flow_act flow_act = {
                .action = attr->action,
+               .has_flow_tag = true,
                .flow_tag = attr->flow_tag,
                .encap_id = 0,
        };
index c2b1d7d351fc29362e4ce4de641a280b92b96284..77b7272eaaa8ba366a25503c16ffd111373892ba 100644 (file)
@@ -1619,10 +1619,14 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
        esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
        esw->mode = mode;
 
-       if (mode == SRIOV_LEGACY)
+       if (mode == SRIOV_LEGACY) {
                err = esw_create_legacy_fdb_table(esw, nvfs + 1);
-       else
+       } else {
+               mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
                err = esw_offloads_init(esw, nvfs + 1);
+       }
+
        if (err)
                goto abort;
 
@@ -1644,12 +1648,17 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 
 abort:
        esw->mode = SRIOV_NONE;
+
+       if (mode == SRIOV_OFFLOADS)
+               mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
        return err;
 }
 
 void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 {
        struct esw_mc_addr *mc_promisc;
+       int old_mode;
        int nvports;
        int i;
 
@@ -1675,7 +1684,11 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
        else if (esw->mode == SRIOV_OFFLOADS)
                esw_offloads_cleanup(esw, nvports);
 
+       old_mode = esw->mode;
        esw->mode = SRIOV_NONE;
+
+       if (old_mode == SRIOV_OFFLOADS)
+               mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
 }
 
 int mlx5_eswitch_init(struct mlx5_core_dev *dev)
@@ -2175,3 +2188,9 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
        kvfree(out);
        return err;
 }
+
+/* Return the mode currently stored in @esw. Elsewhere in this file the
+ * field is set to or compared against SRIOV_NONE, SRIOV_LEGACY and
+ * SRIOV_OFFLOADS. @esw is dereferenced without a NULL check.
+ */
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
+{
+       return esw->mode;
+}
+EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
index 2fa037066b2f7bacb74cbcb9676a6096cf369eab..98d2177d0806244f2d40acdee26359b74a3832b2 100644 (file)
 #include <linux/if_link.h>
 #include <net/devlink.h>
 #include <linux/mlx5/device.h>
+#include <linux/mlx5/eswitch.h>
 #include "lib/mpfs.h"
 
-enum {
-       SRIOV_NONE,
-       SRIOV_LEGACY,
-       SRIOV_OFFLOADS
-};
-
-enum {
-       REP_ETH,
-       NUM_REP_TYPES,
-};
-
 #ifdef CONFIG_MLX5_ESWITCH
 
 #define MLX5_MAX_UC_PER_VPORT(dev) \
@@ -139,29 +129,13 @@ struct mlx5_eswitch_fdb {
                        struct mlx5_flow_table *fdb;
                        struct mlx5_flow_group *send_to_vport_grp;
                        struct mlx5_flow_group *miss_grp;
-                       struct mlx5_flow_handle *miss_rule;
+                       struct mlx5_flow_handle *miss_rule_uni;
+                       struct mlx5_flow_handle *miss_rule_multi;
                        int vlan_push_pop_refcount;
                } offloads;
        };
 };
 
-struct mlx5_eswitch_rep;
-struct mlx5_eswitch_rep_if {
-       int                    (*load)(struct mlx5_core_dev *dev,
-                                      struct mlx5_eswitch_rep *rep);
-       void                   (*unload)(struct mlx5_eswitch_rep *rep);
-       void                    *priv;
-       bool                   valid;
-};
-
-struct mlx5_eswitch_rep {
-       struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
-       u16                    vport;
-       u8                     hw_id[ETH_ALEN];
-       u16                    vlan;
-       u32                    vlan_refcount;
-};
-
 struct mlx5_esw_offload {
        struct mlx5_flow_table *ft_offloads;
        struct mlx5_flow_group *vport_rx_group;
@@ -231,9 +205,6 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
 int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
                                 int vport,
                                 struct ifla_vf_stats *vf_stats);
-struct mlx5_flow_handle *
-mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport,
-                                   u32 sqn);
 void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
 
 struct mlx5_flow_spec;
@@ -278,13 +249,6 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
 int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
 int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap);
 int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
-void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
-                                    int vport_index,
-                                    struct mlx5_eswitch_rep_if *rep_if,
-                                    u8 rep_type);
-void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
-                                      int vport_index,
-                                      u8 rep_type);
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
 
 int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
index 99f583a15cc388b9171cd198b1e644a6a97b5168..0a8303c1b52f662e42b6d9fb62e1eebaa2e8abfc 100644 (file)
@@ -338,6 +338,7 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn
        kvfree(spec);
        return flow_rule;
 }
+EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule);
 
 void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
 {
@@ -350,7 +351,11 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
        struct mlx5_flow_destination dest = {};
        struct mlx5_flow_handle *flow_rule = NULL;
        struct mlx5_flow_spec *spec;
+       void *headers_c;
+       void *headers_v;
        int err = 0;
+       u8 *dmac_c;
+       u8 *dmac_v;
 
        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec) {
@@ -358,6 +363,13 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
                goto out;
        }
 
+       spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+       headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+                                outer_headers);
+       dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c,
+                             outer_headers.dmac_47_16);
+       dmac_c[0] = 0x01;
+
        dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
        dest.vport_num = 0;
        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
@@ -366,11 +378,28 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
                                        &flow_act, &dest, 1);
        if (IS_ERR(flow_rule)) {
                err = PTR_ERR(flow_rule);
-               esw_warn(esw->dev,  "FDB: Failed to add miss flow rule err %d\n", err);
+               esw_warn(esw->dev,  "FDB: Failed to add unicast miss flow rule err %d\n", err);
                goto out;
        }
 
-       esw->fdb_table.offloads.miss_rule = flow_rule;
+       esw->fdb_table.offloads.miss_rule_uni = flow_rule;
+
+       headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                                outer_headers);
+       dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
+                             outer_headers.dmac_47_16);
+       dmac_v[0] = 0x01;
+       flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
+                                       &flow_act, &dest, 1);
+       if (IS_ERR(flow_rule)) {
+               err = PTR_ERR(flow_rule);
+               esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
+               mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
+               goto out;
+       }
+
+       esw->fdb_table.offloads.miss_rule_multi = flow_rule;
+
 out:
        kvfree(spec);
        return err;
@@ -426,6 +455,7 @@ static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
 }
 
 #define MAX_PF_SQ 256
+#define MAX_SQ_NVPORTS 32
 
 static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 {
@@ -438,6 +468,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
        struct mlx5_flow_group *g;
        void *match_criteria;
        u32 *flow_group_in;
+       u8 *dmac;
 
        esw_debug(esw->dev, "Create offloads FDB Tables\n");
        flow_group_in = kvzalloc(inlen, GFP_KERNEL);
@@ -455,7 +486,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
        if (err)
                goto fast_fdb_err;
 
-       table_size = nvports + MAX_PF_SQ + 1;
+       table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
 
        ft_attr.max_fte = table_size;
        ft_attr.prio = FDB_SLOW_PATH;
@@ -478,7 +509,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
        MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
        MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
 
-       ix = nvports + MAX_PF_SQ;
+       ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ;
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
 
@@ -492,10 +523,16 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 
        /* create miss group */
        memset(flow_group_in, 0, inlen);
-       MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 0);
+       MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+                MLX5_MATCH_OUTER_HEADERS);
+       match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+                                     match_criteria);
+       dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
+                           outer_headers.dmac_47_16);
+       dmac[0] = 0x01;
 
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
-       MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 1);
+       MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2);
 
        g = mlx5_create_flow_group(fdb, flow_group_in);
        if (IS_ERR(g)) {
@@ -531,7 +568,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
                return;
 
        esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
-       mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule);
+       mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
+       mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
        mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
        mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
 
@@ -789,14 +827,9 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
 {
        int err;
 
-       /* disable PF RoCE so missed packets don't go through RoCE steering */
-       mlx5_dev_list_lock();
-       mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-       mlx5_dev_list_unlock();
-
        err = esw_create_offloads_fdb_tables(esw, nvports);
        if (err)
-               goto create_fdb_err;
+               return err;
 
        err = esw_create_offloads_table(esw);
        if (err)
@@ -821,12 +854,6 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
 create_ft_err:
        esw_destroy_offloads_fdb_tables(esw);
 
-create_fdb_err:
-       /* enable back PF RoCE */
-       mlx5_dev_list_lock();
-       mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-       mlx5_dev_list_unlock();
-
        return err;
 }
 
@@ -844,9 +871,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw)
        }
 
        /* enable back PF RoCE */
-       mlx5_dev_list_lock();
-       mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-       mlx5_dev_list_unlock();
+       mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
 
        return err;
 }
@@ -1160,10 +1185,12 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
 
        rep_if->load   = __rep_if->load;
        rep_if->unload = __rep_if->unload;
+       rep_if->get_proto_dev = __rep_if->get_proto_dev;
        rep_if->priv = __rep_if->priv;
 
        rep_if->valid = true;
 }
+EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep);
 
 void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
                                       int vport_index, u8 rep_type)
@@ -1178,6 +1205,7 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
 
        rep->rep_if[rep_type].valid = false;
 }
+EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
 
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
 {
@@ -1188,3 +1216,35 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
        rep = &offloads->vport_reps[UPLINK_REP_INDEX];
        return rep->rep_if[rep_type].priv;
 }
+
+/* Look up the protocol device behind a vport representor.
+ *
+ * @esw:      e-switch whose offloads.vport_reps[] array is consulted
+ * @vport:    index into vport_reps[]; FDB_UPLINK_VPORT is translated to
+ *            UPLINK_REP_INDEX. No range check is performed here.
+ * @rep_type: index into the representor's rep_if[] array
+ *
+ * Returns whatever the registered get_proto_dev() callback returns, or
+ * NULL when that interface is not valid or has no such callback.
+ */
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+                                int vport,
+                                u8 rep_type)
+{
+       struct mlx5_eswitch_rep_if *rep_if;
+       struct mlx5_eswitch_rep *rep;
+       int idx = vport;
+
+       if (idx == FDB_UPLINK_VPORT)
+               idx = UPLINK_REP_INDEX;
+
+       rep = &esw->offloads.vport_reps[idx];
+       rep_if = &rep->rep_if[rep_type];
+
+       if (!rep_if->valid || !rep_if->get_proto_dev)
+               return NULL;
+
+       return rep_if->get_proto_dev(rep);
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
+
+/* Convenience wrapper: mlx5_eswitch_get_proto_dev() for the uplink
+ * representor (UPLINK_REP_INDEX) and the given @rep_type.
+ */
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
+{
+       return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type);
+}
+EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
+
+/* Return a pointer to entry @vport of esw->offloads.vport_reps[].
+ * @vport is used as a raw array index: it is neither range-checked nor
+ * translated from FDB_UPLINK_VPORT as mlx5_eswitch_get_proto_dev() does.
+ */
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+                                               int vport)
+{
+       return &esw->offloads.vport_reps[vport];
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
index 35d0e33381ca080b6c1f3e0eb500e08a13ffe408..0f5da499a22339fa11eb30fa73334b5d8ec4c039 100644 (file)
  *
  */
 
+#include <linux/rhashtable.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs_helpers.h>
+#include <linux/mlx5/fs.h>
+#include <linux/rbtree.h>
 
 #include "mlx5_core.h"
+#include "fs_cmd.h"
 #include "fpga/ipsec.h"
 #include "fpga/sdk.h"
 #include "fpga/core.h"
 
 #define SBU_QP_QUEUE_SIZE 8
+#define MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC       (60 * 1000)
 
-enum mlx5_ipsec_response_syndrome {
-       MLX5_IPSEC_RESPONSE_SUCCESS = 0,
-       MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1,
-       MLX5_IPSEC_RESPONSE_SADB_ISSUE = 2,
-       MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3,
+enum mlx5_fpga_ipsec_cmd_status {
+       MLX5_FPGA_IPSEC_CMD_PENDING,
+       MLX5_FPGA_IPSEC_CMD_SEND_FAIL,
+       MLX5_FPGA_IPSEC_CMD_COMPLETE,
 };
 
-enum mlx5_fpga_ipsec_sacmd_status {
-       MLX5_FPGA_IPSEC_SACMD_PENDING,
-       MLX5_FPGA_IPSEC_SACMD_SEND_FAIL,
-       MLX5_FPGA_IPSEC_SACMD_COMPLETE,
-};
-
-struct mlx5_ipsec_command_context {
+struct mlx5_fpga_ipsec_cmd_context {
        struct mlx5_fpga_dma_buf buf;
-       struct mlx5_accel_ipsec_sa sa;
-       enum mlx5_fpga_ipsec_sacmd_status status;
+       enum mlx5_fpga_ipsec_cmd_status status;
+       struct mlx5_ifc_fpga_ipsec_cmd_resp resp;
        int status_code;
        struct completion complete;
        struct mlx5_fpga_device *dev;
        struct list_head list; /* Item in pending_cmds */
+       u8 command[0];
+};
+
+struct mlx5_fpga_esp_xfrm;
+
+/* SA context; rhash_sa describes how objects of this type are hashed.
+ *
+ * hash:      rhashtable linkage (rhash_sa.head_offset)
+ * hw_sa:     the whole member is the hash key (rhash_sa.key_offset/key_len)
+ * dev:       NOTE(review): presumably the core device the SA was created
+ *            on - confirm against the code that fills it
+ * fpga_xfrm: NOTE(review): presumably the xfrm object this context was
+ *            created for - confirm against the code that fills it
+ */
+struct mlx5_fpga_ipsec_sa_ctx {
+       struct rhash_head               hash;
+       struct mlx5_ifc_fpga_ipsec_sa   hw_sa;
+       struct mlx5_core_dev            *dev;
+       struct mlx5_fpga_esp_xfrm       *fpga_xfrm;
+};
+
+struct mlx5_fpga_esp_xfrm {
+       unsigned int                    num_rules;
+       struct mlx5_fpga_ipsec_sa_ctx   *sa_ctx;
+       struct mutex                    lock; /* xfrm lock */
+       struct mlx5_accel_esp_xfrm      accel_xfrm;
+};
+
+struct mlx5_fpga_ipsec_rule {
+       struct rb_node                  node;
+       struct fs_fte                   *fte;
+       struct mlx5_fpga_ipsec_sa_ctx   *ctx;
 };
 
-struct mlx5_ipsec_sadb_resp {
-       __be32 syndrome;
-       __be32 sw_sa_handle;
-       u8 reserved[24];
-} __packed;
+/* rhashtable parameters for hashing struct mlx5_fpga_ipsec_sa_ctx objects:
+ * the key is the entire hw_sa member and the table linkage is the hash
+ * member. Automatic shrinking is enabled and the minimum size is 1.
+ * NOTE(review): presumably used with mlx5_fpga_ipsec.sa_hash - confirm.
+ */
+static const struct rhashtable_params rhash_sa = {
+       .key_len = FIELD_SIZEOF(struct mlx5_fpga_ipsec_sa_ctx, hw_sa),
+       .key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa),
+       .head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash),
+       .automatic_shrinking = true,
+       .min_size = 1,
+};
 
 struct mlx5_fpga_ipsec {
+       struct mlx5_fpga_device *fdev;
        struct list_head pending_cmds;
        spinlock_t pending_cmds_lock; /* Protects pending_cmds */
        u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)];
        struct mlx5_fpga_conn *conn;
+
+       struct notifier_block   fs_notifier_ingress_bypass;
+       struct notifier_block   fs_notifier_egress;
+
+       /* Map hardware SA           -->  SA context
+        *     (mlx5_fpga_ipsec_sa)       (mlx5_fpga_ipsec_sa_ctx)
+        * This hash is used to avoid creating duplicate SAs in the FPGA,
+        * which is not allowed.
+        */
+       struct rhashtable sa_hash;      /* hw_sa -> mlx5_fpga_ipsec_sa_ctx */
+       struct mutex sa_hash_lock;
+
+       /* Tree holding all rules for this fpga device
+        * Key for searching a rule (mlx5_fpga_ipsec_rule) is (ft, id)
+        */
+       struct rb_root rules_rb;
+       struct mutex rules_rb_lock; /* rules lock */
 };
 
 static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
@@ -97,28 +139,29 @@ static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn,
                                          struct mlx5_fpga_dma_buf *buf,
                                          u8 status)
 {
-       struct mlx5_ipsec_command_context *context;
+       struct mlx5_fpga_ipsec_cmd_context *context;
 
        if (status) {
-               context = container_of(buf, struct mlx5_ipsec_command_context,
+               context = container_of(buf, struct mlx5_fpga_ipsec_cmd_context,
                                       buf);
                mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n",
                               status);
-               context->status = MLX5_FPGA_IPSEC_SACMD_SEND_FAIL;
+               context->status = MLX5_FPGA_IPSEC_CMD_SEND_FAIL;
                complete(&context->complete);
        }
 }
 
-static inline int syndrome_to_errno(enum mlx5_ipsec_response_syndrome syndrome)
+static inline
+int syndrome_to_errno(enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome)
 {
        switch (syndrome) {
-       case MLX5_IPSEC_RESPONSE_SUCCESS:
+       case MLX5_FPGA_IPSEC_RESPONSE_SUCCESS:
                return 0;
-       case MLX5_IPSEC_RESPONSE_SADB_ISSUE:
+       case MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE:
                return -EEXIST;
-       case MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST:
+       case MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST:
                return -EINVAL;
-       case MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE:
+       case MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE:
                return -EIO;
        }
        return -EIO;
@@ -126,9 +169,9 @@ static inline int syndrome_to_errno(enum mlx5_ipsec_response_syndrome syndrome)
 
 static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
 {
-       struct mlx5_ipsec_sadb_resp *resp = buf->sg[0].data;
-       struct mlx5_ipsec_command_context *context;
-       enum mlx5_ipsec_response_syndrome syndrome;
+       struct mlx5_ifc_fpga_ipsec_cmd_resp *resp = buf->sg[0].data;
+       struct mlx5_fpga_ipsec_cmd_context *context;
+       enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome;
        struct mlx5_fpga_device *fdev = cb_arg;
        unsigned long flags;
 
@@ -138,12 +181,12 @@ static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
                return;
        }
 
-       mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x sa_id %x\n",
-                     ntohl(resp->syndrome), ntohl(resp->sw_sa_handle));
+       mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x\n",
+                     ntohl(resp->syndrome));
 
        spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
        context = list_first_entry_or_null(&fdev->ipsec->pending_cmds,
-                                          struct mlx5_ipsec_command_context,
+                                          struct mlx5_fpga_ipsec_cmd_context,
                                           list);
        if (context)
                list_del(&context->list);
@@ -155,51 +198,48 @@ static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
        }
        mlx5_fpga_dbg(fdev, "Handling response for %p\n", context);
 
-       if (context->sa.sw_sa_handle != resp->sw_sa_handle) {
-               mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
-                             ntohl(context->sa.sw_sa_handle),
-                             ntohl(resp->sw_sa_handle));
-               return;
-       }
-
        syndrome = ntohl(resp->syndrome);
        context->status_code = syndrome_to_errno(syndrome);
-       context->status = MLX5_FPGA_IPSEC_SACMD_COMPLETE;
+       context->status = MLX5_FPGA_IPSEC_CMD_COMPLETE;
+       memcpy(&context->resp, resp, sizeof(*resp));
 
        if (context->status_code)
-               mlx5_fpga_warn(fdev, "IPSec SADB command failed with syndrome %08x\n",
+               mlx5_fpga_warn(fdev, "IPSec command failed with syndrome %08x\n",
                               syndrome);
+
        complete(&context->complete);
 }
 
-void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
-                                 struct mlx5_accel_ipsec_sa *cmd)
+static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev,
+                                     const void *cmd, int cmd_size)
 {
-       struct mlx5_ipsec_command_context *context;
+       struct mlx5_fpga_ipsec_cmd_context *context;
        struct mlx5_fpga_device *fdev = mdev->fpga;
        unsigned long flags;
-       int res = 0;
+       int res;
 
-       BUILD_BUG_ON((sizeof(struct mlx5_accel_ipsec_sa) & 3) != 0);
        if (!fdev || !fdev->ipsec)
                return ERR_PTR(-EOPNOTSUPP);
 
-       context = kzalloc(sizeof(*context), GFP_ATOMIC);
+       if (cmd_size & 3)
+               return ERR_PTR(-EINVAL);
+
+       context = kzalloc(sizeof(*context) + cmd_size, GFP_ATOMIC);
        if (!context)
                return ERR_PTR(-ENOMEM);
 
-       memcpy(&context->sa, cmd, sizeof(*cmd));
+       context->status = MLX5_FPGA_IPSEC_CMD_PENDING;
+       context->dev = fdev;
        context->buf.complete = mlx5_fpga_ipsec_send_complete;
-       context->buf.sg[0].size = sizeof(context->sa);
-       context->buf.sg[0].data = &context->sa;
        init_completion(&context->complete);
-       context->dev = fdev;
+       memcpy(&context->command, cmd, cmd_size);
+       context->buf.sg[0].size = cmd_size;
+       context->buf.sg[0].data = &context->command;
+
        spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
        list_add_tail(&context->list, &fdev->ipsec->pending_cmds);
        spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
 
-       context->status = MLX5_FPGA_IPSEC_SACMD_PENDING;
-
        res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf);
        if (res) {
                mlx5_fpga_warn(fdev, "Failure sending IPSec command: %d\n",
@@ -214,47 +254,103 @@ void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
        return context;
 }
 
-int mlx5_fpga_ipsec_sa_cmd_wait(void *ctx)
+static int mlx5_fpga_ipsec_cmd_wait(void *ctx)
 {
-       struct mlx5_ipsec_command_context *context = ctx;
+       struct mlx5_fpga_ipsec_cmd_context *context = ctx;
+       unsigned long timeout =
+               msecs_to_jiffies(MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC);
        int res;
 
-       res = wait_for_completion_killable(&context->complete);
-       if (res) {
+       res = wait_for_completion_timeout(&context->complete, timeout);
+       if (!res) {
                mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n");
-               return -EINTR;
+               return -ETIMEDOUT;
        }
 
-       if (context->status == MLX5_FPGA_IPSEC_SACMD_COMPLETE)
+       if (context->status == MLX5_FPGA_IPSEC_CMD_COMPLETE)
                res = context->status_code;
        else
                res = -EIO;
 
-       kfree(context);
        return res;
 }
 
+/* Tell whether the device capabilities advertise the v2_command bit. */
+static inline bool is_v2_sadb_supported(struct mlx5_fpga_ipsec *fipsec)
+{
+       return MLX5_GET(ipsec_extended_cap, fipsec->caps, v2_command) != 0;
+}
+
+/* Send one SA command built from @hw_sa to the FPGA and wait for the reply.
+ *
+ * @opcode is written, in network byte order, into hw_sa->ipsec_sa_v1.cmd and
+ * is still there on return. The whole structure is sent when the device
+ * advertises v2_command, otherwise only its ipsec_sa_v1 part.
+ *
+ * Returns 0 on success, the error from command execution or from the wait,
+ * or -EIO when the sw_sa_handle in the response differs from the one sent.
+ * The command context is freed on every path that obtained one.
+ */
+static int mlx5_fpga_ipsec_update_hw_sa(struct mlx5_fpga_device *fdev,
+                                       struct mlx5_ifc_fpga_ipsec_sa *hw_sa,
+                                       int opcode)
+{
+       struct mlx5_core_dev *dev = fdev->mdev;
+       struct mlx5_ifc_fpga_ipsec_sa *sa;
+       struct mlx5_fpga_ipsec_cmd_context *cmd_context;
+       size_t sa_cmd_size;
+       int err;
+
+       hw_sa->ipsec_sa_v1.cmd = htonl(opcode);
+       if (is_v2_sadb_supported(fdev->ipsec))
+               sa_cmd_size = sizeof(*hw_sa);
+       else
+               sa_cmd_size = sizeof(hw_sa->ipsec_sa_v1);
+
+       cmd_context = (struct mlx5_fpga_ipsec_cmd_context *)
+                       mlx5_fpga_ipsec_cmd_exec(dev, hw_sa, sa_cmd_size);
+       if (IS_ERR(cmd_context))
+               return PTR_ERR(cmd_context);
+
+       err = mlx5_fpga_ipsec_cmd_wait(cmd_context);
+       if (err)
+               goto out;
+
+       /* Compare against the copy of the command kept in the context */
+       sa = (struct mlx5_ifc_fpga_ipsec_sa *)&cmd_context->command;
+       if (sa->ipsec_sa_v1.sw_sa_handle != cmd_context->resp.sw_sa_handle) {
+               mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
+                             ntohl(sa->ipsec_sa_v1.sw_sa_handle),
+                             ntohl(cmd_context->resp.sw_sa_handle));
+               err = -EIO;
+       }
+
+out:
+       kfree(cmd_context);
+       return err;
+}
+
 u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
 {
        struct mlx5_fpga_device *fdev = mdev->fpga;
        u32 ret = 0;
 
-       if (mlx5_fpga_is_ipsec_device(mdev))
-               ret |= MLX5_ACCEL_IPSEC_DEVICE;
-       else
+       if (mlx5_fpga_is_ipsec_device(mdev)) {
+               ret |= MLX5_ACCEL_IPSEC_CAP_DEVICE;
+               ret |= MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA;
+       } else {
                return ret;
+       }
 
        if (!fdev->ipsec)
                return ret;
 
        if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp))
-               ret |= MLX5_ACCEL_IPSEC_ESP;
+               ret |= MLX5_ACCEL_IPSEC_CAP_ESP;
 
        if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6))
-               ret |= MLX5_ACCEL_IPSEC_IPV6;
+               ret |= MLX5_ACCEL_IPSEC_CAP_IPV6;
 
        if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso))
-               ret |= MLX5_ACCEL_IPSEC_LSO;
+               ret |= MLX5_ACCEL_IPSEC_CAP_LSO;
+
+       if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer))
+               ret |= MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER;
+
+       if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esn)) {
+               ret |= MLX5_ACCEL_IPSEC_CAP_ESN;
+               ret |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN;
+       }
 
        return ret;
 }
@@ -318,6 +414,829 @@ int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
        return ret;
 }
 
+/* Ask the FPGA to enable the capability bits in @flags (host byte order).
+ *
+ * Returns 0 when the response acknowledges every requested bit, the error
+ * from sending or waiting for the command, or -EIO when a requested bit is
+ * missing from the response.
+ */
+static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags)
+{
+       struct mlx5_fpga_ipsec_cmd_context *context;
+       struct mlx5_ifc_fpga_ipsec_cmd_cap cmd = {0};
+       int err;
+
+       cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP);
+       cmd.flags = htonl(flags);
+       context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd));
+       if (IS_ERR(context))
+               return PTR_ERR(context);
+
+       err = mlx5_fpga_ipsec_cmd_wait(context);
+       if (err)
+               goto out;
+
+       if ((context->resp.flags & cmd.flags) != cmd.flags) {
+               mlx5_fpga_err(context->dev, "Failed to set capabilities. cmd 0x%08x vs resp 0x%08x\n",
+                             cmd.flags,
+                             context->resp.flags);
+               err = -EIO;
+       }
+
+out:
+       /* The wait helper does not free the context, so release it here,
+        * the same way mlx5_fpga_ipsec_update_hw_sa() does.
+        */
+       kfree(context);
+       return err;
+}
+
+/* Translate the device capabilities into FPGA capability flags and ask the
+ * FPGA to enable them. Only the "RX no trailer" capability is mapped.
+ * Returns the result of mlx5_fpga_ipsec_set_caps().
+ */
+static int mlx5_fpga_ipsec_enable_supported_caps(struct mlx5_core_dev *mdev)
+{
+       u32 fpga_flags = 0;
+       u32 accel_caps;
+
+       accel_caps = mlx5_fpga_ipsec_device_caps(mdev);
+       if (accel_caps & MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER)
+               fpga_flags |= MLX5_FPGA_IPSEC_CAP_NO_TRAILER;
+
+       return mlx5_fpga_ipsec_set_caps(mdev, fpga_flags);
+}
+
+/* Fill the xfrm-derived part of @hw_sa from @xfrm_attrs: AES-GCM key, salt
+ * and IV, ESN state, software SA handle, encryption mode and the
+ * valid/SPI/ESP/direction flags.
+ *
+ * Only the flag bits this function owns are modified; any other bit already
+ * present in hw_sa->ipsec_sa_v1.flags is preserved. enc_mode is left
+ * untouched for key lengths other than 128 and 256.
+ */
+static void
+mlx5_fpga_ipsec_build_hw_xfrm(struct mlx5_core_dev *mdev,
+                             const struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
+                             struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
+{
+       const struct aes_gcm_keymat *aes_gcm = &xfrm_attrs->keymat.aes_gcm;
+
+       /* key */
+       memcpy(&hw_sa->ipsec_sa_v1.key_enc, aes_gcm->aes_key,
+              aes_gcm->key_len / 8);
+       /* Duplicate 128 bit key twice according to HW layout */
+       if (aes_gcm->key_len == 128)
+               memcpy(&hw_sa->ipsec_sa_v1.key_enc[16],
+                      aes_gcm->aes_key, aes_gcm->key_len / 8);
+
+       /* salt and seq_iv */
+       memcpy(&hw_sa->ipsec_sa_v1.gcm.salt_iv, &aes_gcm->seq_iv,
+              sizeof(aes_gcm->seq_iv));
+       memcpy(&hw_sa->ipsec_sa_v1.gcm.salt, &aes_gcm->salt,
+              sizeof(aes_gcm->salt));
+
+       /* esn: start from both ESN bits cleared so that a stale bit from a
+        * previous build never survives, then set what the attrs request.
+        * Clearing is done with explicit masks; an expression of the form
+        * "flags &= ~(x & A) ? B : 0" parses as "(~(x & A)) ? B : 0" and
+        * would wipe every unrelated flag.
+        */
+       hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_ESN_EN;
+       hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_ESN_OVERLAP;
+       if (xfrm_attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) {
+               hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_ESN_EN;
+               if (xfrm_attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP)
+                       hw_sa->ipsec_sa_v1.flags |=
+                                       MLX5_FPGA_IPSEC_SA_ESN_OVERLAP;
+               hw_sa->esn = htonl(xfrm_attrs->esn);
+       } else {
+               hw_sa->esn = 0;
+       }
+
+       /* rx handle */
+       hw_sa->ipsec_sa_v1.sw_sa_handle = htonl(xfrm_attrs->sa_handle);
+
+       /* enc mode */
+       switch (aes_gcm->key_len) {
+       case 128:
+               hw_sa->ipsec_sa_v1.enc_mode =
+                       MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128;
+               break;
+       case 256:
+               hw_sa->ipsec_sa_v1.enc_mode =
+                       MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128;
+               break;
+       }
+
+       /* flags */
+       hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_SA_VALID |
+                       MLX5_FPGA_IPSEC_SA_SPI_EN |
+                       MLX5_FPGA_IPSEC_SA_IP_ESP;
+
+       if (xfrm_attrs->action & MLX5_ACCEL_ESP_ACTION_ENCRYPT)
+               hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_DIR_SX;
+       else
+               hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_DIR_SX;
+}
+
+/* Build a complete hardware SA in @hw_sa: the xfrm-derived part comes from
+ * mlx5_fpga_ipsec_build_hw_xfrm(), then the source/destination addresses,
+ * the SPI and, for IPv6, the IPv6 flag are added on top.
+ */
+static void
+mlx5_fpga_ipsec_build_hw_sa(struct mlx5_core_dev *mdev,
+                           struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
+                           const __be32 saddr[4],
+                           const __be32 daddr[4],
+                           const __be32 spi, bool is_ipv6,
+                           struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
+{
+       /* keys, ESN, handle, mode and base flags */
+       mlx5_fpga_ipsec_build_hw_xfrm(mdev, xfrm_attrs, hw_sa);
+
+       /* the SA selector: SPI plus both addresses */
+       hw_sa->ipsec_sa_v1.spi = spi;
+       memcpy(hw_sa->ipsec_sa_v1.dip, daddr, sizeof(hw_sa->ipsec_sa_v1.dip));
+       memcpy(hw_sa->ipsec_sa_v1.sip, saddr, sizeof(hw_sa->ipsec_sa_v1.sip));
+
+       if (!is_ipv6)
+               return;
+
+       hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_IPV6;
+}
+
+/* Return true when every one of the @len bytes at @p equals 0xff.
+ * Warns when @len is not a multiple of four.
+ */
+static bool is_full_mask(const void *p, size_t len)
+{
+       WARN_ON(len & 3);
+
+       return memchr_inv(p, 0xff, len) == NULL;
+}
+
+/* Check that the match criteria @match_c fully mask (all bits set) the
+ * outer source and destination IP addresses and the outer ESP SPI.
+ *
+ * The IPv4 address fields are checked when mlx5_fs_is_outer_ipv4_flow()
+ * says so, otherwise the IPv6 address fields are checked.
+ *
+ * Returns true when all three masks are full, false otherwise.
+ */
+static bool validate_fpga_full_mask(struct mlx5_core_dev *dev,
+                                   const u32 *match_c,
+                                   const u32 *match_v)
+{
+       const void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
+                                                match_c,
+                                                misc_parameters);
+       const void *headers_c = MLX5_ADDR_OF(fte_match_param,
+                                            match_c,
+                                            outer_headers);
+       const void *headers_v = MLX5_ADDR_OF(fte_match_param,
+                                            match_v,
+                                            outer_headers);
+
+       /* NOTE(review): other callers in this file pass the whole
+        * fte_match_param to this helper rather than its outer_headers
+        * member - confirm both forms are equivalent.
+        */
+       if (mlx5_fs_is_outer_ipv4_flow(dev, headers_c, headers_v)) {
+               const void *s_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+                                                   headers_c,
+                                                   src_ipv4_src_ipv6.ipv4_layout.ipv4);
+               const void *d_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+                                                   headers_c,
+                                                   dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+               if (!is_full_mask(s_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
+                                                             ipv4)) ||
+                   !is_full_mask(d_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
+                                                             ipv4)))
+                       return false;
+       } else {
+               const void *s_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+                                                   headers_c,
+                                                   src_ipv4_src_ipv6.ipv6_layout.ipv6);
+               const void *d_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+                                                   headers_c,
+                                                   dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+
+               if (!is_full_mask(s_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
+                                                             ipv6)) ||
+                   !is_full_mask(d_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
+                                                             ipv6)))
+                       return false;
+       }
+
+       if (!is_full_mask(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
+                                      outer_esp_spi),
+                         MLX5_FLD_SZ_BYTES(fte_match_set_misc, outer_esp_spi)))
+               return false;
+
+       return true;
+}
+
+/* Decide whether a flow rule (criteria @match_c, values @match_v) is one
+ * this FPGA IPsec code accepts.
+ *
+ * The rule is rejected when outer-header matching is not enabled, when the
+ * flow helpers classify it as UDP, TCP or VXLAN, or when it is neither an
+ * outer IPv4 nor an outer IPv6 flow. It is also rejected when the device
+ * lacks the IPsec capability, when it is an ESP flow and the device lacks
+ * the ESP capability, when it is IPv6 and the device lacks the IPv6
+ * capability, or when the IP/SPI masks are not full.
+ */
+static bool mlx5_is_fpga_ipsec_rule(struct mlx5_core_dev *dev,
+                                   u8 match_criteria_enable,
+                                   const u32 *match_c,
+                                   const u32 *match_v)
+{
+       u32 ipsec_dev_caps = mlx5_accel_ipsec_device_caps(dev);
+       bool ipv6_flow;
+
+       ipv6_flow = mlx5_fs_is_outer_ipv6_flow(dev, match_c, match_v);
+
+       if (!(match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) ||
+           mlx5_fs_is_outer_udp_flow(match_c, match_v) ||
+           mlx5_fs_is_outer_tcp_flow(match_c, match_v) ||
+           mlx5_fs_is_vxlan_flow(match_c) ||
+           !(mlx5_fs_is_outer_ipv4_flow(dev, match_c, match_v) ||
+             ipv6_flow))
+               return false;
+
+       if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_DEVICE))
+               return false;
+
+       if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_ESP) &&
+           mlx5_fs_is_outer_ipsec_flow(match_c))
+               return false;
+
+       if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_IPV6) &&
+           ipv6_flow)
+               return false;
+
+       if (!validate_fpga_full_mask(dev, match_c, match_v))
+               return false;
+
+       return true;
+}
+
+/* Egress variant of mlx5_is_fpga_ipsec_rule().
+ *
+ * On top of the common checks, an egress rule is rejected when its criteria
+ * mask any destination or source MAC bit, when it enables match criteria
+ * other than outer headers and misc parameters, when @flow_act carries an
+ * action other than ENCRYPT or ALLOW, or when it has a flow tag.
+ */
+static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
+                                          u8 match_criteria_enable,
+                                          const u32 *match_c,
+                                          const u32 *match_v,
+                                          struct mlx5_flow_act *flow_act)
+{
+       const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c,
+                                          outer_headers);
+       bool is_dmac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_47_16) ||
+                       MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_15_0);
+       bool is_smac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_47_16) ||
+                       MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_15_0);
+       int ret;
+
+       ret = mlx5_is_fpga_ipsec_rule(dev, match_criteria_enable, match_c,
+                                     match_v);
+       if (!ret)
+               return ret;
+
+       if (is_dmac || is_smac ||
+           (match_criteria_enable &
+            ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) ||
+           (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) ||
+            flow_act->has_flow_tag)
+               return false;
+
+       return true;
+}
+
+/* Get an SA context for @accel_xfrm with the given addresses and SPI.
+ *
+ * When the xfrm is already bound to an SA context, that context is reused
+ * and its rule count is incremented, provided the newly built hardware SA
+ * is byte-identical to the bound one. Otherwise a new context is inserted
+ * into the device SA hash, added to the FPGA and bound to the xfrm.
+ *
+ * Locking: fpga_xfrm->lock is taken first, sa_hash_lock nests inside it.
+ *
+ * Returns the SA context on success, or an ERR_PTR():
+ *   -ENOMEM  allocation failure
+ *   -EINVAL  the xfrm is bound to an SA with different IPs/SPI
+ *   -EEXIST  the hash insertion failed (any insertion error is reported
+ *            this way)
+ *   other    error from mlx5_fpga_ipsec_update_hw_sa()
+ */
+void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+                                   struct mlx5_accel_esp_xfrm *accel_xfrm,
+                                   const __be32 saddr[4],
+                                   const __be32 daddr[4],
+                                   const __be32 spi, bool is_ipv6)
+{
+       struct mlx5_fpga_ipsec_sa_ctx *sa_ctx;
+       struct mlx5_fpga_esp_xfrm *fpga_xfrm =
+                       container_of(accel_xfrm, typeof(*fpga_xfrm),
+                                    accel_xfrm);
+       struct mlx5_fpga_device *fdev = mdev->fpga;
+       struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+       int opcode, err;
+       void *context;
+
+       /* alloc SA */
+       sa_ctx = kzalloc(sizeof(*sa_ctx), GFP_KERNEL);
+       if (!sa_ctx)
+               return ERR_PTR(-ENOMEM);
+
+       sa_ctx->dev = mdev;
+
+       /* build candidate SA */
+       mlx5_fpga_ipsec_build_hw_sa(mdev, &accel_xfrm->attrs,
+                                   saddr, daddr, spi, is_ipv6,
+                                   &sa_ctx->hw_sa);
+
+       mutex_lock(&fpga_xfrm->lock);
+
+       if (fpga_xfrm->sa_ctx) {        /* multiple rules for same accel_xfrm */
+               /* all rules must be with same IPs and SPI */
+               if (memcmp(&sa_ctx->hw_sa, &fpga_xfrm->sa_ctx->hw_sa,
+                          sizeof(sa_ctx->hw_sa))) {
+                       context = ERR_PTR(-EINVAL);
+                       goto exists;
+               }
+
+               /* reuse the bound context; the candidate is freed below */
+               ++fpga_xfrm->num_rules;
+               context = fpga_xfrm->sa_ctx;
+               goto exists;
+       }
+
+       /* This is an unbound fpga_xfrm, try to add to hash */
+       mutex_lock(&fipsec->sa_hash_lock);
+
+       err = rhashtable_lookup_insert_fast(&fipsec->sa_hash, &sa_ctx->hash,
+                                           rhash_sa);
+       if (err) {
+               /* Can't bind a different accel_xfrm to an already existing
+                * sa_ctx. This is because we can't support multiple keymats
+                * for the same IPs and SPI
+                */
+               context = ERR_PTR(-EEXIST);
+               goto unlock_hash;
+       }
+
+       /* Bind accel_xfrm to sa_ctx */
+       opcode = is_v2_sadb_supported(fdev->ipsec) ?
+                       MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 :
+                       MLX5_FPGA_IPSEC_CMD_OP_ADD_SA;
+       err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
+       /* cmd is part of the hash key; reset it after the command */
+       sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+       if (err) {
+               context = ERR_PTR(err);
+               goto delete_hash;
+       }
+
+       mutex_unlock(&fipsec->sa_hash_lock);
+
+       ++fpga_xfrm->num_rules;
+       fpga_xfrm->sa_ctx = sa_ctx;
+       sa_ctx->fpga_xfrm = fpga_xfrm;
+
+       mutex_unlock(&fpga_xfrm->lock);
+
+       return sa_ctx;
+
+delete_hash:
+       WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
+                                      rhash_sa));
+unlock_hash:
+       mutex_unlock(&fipsec->sa_hash_lock);
+
+exists:
+       mutex_unlock(&fpga_xfrm->lock);
+       kfree(sa_ctx);
+       return context;
+}
+
+/* Create (or reuse) the SA context for flow table entry @fte.
+ *
+ * The rule is validated against the criteria of its parent flow group, the
+ * xfrm is taken from fte->action.esp_id, and the addresses and SPI are read
+ * from the match values of @fte. An IPv4 address is placed in the last
+ * word of the four-word address array; the other words are zero.
+ *
+ * Returns the value of mlx5_fpga_ipsec_create_sa_ctx(), or ERR_PTR(-EINVAL)
+ * when the rule fails validation.
+ */
+static void *
+mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev,
+                                struct fs_fte *fte,
+                                bool is_egress)
+{
+       struct mlx5_accel_esp_xfrm *accel_xfrm;
+       /* Zero-initialized: for IPv4 only word 3 is written below, yet all
+        * four words are copied into the hardware SA, which is also the
+        * hash key. They must not carry stack garbage.
+        */
+       __be32 saddr[4] = {0}, daddr[4] = {0}, spi;
+       struct mlx5_flow_group *fg;
+       bool is_ipv6 = false;
+
+       fs_get_obj(fg, fte->node.parent);
+       /* validate; the generic check also runs for a valid egress rule */
+       if (is_egress &&
+           !mlx5_is_fpga_egress_ipsec_rule(mdev,
+                                           fg->mask.match_criteria_enable,
+                                           fg->mask.match_criteria,
+                                           fte->val,
+                                           &fte->action))
+               return ERR_PTR(-EINVAL);
+       else if (!mlx5_is_fpga_ipsec_rule(mdev,
+                                         fg->mask.match_criteria_enable,
+                                         fg->mask.match_criteria,
+                                         fte->val))
+               return ERR_PTR(-EINVAL);
+
+       /* get xfrm context */
+       accel_xfrm =
+               (struct mlx5_accel_esp_xfrm *)fte->action.esp_id;
+
+       /* IPs */
+       if (mlx5_fs_is_outer_ipv4_flow(mdev, fg->mask.match_criteria,
+                                      fte->val)) {
+               memcpy(&saddr[3],
+                      MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+                                   fte->val,
+                                   src_ipv4_src_ipv6.ipv4_layout.ipv4),
+                                   sizeof(saddr[3]));
+               memcpy(&daddr[3],
+                      MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+                                   fte->val,
+                                   dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+                                   sizeof(daddr[3]));
+       } else {
+               memcpy(saddr,
+                      MLX5_ADDR_OF(fte_match_param,
+                                   fte->val,
+                                   outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+                                   sizeof(saddr));
+               memcpy(daddr,
+                      MLX5_ADDR_OF(fte_match_param,
+                                   fte->val,
+                                   outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+                                   sizeof(daddr));
+               is_ipv6 = true;
+       }
+
+       /* SPI */
+       spi = MLX5_GET_BE(typeof(spi),
+                         fte_match_param, fte->val,
+                         misc_parameters.outer_esp_spi);
+
+       /* create */
+       return mlx5_fpga_ipsec_create_sa_ctx(mdev, accel_xfrm,
+                                            saddr, daddr,
+                                            spi, is_ipv6);
+}
+
+/* Remove the SA described by @sa_ctx from the FPGA and, when that
+ * succeeds, from the device SA hash.
+ *
+ * When the delete command fails a warning is emitted and the hash entry is
+ * left in place. This function does not free @sa_ctx.
+ */
+static void
+mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx)
+{
+       struct mlx5_fpga_device *fdev = sa_ctx->dev->fpga;
+       struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+       int opcode = is_v2_sadb_supported(fdev->ipsec) ?
+                       MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 :
+                       MLX5_FPGA_IPSEC_CMD_OP_DEL_SA;
+       int err;
+
+       err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
+       /* cmd is part of the hash key; reset it before the hash removal */
+       sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+       if (err) {
+               WARN_ON(err);
+               return;
+       }
+
+       mutex_lock(&fipsec->sa_hash_lock);
+       WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
+                                      rhash_sa));
+       mutex_unlock(&fipsec->sa_hash_lock);
+}
+
+/* Drop one rule reference from the xfrm that owns @context (an SA context
+ * returned by mlx5_fpga_ipsec_create_sa_ctx()). When the last reference
+ * goes away the SA is released and the xfrm becomes unbound.
+ */
+void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
+{
+       struct mlx5_fpga_ipsec_sa_ctx *sa_ctx = context;
+       struct mlx5_fpga_esp_xfrm *fpga_xfrm = sa_ctx->fpga_xfrm;
+
+       mutex_lock(&fpga_xfrm->lock);
+       fpga_xfrm->num_rules--;
+       if (fpga_xfrm->num_rules == 0) {
+               mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx);
+               fpga_xfrm->sa_ctx = NULL;
+       }
+       mutex_unlock(&fpga_xfrm->lock);
+}
+
+/* Look up the rule whose key is @fte in the tree rooted at @root.
+ *
+ * The tree is ordered by fte address with smaller addresses on the left,
+ * exactly as _rule_insert() links new nodes; the descent here must use the
+ * same direction or existing rules are never found.
+ *
+ * Returns the rule, or NULL when no rule has this @fte. The caller is
+ * responsible for locking.
+ */
+static inline struct mlx5_fpga_ipsec_rule *
+_rule_search(struct rb_root *root, struct fs_fte *fte)
+{
+       struct rb_node *node = root->rb_node;
+
+       while (node) {
+               struct mlx5_fpga_ipsec_rule *rule =
+                               container_of(node, struct mlx5_fpga_ipsec_rule,
+                                            node);
+
+               if (fte < rule->fte)
+                       node = node->rb_left;
+               else if (fte > rule->fte)
+                       node = node->rb_right;
+               else
+                       return rule;
+       }
+       return NULL;
+}
+
+/* Locked wrapper around _rule_search(): find the rule for @fte in the
+ * device rules tree. The lock is dropped before returning, so the result
+ * is not protected against concurrent removal.
+ */
+static struct mlx5_fpga_ipsec_rule *
+rule_search(struct mlx5_fpga_ipsec *ipsec_dev, struct fs_fte *fte)
+{
+       struct mutex *tree_lock = &ipsec_dev->rules_rb_lock;
+       struct mlx5_fpga_ipsec_rule *found;
+
+       mutex_lock(tree_lock);
+       found = _rule_search(&ipsec_dev->rules_rb, fte);
+       mutex_unlock(tree_lock);
+
+       return found;
+}
+
+/* Link @rule into the tree rooted at @root, ordered by fte address
+ * (smaller addresses go left, larger go right).
+ *
+ * Returns 0 on success or -EEXIST when a rule with the same fte is already
+ * in the tree. The caller is responsible for locking.
+ */
+static inline int _rule_insert(struct rb_root *root,
+                              struct mlx5_fpga_ipsec_rule *rule)
+{
+       struct rb_node **new = &root->rb_node, *parent = NULL;
+
+       /* Figure out where to put new node */
+       while (*new) {
+               struct mlx5_fpga_ipsec_rule *this =
+                               container_of(*new, struct mlx5_fpga_ipsec_rule,
+                                            node);
+
+               parent = *new;
+               if (rule->fte < this->fte)
+                       new = &((*new)->rb_left);
+               else if (rule->fte > this->fte)
+                       new = &((*new)->rb_right);
+               else
+                       return -EEXIST;
+       }
+
+       /* Add new node and rebalance tree. */
+       rb_link_node(&rule->node, parent, new);
+       rb_insert_color(&rule->node, root);
+
+       return 0;
+}
+
+/* Locked wrapper around _rule_insert(): add @rule to the device rules
+ * tree. Returns 0 on success or the error from _rule_insert().
+ */
+static int rule_insert(struct mlx5_fpga_ipsec *ipsec_dev,
+                      struct mlx5_fpga_ipsec_rule *rule)
+{
+       struct mutex *tree_lock = &ipsec_dev->rules_rb_lock;
+       int err;
+
+       mutex_lock(tree_lock);
+       err = _rule_insert(&ipsec_dev->rules_rb, rule);
+       mutex_unlock(tree_lock);
+
+       return err;
+}
+
+/* Unlink @rule from the device rules tree under the rules lock. The rule
+ * itself is not freed.
+ */
+static inline void _rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
+                               struct mlx5_fpga_ipsec_rule *rule)
+{
+       mutex_lock(&ipsec_dev->rules_rb_lock);
+       rb_erase(&rule->node, &ipsec_dev->rules_rb);
+       mutex_unlock(&ipsec_dev->rules_rb_lock);
+}
+
+/* Unlink @rule from the device rules tree and free it. */
+static void rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
+                       struct mlx5_fpga_ipsec_rule *rule)
+{
+       _rule_delete(ipsec_dev, rule);
+       kfree(rule);
+}
+
+/* Fields of a flow table entry saved by modify_spec_mailbox() so that
+ * restore_spec_mailbox() can put them back.
+ */
+struct mailbox_mod {
+       uintptr_t                       saved_esp_id; /* fte->action.esp_id */
+       u32                             saved_action; /* ENCRYPT/DECRYPT action bits */
+       u32                             saved_outer_esp_spi_value; /* outer_esp_spi match value */
+};
+
+/* Put back into @fte what modify_spec_mailbox() saved in @mbox_mod: the
+ * esp_id, the saved action bits (OR-ed in) and the outer ESP SPI value.
+ */
+static void restore_spec_mailbox(struct fs_fte *fte,
+                                struct mailbox_mod *mbox_mod)
+{
+       char *misc_v;
+
+       misc_v = MLX5_ADDR_OF(fte_match_param, fte->val, misc_parameters);
+       MLX5_SET(fte_match_set_misc, misc_v, outer_esp_spi,
+                mbox_mod->saved_outer_esp_spi_value);
+
+       fte->action.esp_id = (uintptr_t)mbox_mod->saved_esp_id;
+       fte->action.action |= mbox_mod->saved_action;
+}
+
+/* Strip the IPsec-specific parts from @fte, saving them in @mbox_mod.
+ *
+ * Saved: esp_id, the ENCRYPT/DECRYPT action bits and the outer ESP SPI
+ * match value. Then esp_id and those action bits are cleared in @fte, and
+ * the SPI match value is zeroed when the device does not report
+ * outer_esp_spi in its NIC receive ft_field_support capability.
+ * restore_spec_mailbox() undoes these changes.
+ */
+static void modify_spec_mailbox(struct mlx5_core_dev *mdev,
+                               struct fs_fte *fte,
+                               struct mailbox_mod *mbox_mod)
+{
+       char *misc_params_v = MLX5_ADDR_OF(fte_match_param,
+                                          fte->val,
+                                          misc_parameters);
+
+       mbox_mod->saved_esp_id = fte->action.esp_id;
+       mbox_mod->saved_action = fte->action.action &
+                       (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+                        MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
+       mbox_mod->saved_outer_esp_spi_value =
+                       MLX5_GET(fte_match_set_misc, misc_params_v,
+                                outer_esp_spi);
+
+       fte->action.esp_id = 0;
+       fte->action.action &= ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+                               MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
+       if (!MLX5_CAP_FLOWTABLE(mdev,
+                               flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+               MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, 0);
+}
+
+/* Map the egress flag to the NIC flow table type: TX for egress, RX
+ * otherwise.
+ */
+static enum fs_flow_table_type egress_to_fs_ft(bool egress)
+{
+       if (egress)
+               return FS_FT_NIC_TX;
+
+       return FS_FT_NIC_RX;
+}
+
+/* Create a flow group through the default flow-steering command for the
+ * table type, hiding the outer ESP SPI mask from devices that do not
+ * report outer_esp_spi in their NIC receive ft_field_support capability.
+ *
+ * When the capability is present, or when the request enables no match
+ * criteria or has a zero SPI mask, @in is passed through unchanged.
+ * Otherwise the SPI mask in @in is temporarily zeroed; if that leaves the
+ * misc parameters entirely zero, MLX5_MATCH_MISC_PARAMETERS is also
+ * dropped from match_criteria_enable. Both fields of @in are restored
+ * after the command.
+ *
+ * Returns the result of the default create_flow_group command.
+ */
+static int fpga_ipsec_fs_create_flow_group(struct mlx5_core_dev *dev,
+                                          struct mlx5_flow_table *ft,
+                                          u32 *in,
+                                          unsigned int *group_id,
+                                          bool is_egress)
+{
+       int (*create_flow_group)(struct mlx5_core_dev *dev,
+                                struct mlx5_flow_table *ft, u32 *in,
+                                unsigned int *group_id) =
+               mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_flow_group;
+       char *misc_params_c = MLX5_ADDR_OF(create_flow_group_in, in,
+                                          match_criteria.misc_parameters);
+       u32 saved_outer_esp_spi_mask;
+       u8 match_criteria_enable;
+       int ret;
+
+       if (MLX5_CAP_FLOWTABLE(dev,
+                              flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+               return create_flow_group(dev, ft, in, group_id);
+
+       match_criteria_enable =
+               MLX5_GET(create_flow_group_in, in, match_criteria_enable);
+       saved_outer_esp_spi_mask =
+               MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi);
+       if (!match_criteria_enable || !saved_outer_esp_spi_mask)
+               return create_flow_group(dev, ft, in, group_id);
+
+       MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, 0);
+
+       /* All-zero test: first byte is zero and every byte equals its
+        * successor.
+        */
+       if (!(*misc_params_c) &&
+           !memcmp(misc_params_c, misc_params_c + 1, MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1))
+               MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+                        match_criteria_enable & ~MLX5_MATCH_MISC_PARAMETERS);
+
+       ret = create_flow_group(dev, ft, in, group_id);
+
+       /* Restore the caller's mailbox */
+       MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, saved_outer_esp_spi_mask);
+       MLX5_SET(create_flow_group_in, in, match_criteria_enable, match_criteria_enable);
+
+       return ret;
+}
+
+/*
+ * Create a flow table entry, setting up FPGA IPSec state first when the
+ * entry carries an ESP id together with an encrypt or decrypt action.
+ *
+ * For such entries a rule object with a new SA context is allocated and
+ * handed to rule_insert(); the ESP-specific fields are then stripped from
+ * @fte for the duration of the default create_fte command and restored
+ * afterwards. All other entries go straight to the default command.
+ *
+ * Returns -ENOMEM if the rule cannot be allocated, the SA context creation
+ * error, or otherwise the result of the default create_fte command.
+ */
+static int fpga_ipsec_fs_create_fte(struct mlx5_core_dev *dev,
+                                   struct mlx5_flow_table *ft,
+                                   struct mlx5_flow_group *fg,
+                                   struct fs_fte *fte,
+                                   bool is_egress)
+{
+       int (*create_fte)(struct mlx5_core_dev *dev,
+                         struct mlx5_flow_table *ft,
+                         struct mlx5_flow_group *fg,
+                         struct fs_fte *fte) =
+               mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_fte;
+       struct mlx5_fpga_device *fdev = dev->fpga;
+       struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+       struct mlx5_fpga_ipsec_rule *rule;
+       bool is_esp = fte->action.esp_id;
+       struct mailbox_mod mbox_mod;
+       int ret;
+
+       /* not an ESP offload entry: use the default command as is */
+       if (!is_esp ||
+           !(fte->action.action &
+             (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+              MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
+               return create_fte(dev, ft, fg, fte);
+
+       rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+       if (!rule)
+               return -ENOMEM;
+
+       rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress);
+       if (IS_ERR(rule->ctx)) {
+               int err = PTR_ERR(rule->ctx);
+               kfree(rule);
+               return err;
+       }
+
+       rule->fte = fte;
+       /*
+        * NOTE(review): a failing rule_insert() only triggers a warning and
+        * the flow continues; the error path below would then call
+        * _rule_delete() on a rule that was never inserted - confirm.
+        */
+       WARN_ON(rule_insert(fipsec, rule));
+
+       /* issue the command without the ESP fields, then restore them */
+       modify_spec_mailbox(dev, fte, &mbox_mod);
+       ret = create_fte(dev, ft, fg, fte);
+       restore_spec_mailbox(fte, &mbox_mod);
+       if (ret) {
+               _rule_delete(fipsec, rule);
+               mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
+               kfree(rule);
+       }
+
+       return ret;
+}
+
+/*
+ * Update a flow table entry through the default command set for the given
+ * direction. Entries that carry an ESP id together with an encrypt or
+ * decrypt action have their ESP-specific fields stripped while the command
+ * is issued and restored afterwards.
+ *
+ * Returns the result of the default update_fte command.
+ */
+static int fpga_ipsec_fs_update_fte(struct mlx5_core_dev *dev,
+                                   struct mlx5_flow_table *ft,
+                                   unsigned int group_id,
+                                   int modify_mask,
+                                   struct fs_fte *fte,
+                                   bool is_egress)
+{
+       const struct mlx5_flow_cmds *def_cmds =
+               mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress));
+       const int crypto_actions = MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+                                  MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
+       struct mailbox_mod saved;
+       int err;
+
+       if (fte->action.esp_id && (fte->action.action & crypto_actions)) {
+               modify_spec_mailbox(dev, fte, &saved);
+               err = def_cmds->update_fte(dev, ft, group_id, modify_mask,
+                                          fte);
+               restore_spec_mailbox(fte, &saved);
+               return err;
+       }
+
+       /* not an ESP offload entry: nothing to hide from the command */
+       return def_cmds->update_fte(dev, ft, group_id, modify_mask, fte);
+}
+
+/*
+ * Delete a flow table entry through the default command set for the given
+ * direction. For entries that carry an ESP id together with an encrypt or
+ * decrypt action, the matching rule is looked up, its SA context is deleted
+ * and the rule is removed before the default delete_fte command is issued
+ * with the ESP-specific fields stripped (and restored afterwards).
+ *
+ * Returns -ENOENT when no rule is found for @fte, otherwise the result of
+ * the default delete_fte command.
+ */
+static int fpga_ipsec_fs_delete_fte(struct mlx5_core_dev *dev,
+                                   struct mlx5_flow_table *ft,
+                                   struct fs_fte *fte,
+                                   bool is_egress)
+{
+       int (*delete_fte)(struct mlx5_core_dev *dev,
+                         struct mlx5_flow_table *ft,
+                         struct fs_fte *fte) =
+               mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->delete_fte;
+       struct mlx5_fpga_device *fdev = dev->fpga;
+       struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+       struct mlx5_fpga_ipsec_rule *rule;
+       bool is_esp = fte->action.esp_id;
+       struct mailbox_mod mbox_mod;
+       int ret;
+
+       /* not an ESP offload entry: use the default command as is */
+       if (!is_esp ||
+           !(fte->action.action &
+             (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+              MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
+               return delete_fte(dev, ft, fte);
+
+       rule = rule_search(fipsec, fte);
+       if (!rule)
+               return -ENOENT;
+
+       /*
+        * NOTE(review): the SA context and the rule are released before the
+        * entry itself is deleted, and stay released if delete_fte fails;
+        * rule_delete() presumably also frees the rule - confirm.
+        */
+       mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
+       rule_delete(fipsec, rule);
+
+       modify_spec_mailbox(dev, fte, &mbox_mod);
+       ret = delete_fte(dev, ft, fte);
+       restore_spec_mailbox(fte, &mbox_mod);
+
+       return ret;
+}
+
+/* Egress variant: calls fpga_ipsec_fs_create_flow_group() with is_egress = true. */
+static int
+mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_core_dev *dev,
+                                           struct mlx5_flow_table *ft,
+                                           u32 *in,
+                                           unsigned int *group_id)
+{
+       return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, true);
+}
+
+/* Egress variant: calls fpga_ipsec_fs_create_fte() with is_egress = true. */
+static int
+mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_core_dev *dev,
+                                    struct mlx5_flow_table *ft,
+                                    struct mlx5_flow_group *fg,
+                                    struct fs_fte *fte)
+{
+       return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, true);
+}
+
+/* Egress variant: calls fpga_ipsec_fs_update_fte() with is_egress = true. */
+static int
+mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_core_dev *dev,
+                                    struct mlx5_flow_table *ft,
+                                    unsigned int group_id,
+                                    int modify_mask,
+                                    struct fs_fte *fte)
+{
+       return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte,
+                                       true);
+}
+
+/* Egress variant: calls fpga_ipsec_fs_delete_fte() with is_egress = true. */
+static int
+mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_core_dev *dev,
+                                    struct mlx5_flow_table *ft,
+                                    struct fs_fte *fte)
+{
+       return fpga_ipsec_fs_delete_fte(dev, ft, fte, true);
+}
+
+/* Ingress variant: calls fpga_ipsec_fs_create_flow_group() with is_egress = false. */
+static int
+mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_core_dev *dev,
+                                            struct mlx5_flow_table *ft,
+                                            u32 *in,
+                                            unsigned int *group_id)
+{
+       return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, false);
+}
+
+/* Ingress variant: calls fpga_ipsec_fs_create_fte() with is_egress = false. */
+static int
+mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_core_dev *dev,
+                                     struct mlx5_flow_table *ft,
+                                     struct mlx5_flow_group *fg,
+                                     struct fs_fte *fte)
+{
+       return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, false);
+}
+
+/* Ingress variant: calls fpga_ipsec_fs_update_fte() with is_egress = false. */
+static int
+mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_core_dev *dev,
+                                     struct mlx5_flow_table *ft,
+                                     unsigned int group_id,
+                                     int modify_mask,
+                                     struct fs_fte *fte)
+{
+       return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte,
+                                       false);
+}
+
+/* Ingress variant: calls fpga_ipsec_fs_delete_fte() with is_egress = false. */
+static int
+mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_core_dev *dev,
+                                     struct mlx5_flow_table *ft,
+                                     struct fs_fte *fte)
+{
+       return fpga_ipsec_fs_delete_fte(dev, ft, fte, false);
+}
+
+static struct mlx5_flow_cmds fpga_ipsec_ingress;
+static struct mlx5_flow_cmds fpga_ipsec_egress;
+
+/*
+ * Return the FPGA IPSec command table for a flow table type: the ingress
+ * table for FS_FT_NIC_RX, the egress table for FS_FT_NIC_TX. Any other type
+ * triggers a warning and yields NULL.
+ */
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
+{
+       if (type == FS_FT_NIC_RX)
+               return &fpga_ipsec_ingress;
+
+       if (type == FS_FT_NIC_TX)
+               return &fpga_ipsec_egress;
+
+       WARN_ON(true);
+       return NULL;
+}
+
 int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
 {
        struct mlx5_fpga_conn_attr init_attr = {0};
@@ -332,6 +1251,8 @@ int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
        if (!fdev->ipsec)
                return -ENOMEM;
 
+       fdev->ipsec->fdev = fdev;
+
        err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps),
                                     fdev->ipsec->caps);
        if (err) {
@@ -355,14 +1276,47 @@ int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
                goto error;
        }
        fdev->ipsec->conn = conn;
+
+       err = rhashtable_init(&fdev->ipsec->sa_hash, &rhash_sa);
+       if (err)
+               goto err_destroy_conn;
+       mutex_init(&fdev->ipsec->sa_hash_lock);
+
+       fdev->ipsec->rules_rb = RB_ROOT;
+       mutex_init(&fdev->ipsec->rules_rb_lock);
+
+       err = mlx5_fpga_ipsec_enable_supported_caps(mdev);
+       if (err) {
+               mlx5_fpga_err(fdev, "Failed to enable IPSec extended capabilities: %d\n",
+                             err);
+               goto err_destroy_hash;
+       }
+
        return 0;
 
+err_destroy_hash:
+       rhashtable_destroy(&fdev->ipsec->sa_hash);
+
+err_destroy_conn:
+       mlx5_fpga_sbu_conn_destroy(conn);
+
 error:
        kfree(fdev->ipsec);
        fdev->ipsec = NULL;
        return err;
 }
 
+/*
+ * Release every rule still linked into the rules tree rooted at @root: the
+ * rule's SA context is deleted and the rule itself is freed. The root is
+ * reset to an empty tree afterwards.
+ *
+ * Nodes are deliberately not unlinked with rb_erase() during the walk: a
+ * postorder traversal must not be combined with rb_erase(), because the
+ * rebalancing it performs can make the iteration miss nodes. Freeing each
+ * node after its children have been visited and clearing the root once at
+ * the end is sufficient.
+ */
+static void destroy_rules_rb(struct rb_root *root)
+{
+       struct mlx5_fpga_ipsec_rule *r, *tmp;
+
+       rbtree_postorder_for_each_entry_safe(r, tmp, root, node) {
+               mlx5_fpga_ipsec_delete_sa_ctx(r->ctx);
+               kfree(r);
+       }
+
+       /* every node is gone; do not leave dangling pointers in the root */
+       *root = RB_ROOT;
+}
+
 void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
 {
        struct mlx5_fpga_device *fdev = mdev->fpga;
@@ -370,7 +1324,209 @@ void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
        if (!mlx5_fpga_is_ipsec_device(mdev))
                return;
 
+       destroy_rules_rb(&fdev->ipsec->rules_rb);
+       rhashtable_destroy(&fdev->ipsec->sa_hash);
+
        mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn);
        kfree(fdev->ipsec);
        fdev->ipsec = NULL;
 }
+
+/*
+ * Fill in the ingress and egress FPGA IPSec command tables. Flow group
+ * creation and the create/update/delete FTE commands point at the IPSec
+ * aware wrappers of the matching direction; every other command is taken
+ * from the default command set of that direction.
+ */
+void mlx5_fpga_ipsec_build_fs_cmds(void)
+{
+       const struct mlx5_flow_cmds *def_rx =
+               mlx5_fs_cmd_get_default(egress_to_fs_ft(false));
+       const struct mlx5_flow_cmds *def_tx =
+               mlx5_fs_cmd_get_default(egress_to_fs_ft(true));
+
+       /* ingress: defaults */
+       fpga_ipsec_ingress.create_flow_table = def_rx->create_flow_table;
+       fpga_ipsec_ingress.destroy_flow_table = def_rx->destroy_flow_table;
+       fpga_ipsec_ingress.modify_flow_table = def_rx->modify_flow_table;
+       fpga_ipsec_ingress.destroy_flow_group = def_rx->destroy_flow_group;
+       fpga_ipsec_ingress.update_root_ft = def_rx->update_root_ft;
+       /* ingress: IPSec aware overrides */
+       fpga_ipsec_ingress.create_flow_group =
+               mlx5_fpga_ipsec_fs_create_flow_group_ingress;
+       fpga_ipsec_ingress.create_fte = mlx5_fpga_ipsec_fs_create_fte_ingress;
+       fpga_ipsec_ingress.update_fte = mlx5_fpga_ipsec_fs_update_fte_ingress;
+       fpga_ipsec_ingress.delete_fte = mlx5_fpga_ipsec_fs_delete_fte_ingress;
+
+       /* egress: defaults */
+       fpga_ipsec_egress.create_flow_table = def_tx->create_flow_table;
+       fpga_ipsec_egress.destroy_flow_table = def_tx->destroy_flow_table;
+       fpga_ipsec_egress.modify_flow_table = def_tx->modify_flow_table;
+       fpga_ipsec_egress.destroy_flow_group = def_tx->destroy_flow_group;
+       fpga_ipsec_egress.update_root_ft = def_tx->update_root_ft;
+       /* egress: IPSec aware overrides */
+       fpga_ipsec_egress.create_flow_group =
+               mlx5_fpga_ipsec_fs_create_flow_group_egress;
+       fpga_ipsec_egress.create_fte = mlx5_fpga_ipsec_fs_create_fte_egress;
+       fpga_ipsec_egress.update_fte = mlx5_fpga_ipsec_fs_update_fte_egress;
+       fpga_ipsec_egress.delete_fte = mlx5_fpga_ipsec_fs_delete_fte_egress;
+}
+
+/*
+ * Check whether the given xfrm attributes can be offloaded.
+ *
+ * Rejected are: TFC padding, any anti-replay type, key material other than
+ * AES-GCM, an IV algorithm other than the sequence one, an ICV length other
+ * than 128 bit, a key length other than 128 or 256 bit, and the ESN
+ * triggered flag when the device capabilities lack v2_command.
+ *
+ * Returns 0 when the attributes are supported, -EOPNOTSUPP otherwise (an
+ * error describing the first failed check is logged).
+ */
+static int
+mlx5_fpga_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev,
+                                 const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+       if (attrs->tfc_pad) {
+               mlx5_core_err(mdev, "Cannot offload xfrm states with tfc padding\n");
+               return -EOPNOTSUPP;
+       }
+
+       if (attrs->replay_type != MLX5_ACCEL_ESP_REPLAY_NONE) {
+               mlx5_core_err(mdev, "Cannot offload xfrm states with anti replay\n");
+               return -EOPNOTSUPP;
+       }
+
+       if (attrs->keymat_type != MLX5_ACCEL_ESP_KEYMAT_AES_GCM) {
+               mlx5_core_err(mdev, "Only aes gcm keymat is supported\n");
+               return -EOPNOTSUPP;
+       }
+
+       if (attrs->keymat.aes_gcm.iv_algo !=
+           MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ) {
+               mlx5_core_err(mdev, "Only iv sequence algo is supported\n");
+               return -EOPNOTSUPP;
+       }
+
+       if (attrs->keymat.aes_gcm.icv_len != 128) {
+               mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n");
+               return -EOPNOTSUPP;
+       }
+
+       if (attrs->keymat.aes_gcm.key_len != 128 &&
+           attrs->keymat.aes_gcm.key_len != 256) {
+               mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+               return -EOPNOTSUPP;
+       }
+
+       /* the ESN triggered flag needs the v2 command capability */
+       if ((attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) &&
+           (!MLX5_GET(ipsec_extended_cap, mdev->fpga->ipsec->caps,
+                      v2_command))) {
+               mlx5_core_err(mdev, "Cannot offload xfrm states with ESN triggered without v2 command support\n");
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+/*
+ * Allocate an FPGA ESP xfrm object holding a copy of @attrs.
+ *
+ * Returns a pointer to the embedded accel xfrm on success, or an ERR_PTR:
+ * -EINVAL when @flags lacks MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA,
+ * -EOPNOTSUPP when the attributes fail validation, -ENOMEM when the
+ * allocation fails.
+ */
+struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+                         const struct mlx5_accel_esp_xfrm_attrs *attrs,
+                         u32 flags)
+{
+       bool want_metadata = flags & MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA;
+       struct mlx5_fpga_esp_xfrm *fx;
+
+       if (!want_metadata) {
+               mlx5_core_warn(mdev, "Tried to create an esp action without metadata\n");
+               return ERR_PTR(-EINVAL);
+       }
+
+       if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs) != 0) {
+               mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n");
+               return ERR_PTR(-EOPNOTSUPP);
+       }
+
+       fx = kzalloc(sizeof(*fx), GFP_KERNEL);
+       if (!fx)
+               return ERR_PTR(-ENOMEM);
+
+       /* byte-wise copy of the attributes into the new object */
+       memcpy(&fx->accel_xfrm.attrs, attrs, sizeof(fx->accel_xfrm.attrs));
+       mutex_init(&fx->lock);
+
+       return &fx->accel_xfrm;
+}
+
+/*
+ * Free the FPGA ESP xfrm object that embeds @xfrm. Assumes that no sa_ctx
+ * is connected to this xfrm any more.
+ */
+void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+       kfree(container_of(xfrm, struct mlx5_fpga_esp_xfrm, accel_xfrm));
+}
+
+/*
+ * Replace the attributes of an existing ESP xfrm with @attrs.
+ *
+ * Nothing is done when the new attributes are byte-identical to the current
+ * ones. The new attributes must pass mlx5_fpga_esp_validate_xfrm_attrs().
+ * When an SA context is bound to the xfrm, its hardware SA is rebuilt from
+ * @attrs, re-hashed and pushed to the device; on any failure the original
+ * hardware SA is put back into the hash. The software copy of the attributes
+ * is only updated when everything succeeded.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when the attributes are not supported or
+ * modification is refused, or the error of the hash insert / device update.
+ */
+int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+                             const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+       struct mlx5_core_dev *mdev = xfrm->mdev;
+       struct mlx5_fpga_device *fdev = mdev->fpga;
+       struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+       struct mlx5_fpga_esp_xfrm *fpga_xfrm;
+       struct mlx5_ifc_fpga_ipsec_sa org_hw_sa;
+       int err = 0;
+
+       if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs)))
+               return 0;
+
+       /* the validator returns 0 for supported attrs, non-zero otherwise */
+       if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
+               mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n");
+               return -EOPNOTSUPP;
+       }
+
+       /*
+        * NOTE(review): modification is refused when v2 SADB IS supported,
+        * although the update below uses the MOD_SA_V2 opcode - confirm the
+        * sense of this check against is_v2_sadb_supported().
+        */
+       if (is_v2_sadb_supported(fipsec)) {
+               mlx5_core_warn(mdev, "Modify esp is not supported\n");
+               return -EOPNOTSUPP;
+       }
+
+       fpga_xfrm = container_of(xfrm, struct mlx5_fpga_esp_xfrm, accel_xfrm);
+
+       mutex_lock(&fpga_xfrm->lock);
+
+       if (!fpga_xfrm->sa_ctx)
+               /* Unbound xfrm, change only sw attrs */
+               goto change_sw_xfrm_attrs;
+
+       /* copy original hw sa */
+       memcpy(&org_hw_sa, &fpga_xfrm->sa_ctx->hw_sa, sizeof(org_hw_sa));
+       mutex_lock(&fipsec->sa_hash_lock);
+       /* remove original hw sa from hash */
+       WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
+                                      &fpga_xfrm->sa_ctx->hash, rhash_sa));
+       /* update hw_sa with new xfrm attrs */
+       mlx5_fpga_ipsec_build_hw_xfrm(xfrm->mdev, attrs,
+                                     &fpga_xfrm->sa_ctx->hw_sa);
+       /* try to insert new hw_sa to hash */
+       err = rhashtable_insert_fast(&fipsec->sa_hash,
+                                    &fpga_xfrm->sa_ctx->hash, rhash_sa);
+       if (err)
+               goto rollback_sa;
+
+       /* modify device with new hw_sa */
+       err = mlx5_fpga_ipsec_update_hw_sa(fdev, &fpga_xfrm->sa_ctx->hw_sa,
+                                          MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2);
+       fpga_xfrm->sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+       if (err)
+               /* device update failed: take the new hw_sa out again */
+               WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
+                                              &fpga_xfrm->sa_ctx->hash,
+                                              rhash_sa));
+rollback_sa:
+       if (err) {
+               /* return original hw_sa to hash */
+               memcpy(&fpga_xfrm->sa_ctx->hw_sa, &org_hw_sa,
+                      sizeof(org_hw_sa));
+               WARN_ON(rhashtable_insert_fast(&fipsec->sa_hash,
+                                              &fpga_xfrm->sa_ctx->hash,
+                                              rhash_sa));
+       }
+       mutex_unlock(&fipsec->sa_hash_lock);
+
+change_sw_xfrm_attrs:
+       if (!err)
+               memcpy(&xfrm->attrs, attrs, sizeof(xfrm->attrs));
+       mutex_unlock(&fpga_xfrm->lock);
+       return err;
+}
index 26a3e4b56972863efae5e45a28e88df0cf42ad32..2b5e63b0d4d6cd0a59caa9095b6d6d507b6dadb6 100644 (file)
 #define __MLX5_FPGA_IPSEC_H__
 
 #include "accel/ipsec.h"
+#include "fs_cmd.h"
 
 #ifdef CONFIG_MLX5_FPGA
 
-void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
-                                 struct mlx5_accel_ipsec_sa *cmd);
-int mlx5_fpga_ipsec_sa_cmd_wait(void *context);
-
 u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
 unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev);
 int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
                                  unsigned int counters_count);
 
+void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+                                   struct mlx5_accel_esp_xfrm *accel_xfrm,
+                                   const __be32 saddr[4],
+                                   const __be32 daddr[4],
+                                   const __be32 spi, bool is_ipv6);
+void mlx5_fpga_ipsec_delete_sa_ctx(void *context);
+
 int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev);
 void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev);
+void mlx5_fpga_ipsec_build_fs_cmds(void);
 
-#else
+struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+                         const struct mlx5_accel_esp_xfrm_attrs *attrs,
+                         u32 flags);
+void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm);
+int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+                             const struct mlx5_accel_esp_xfrm_attrs *attrs);
 
-static inline void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
-                                               struct mlx5_accel_ipsec_sa *cmd)
-{
-       return ERR_PTR(-EOPNOTSUPP);
-}
+const struct mlx5_flow_cmds *
+mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type);
 
-static inline int mlx5_fpga_ipsec_sa_cmd_wait(void *context)
-{
-       return -EOPNOTSUPP;
-}
+#else
 
 static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
 {
@@ -80,6 +85,20 @@ static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev,
        return 0;
 }
 
+/*
+ * Stub used when CONFIG_MLX5_FPGA is not set: creates nothing and returns
+ * NULL.
+ * NOTE(review): this is NULL rather than an ERR_PTR, so a caller testing the
+ * result with IS_ERR() would not see a failure - confirm this is intended.
+ */
+static inline void *
+mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+                             struct mlx5_accel_esp_xfrm *accel_xfrm,
+                             const __be32 saddr[4],
+                             const __be32 daddr[4],
+                             const __be32 spi, bool is_ipv6)
+{
+       return NULL;
+}
+
+/* Stub used when CONFIG_MLX5_FPGA is not set: does nothing. */
+static inline void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
+{
+}
+
 static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
 {
        return 0;
@@ -89,6 +108,35 @@ static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
 {
 }
 
+/* Stub used when CONFIG_MLX5_FPGA is not set: there are no tables to build. */
+static inline void mlx5_fpga_ipsec_build_fs_cmds(void)
+{
+}
+
+/* Stub used when CONFIG_MLX5_FPGA is not set: always ERR_PTR(-EOPNOTSUPP). */
+static inline struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+                         const struct mlx5_accel_esp_xfrm_attrs *attrs,
+                         u32 flags)
+{
+       return ERR_PTR(-EOPNOTSUPP);
+}
+
+/* Stub used when CONFIG_MLX5_FPGA is not set: does nothing. */
+static inline void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+}
+
+/* Stub used when CONFIG_MLX5_FPGA is not set: always fails with -EOPNOTSUPP. */
+static inline int
+mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+                         const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+       return -EOPNOTSUPP;
+}
+
+/*
+ * Variant used when CONFIG_MLX5_FPGA is not set: falls back to the default
+ * command set returned by mlx5_fs_cmd_get_default() for @type.
+ */
+static inline const struct mlx5_flow_cmds *
+mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
+{
+       return mlx5_fs_cmd_get_default(type);
+}
+
 #endif /* CONFIG_MLX5_FPGA */
 
 #endif /* __MLX5_FPGA_SADB_H__ */
index 881e2e55840c92ad8893c9bbfa115ccd36826a71..645f83cac34d7d1ea28315d844cce22bb35bab6e 100644 (file)
 #include "mlx5_core.h"
 #include "eswitch.h"
 
-int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
-                           struct mlx5_flow_table *ft, u32 underlay_qpn,
-                           bool disconnect)
+/* Stub update_root_ft command: does nothing and reports success (0). */
+static int mlx5_cmd_stub_update_root_ft(struct mlx5_core_dev *dev,
+                                       struct mlx5_flow_table *ft,
+                                       u32 underlay_qpn,
+                                       bool disconnect)
+{
+       return 0;
+}
+
+/*
+ * Stub create_flow_table command: does nothing and reports success (0).
+ * Note that @table_id is not written.
+ */
+static int mlx5_cmd_stub_create_flow_table(struct mlx5_core_dev *dev,
+                                          u16 vport,
+                                          enum fs_flow_table_op_mod op_mod,
+                                          enum fs_flow_table_type type,
+                                          unsigned int level,
+                                          unsigned int log_size,
+                                          struct mlx5_flow_table *next_ft,
+                                          unsigned int *table_id, u32 flags)
+{
+       return 0;
+}
+
+/* Stub destroy_flow_table command: does nothing and reports success (0). */
+static int mlx5_cmd_stub_destroy_flow_table(struct mlx5_core_dev *dev,
+                                           struct mlx5_flow_table *ft)
+{
+       return 0;
+}
+
+/* Stub modify_flow_table command: does nothing and reports success (0). */
+static int mlx5_cmd_stub_modify_flow_table(struct mlx5_core_dev *dev,
+                                          struct mlx5_flow_table *ft,
+                                          struct mlx5_flow_table *next_ft)
+{
+       return 0;
+}
+
+/*
+ * Stub create_flow_group command: does nothing and reports success (0).
+ * Note that @group_id is not written.
+ */
+static int mlx5_cmd_stub_create_flow_group(struct mlx5_core_dev *dev,
+                                          struct mlx5_flow_table *ft,
+                                          u32 *in,
+                                          unsigned int *group_id)
+{
+       return 0;
+}
+
+/* Stub destroy_flow_group command: does nothing and reports success (0). */
+static int mlx5_cmd_stub_destroy_flow_group(struct mlx5_core_dev *dev,
+                                           struct mlx5_flow_table *ft,
+                                           unsigned int group_id)
+{
+       return 0;
+}
+
+/* Stub create_fte command: does nothing and reports success (0). */
+static int mlx5_cmd_stub_create_fte(struct mlx5_core_dev *dev,
+                                   struct mlx5_flow_table *ft,
+                                   struct mlx5_flow_group *group,
+                                   struct fs_fte *fte)
+{
+       return 0;
+}
+
+/*
+ * Stub update_fte command: unlike the other stubs in this group it does not
+ * report success but always fails with -EOPNOTSUPP.
+ */
+static int mlx5_cmd_stub_update_fte(struct mlx5_core_dev *dev,
+                                   struct mlx5_flow_table *ft,
+                                   unsigned int group_id,
+                                   int modify_mask,
+                                   struct fs_fte *fte)
+{
+       return -EOPNOTSUPP;
+}
+
+/* Stub delete_fte command: does nothing and reports success (0). */
+static int mlx5_cmd_stub_delete_fte(struct mlx5_core_dev *dev,
+                                   struct mlx5_flow_table *ft,
+                                   struct fs_fte *fte)
+{
+       return 0;
+}
+
+static int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
+                                  struct mlx5_flow_table *ft, u32 underlay_qpn,
+                                  bool disconnect)
 {
        u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {0};
        u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
@@ -71,12 +143,14 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
-                              u16 vport,
-                              enum fs_flow_table_op_mod op_mod,
-                              enum fs_flow_table_type type, unsigned int level,
-                              unsigned int log_size, struct mlx5_flow_table
-                              *next_ft, unsigned int *table_id, u32 flags)
+static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
+                                     u16 vport,
+                                     enum fs_flow_table_op_mod op_mod,
+                                     enum fs_flow_table_type type,
+                                     unsigned int level,
+                                     unsigned int log_size,
+                                     struct mlx5_flow_table *next_ft,
+                                     unsigned int *table_id, u32 flags)
 {
        int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN);
        u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
@@ -125,8 +199,8 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
        return err;
 }
 
-int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
-                               struct mlx5_flow_table *ft)
+static int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
+                                      struct mlx5_flow_table *ft)
 {
        u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]   = {0};
        u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0};
@@ -143,9 +217,9 @@ int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
-                              struct mlx5_flow_table *ft,
-                              struct mlx5_flow_table *next_ft)
+static int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
+                                     struct mlx5_flow_table *ft,
+                                     struct mlx5_flow_table *next_ft)
 {
        u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)]   = {0};
        u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0};
@@ -188,10 +262,10 @@ int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
-                              struct mlx5_flow_table *ft,
-                              u32 *in,
-                              unsigned int *group_id)
+static int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
+                                     struct mlx5_flow_table *ft,
+                                     u32 *in,
+                                     unsigned int *group_id)
 {
        u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0};
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
@@ -213,9 +287,9 @@ int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
        return err;
 }
 
-int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
-                               struct mlx5_flow_table *ft,
-                               unsigned int group_id)
+static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
+                                      struct mlx5_flow_table *ft,
+                                      unsigned int group_id)
 {
        u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0};
        u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]   = {0};
@@ -266,16 +340,17 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 
        in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
        MLX5_SET(flow_context, in_flow_context, group_id, group_id);
-       MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag);
-       MLX5_SET(flow_context, in_flow_context, action, fte->action);
-       MLX5_SET(flow_context, in_flow_context, encap_id, fte->encap_id);
-       MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->modify_id);
+       MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
+       MLX5_SET(flow_context, in_flow_context, action, fte->action.action);
+       MLX5_SET(flow_context, in_flow_context, encap_id, fte->action.encap_id);
+       MLX5_SET(flow_context, in_flow_context, modify_header_id,
+                fte->action.modify_id);
        in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
                                      match_value);
        memcpy(in_match_value, &fte->val, sizeof(fte->val));
 
        in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
-       if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+       if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
                int list_size = 0;
 
                list_for_each_entry(dst, &fte->node.children, node.list) {
@@ -301,7 +376,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
                         list_size);
        }
 
-       if (fte->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+       if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev,
                                        log_max_flow_counter,
                                        ft->type));
@@ -332,19 +407,21 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
        return err;
 }
 
-int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
-                       struct mlx5_flow_table *ft,
-                       unsigned group_id,
-                       struct fs_fte *fte)
+static int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
+                              struct mlx5_flow_table *ft,
+                              struct mlx5_flow_group *group,
+                              struct fs_fte *fte)
 {
+       unsigned int group_id = group->id;
+
        return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte);
 }
 
-int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
-                       struct mlx5_flow_table *ft,
-                       unsigned group_id,
-                       int modify_mask,
-                       struct fs_fte *fte)
+static int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
+                              struct mlx5_flow_table *ft,
+                              unsigned int group_id,
+                              int modify_mask,
+                              struct fs_fte *fte)
 {
        int opmod;
        int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev,
@@ -357,9 +434,9 @@ int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
        return  mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte);
 }
 
-int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
-                       struct mlx5_flow_table *ft,
-                       unsigned int index)
+static int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
+                              struct mlx5_flow_table *ft,
+                              struct fs_fte *fte)
 {
        u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0};
        u32 in[MLX5_ST_SZ_DW(delete_fte_in)]   = {0};
@@ -367,7 +444,7 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
        MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
        MLX5_SET(delete_fte_in, in, table_type, ft->type);
        MLX5_SET(delete_fte_in, in, table_id, ft->id);
-       MLX5_SET(delete_fte_in, in, flow_index, index);
+       MLX5_SET(delete_fte_in, in, flow_index, fte->index);
        if (ft->vport) {
                MLX5_SET(delete_fte_in, in, vport_number, ft->vport);
                MLX5_SET(delete_fte_in, in, other_vport, 1);
@@ -610,3 +687,53 @@ void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id)
 
        mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
+
+static const struct mlx5_flow_cmds mlx5_flow_cmds = {
+       .create_flow_table = mlx5_cmd_create_flow_table,
+       .destroy_flow_table = mlx5_cmd_destroy_flow_table,
+       .modify_flow_table = mlx5_cmd_modify_flow_table,
+       .create_flow_group = mlx5_cmd_create_flow_group,
+       .destroy_flow_group = mlx5_cmd_destroy_flow_group,
+       .create_fte = mlx5_cmd_create_fte,
+       .update_fte = mlx5_cmd_update_fte,
+       .delete_fte = mlx5_cmd_delete_fte,
+       .update_root_ft = mlx5_cmd_update_root_ft,
+};
+
+static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
+       .create_flow_table = mlx5_cmd_stub_create_flow_table,
+       .destroy_flow_table = mlx5_cmd_stub_destroy_flow_table,
+       .modify_flow_table = mlx5_cmd_stub_modify_flow_table,
+       .create_flow_group = mlx5_cmd_stub_create_flow_group,
+       .destroy_flow_group = mlx5_cmd_stub_destroy_flow_group,
+       .create_fte = mlx5_cmd_stub_create_fte,
+       .update_fte = mlx5_cmd_stub_update_fte,
+       .delete_fte = mlx5_cmd_stub_delete_fte,
+       .update_root_ft = mlx5_cmd_stub_update_root_ft,
+};
+
+static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void)
+{
+       return &mlx5_flow_cmds;
+}
+
+static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_stub_cmds(void)
+{
+       return &mlx5_flow_cmd_stubs;
+}
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type)
+{
+       switch (type) {
+       case FS_FT_NIC_RX:
+       case FS_FT_ESW_EGRESS_ACL:
+       case FS_FT_ESW_INGRESS_ACL:
+       case FS_FT_FDB:
+       case FS_FT_SNIFFER_RX:
+       case FS_FT_SNIFFER_TX:
+               return mlx5_fs_cmd_get_fw_cmds();
+       case FS_FT_NIC_TX:
+       default:
+               return mlx5_fs_cmd_get_stub_cmds();
+       }
+}
index 71e2d0f37ad9dde257aefa63bf380a9b5f3b1997..6228ba7bfa1a90fc2cafed3a13128e83391c903c 100644 (file)
 #ifndef _MLX5_FS_CMD_
 #define _MLX5_FS_CMD_
 
-int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
-                              u16 vport,
-                              enum fs_flow_table_op_mod op_mod,
-                              enum fs_flow_table_type type, unsigned int level,
-                              unsigned int log_size, struct mlx5_flow_table
-                              *next_ft, unsigned int *table_id, u32 flags);
+#include "fs_core.h"
 
-int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
-                               struct mlx5_flow_table *ft);
+struct mlx5_flow_cmds {
+       int (*create_flow_table)(struct mlx5_core_dev *dev,
+                                u16 vport,
+                                enum fs_flow_table_op_mod op_mod,
+                                enum fs_flow_table_type type,
+                                unsigned int level, unsigned int log_size,
+                                struct mlx5_flow_table *next_ft,
+                                unsigned int *table_id, u32 flags);
+       int (*destroy_flow_table)(struct mlx5_core_dev *dev,
+                                 struct mlx5_flow_table *ft);
 
-int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
-                              struct mlx5_flow_table *ft,
-                              struct mlx5_flow_table *next_ft);
+       int (*modify_flow_table)(struct mlx5_core_dev *dev,
+                                struct mlx5_flow_table *ft,
+                                struct mlx5_flow_table *next_ft);
 
-int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
-                              struct mlx5_flow_table *ft,
-                              u32 *in, unsigned int *group_id);
+       int (*create_flow_group)(struct mlx5_core_dev *dev,
+                                struct mlx5_flow_table *ft,
+                                u32 *in,
+                                unsigned int *group_id);
 
-int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
-                               struct mlx5_flow_table *ft,
-                               unsigned int group_id);
+       int (*destroy_flow_group)(struct mlx5_core_dev *dev,
+                                 struct mlx5_flow_table *ft,
+                                 unsigned int group_id);
 
-int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
-                       struct mlx5_flow_table *ft,
-                       unsigned group_id,
-                       struct fs_fte *fte);
+       int (*create_fte)(struct mlx5_core_dev *dev,
+                         struct mlx5_flow_table *ft,
+                         struct mlx5_flow_group *fg,
+                         struct fs_fte *fte);
 
-int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
-                       struct mlx5_flow_table *ft,
-                       unsigned group_id,
-                       int modify_mask,
-                       struct fs_fte *fte);
+       int (*update_fte)(struct mlx5_core_dev *dev,
+                         struct mlx5_flow_table *ft,
+                         unsigned int group_id,
+                         int modify_mask,
+                         struct fs_fte *fte);
 
-int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
-                       struct mlx5_flow_table *ft,
-                       unsigned int index);
+       int (*delete_fte)(struct mlx5_core_dev *dev,
+                         struct mlx5_flow_table *ft,
+                         struct fs_fte *fte);
 
-int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
-                           struct mlx5_flow_table *ft, u32 underlay_qpn,
-                           bool disconnect);
+       int (*update_root_ft)(struct mlx5_core_dev *dev,
+                             struct mlx5_flow_table *ft,
+                             u32 underlay_qpn,
+                             bool disconnect);
+};
 
 int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
 int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id);
@@ -90,4 +96,6 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
                          struct mlx5_cmd_fc_bulk *b, u32 id,
                          u64 *packets, u64 *bytes);
 
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type);
+
 #endif
index 31fc2cfac3b3beea8c8bc28851cd5172f880926e..3ba07c7096ef7996713a592dcf5761f8b90a9c84 100644 (file)
@@ -37,6 +37,8 @@
 #include "fs_core.h"
 #include "fs_cmd.h"
 #include "diag/fs_tracepoint.h"
+#include "accel/ipsec.h"
+#include "fpga/ipsec.h"
 
 #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
                                         sizeof(struct init_tree_node))
@@ -425,15 +427,17 @@ static void del_sw_prio(struct fs_node *node)
 
 static void del_hw_flow_table(struct fs_node *node)
 {
+       struct mlx5_flow_root_namespace *root;
        struct mlx5_flow_table *ft;
        struct mlx5_core_dev *dev;
        int err;
 
        fs_get_obj(ft, node);
        dev = get_dev(&ft->node);
+       root = find_root(&ft->node);
 
        if (node->active) {
-               err = mlx5_cmd_destroy_flow_table(dev, ft);
+               err = root->cmds->destroy_flow_table(dev, ft);
                if (err)
                        mlx5_core_warn(dev, "flow steering can't destroy ft\n");
        }
@@ -454,6 +458,7 @@ static void del_sw_flow_table(struct fs_node *node)
 
 static void del_sw_hw_rule(struct fs_node *node)
 {
+       struct mlx5_flow_root_namespace *root;
        struct mlx5_flow_rule *rule;
        struct mlx5_flow_table *ft;
        struct mlx5_flow_group *fg;
@@ -477,19 +482,20 @@ static void del_sw_hw_rule(struct fs_node *node)
        if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER  &&
            --fte->dests_size) {
                modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
-               fte->action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
+               fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
                update_fte = true;
                goto out;
        }
 
-       if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
+       if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
            --fte->dests_size) {
                modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST),
                update_fte = true;
        }
 out:
+       root = find_root(&ft->node);
        if (update_fte && fte->dests_size) {
-               err = mlx5_cmd_update_fte(dev, ft, fg->id, modify_mask, fte);
+               err = root->cmds->update_fte(dev, ft, fg->id, modify_mask, fte);
                if (err)
                        mlx5_core_warn(dev,
                                       "%s can't del rule fg id=%d fte_index=%d\n",
@@ -500,6 +506,7 @@ static void del_sw_hw_rule(struct fs_node *node)
 
 static void del_hw_fte(struct fs_node *node)
 {
+       struct mlx5_flow_root_namespace *root;
        struct mlx5_flow_table *ft;
        struct mlx5_flow_group *fg;
        struct mlx5_core_dev *dev;
@@ -512,9 +519,9 @@ static void del_hw_fte(struct fs_node *node)
 
        trace_mlx5_fs_del_fte(fte);
        dev = get_dev(&ft->node);
+       root = find_root(&ft->node);
        if (node->active) {
-               err = mlx5_cmd_delete_fte(dev, ft,
-                                         fte->index);
+               err = root->cmds->delete_fte(dev, ft, fte);
                if (err)
                        mlx5_core_warn(dev,
                                       "flow steering can't delete fte in index %d of flow group id %d\n",
@@ -542,6 +549,7 @@ static void del_sw_fte(struct fs_node *node)
 
 static void del_hw_flow_group(struct fs_node *node)
 {
+       struct mlx5_flow_root_namespace *root;
        struct mlx5_flow_group *fg;
        struct mlx5_flow_table *ft;
        struct mlx5_core_dev *dev;
@@ -551,7 +559,8 @@ static void del_hw_flow_group(struct fs_node *node)
        dev = get_dev(&ft->node);
        trace_mlx5_fs_del_fg(fg);
 
-       if (fg->node.active && mlx5_cmd_destroy_flow_group(dev, ft, fg->id))
+       root = find_root(&ft->node);
+       if (fg->node.active && root->cmds->destroy_flow_group(dev, ft, fg->id))
                mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
                               fg->id, ft->id);
 }
@@ -615,10 +624,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
 
        memcpy(fte->val, match_value, sizeof(fte->val));
        fte->node.type =  FS_TYPE_FLOW_ENTRY;
-       fte->flow_tag = flow_act->flow_tag;
-       fte->action = flow_act->action;
-       fte->encap_id = flow_act->encap_id;
-       fte->modify_id = flow_act->modify_id;
+       fte->action = *flow_act;
 
        tree_init_node(&fte->node, del_hw_fte, del_sw_fte);
 
@@ -797,15 +803,14 @@ static int connect_fts_in_prio(struct mlx5_core_dev *dev,
                               struct fs_prio *prio,
                               struct mlx5_flow_table *ft)
 {
+       struct mlx5_flow_root_namespace *root = find_root(&prio->node);
        struct mlx5_flow_table *iter;
        int i = 0;
        int err;
 
        fs_for_each_ft(iter, prio) {
                i++;
-               err = mlx5_cmd_modify_flow_table(dev,
-                                                iter,
-                                                ft);
+               err = root->cmds->modify_flow_table(dev, iter, ft);
                if (err) {
                        mlx5_core_warn(dev, "Failed to modify flow table %d\n",
                                       iter->id);
@@ -853,12 +858,12 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
        if (list_empty(&root->underlay_qpns)) {
                /* Don't set any QPN (zero) in case QPN list is empty */
                qpn = 0;
-               err = mlx5_cmd_update_root_ft(root->dev, ft, qpn, false);
+               err = root->cmds->update_root_ft(root->dev, ft, qpn, false);
        } else {
                list_for_each_entry(uqp, &root->underlay_qpns, list) {
                        qpn = uqp->qpn;
-                       err = mlx5_cmd_update_root_ft(root->dev, ft, qpn,
-                                                     false);
+                       err = root->cmds->update_root_ft(root->dev, ft,
+                                                        qpn, false);
                        if (err)
                                break;
                }
@@ -877,6 +882,7 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
 static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
                                         struct mlx5_flow_destination *dest)
 {
+       struct mlx5_flow_root_namespace *root;
        struct mlx5_flow_table *ft;
        struct mlx5_flow_group *fg;
        struct fs_fte *fte;
@@ -884,17 +890,16 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
        int err = 0;
 
        fs_get_obj(fte, rule->node.parent);
-       if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+       if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
                return -EINVAL;
        down_write_ref_node(&fte->node);
        fs_get_obj(fg, fte->node.parent);
        fs_get_obj(ft, fg->node.parent);
 
        memcpy(&rule->dest_attr, dest, sizeof(*dest));
-       err = mlx5_cmd_update_fte(get_dev(&ft->node),
-                                 ft, fg->id,
-                                 modify_mask,
-                                 fte);
+       root = find_root(&ft->node);
+       err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id,
+                                    modify_mask, fte);
        up_write_ref_node(&fte->node);
 
        return err;
@@ -1035,9 +1040,9 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
        tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
        log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
        next_ft = find_next_chained_ft(fs_prio);
-       err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type,
-                                        ft->level, log_table_sz, next_ft, &ft->id,
-                                        ft->flags);
+       err = root->cmds->create_flow_table(root->dev, ft->vport, ft->op_mod,
+                                           ft->type, ft->level, log_table_sz,
+                                           next_ft, &ft->id, ft->flags);
        if (err)
                goto free_ft;
 
@@ -1053,7 +1058,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
        mutex_unlock(&root->chain_lock);
        return ft;
 destroy_ft:
-       mlx5_cmd_destroy_flow_table(root->dev, ft);
+       root->cmds->destroy_flow_table(root->dev, ft);
 free_ft:
        kfree(ft);
 unlock_root:
@@ -1125,6 +1130,7 @@ EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
 struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
                                               u32 *fg_in)
 {
+       struct mlx5_flow_root_namespace *root = find_root(&ft->node);
        void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
                                            fg_in, match_criteria);
        u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
@@ -1152,7 +1158,7 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
        if (IS_ERR(fg))
                return fg;
 
-       err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id);
+       err = root->cmds->create_flow_group(dev, ft, fg_in, &fg->id);
        if (err) {
                tree_put_node(&fg->node);
                return ERR_PTR(err);
@@ -1275,6 +1281,7 @@ add_rule_fte(struct fs_fte *fte,
             int dest_num,
             bool update_action)
 {
+       struct mlx5_flow_root_namespace *root;
        struct mlx5_flow_handle *handle;
        struct mlx5_flow_table *ft;
        int modify_mask = 0;
@@ -1290,12 +1297,13 @@ add_rule_fte(struct fs_fte *fte,
                modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
 
        fs_get_obj(ft, fg->node.parent);
+       root = find_root(&fg->node);
        if (!(fte->status & FS_FTE_STATUS_EXISTING))
-               err = mlx5_cmd_create_fte(get_dev(&ft->node),
-                                         ft, fg->id, fte);
+               err = root->cmds->create_fte(get_dev(&ft->node),
+                                            ft, fg, fte);
        else
-               err = mlx5_cmd_update_fte(get_dev(&ft->node),
-                                         ft, fg->id, modify_mask, fte);
+               err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id,
+                                                    modify_mask, fte);
        if (err)
                goto free_handle;
 
@@ -1360,6 +1368,7 @@ static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table  *ft
 static int create_auto_flow_group(struct mlx5_flow_table *ft,
                                  struct mlx5_flow_group *fg)
 {
+       struct mlx5_flow_root_namespace *root = find_root(&ft->node);
        struct mlx5_core_dev *dev = get_dev(&ft->node);
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
        void *match_criteria_addr;
@@ -1380,7 +1389,7 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft,
        memcpy(match_criteria_addr, fg->mask.match_criteria,
               sizeof(fg->mask.match_criteria));
 
-       err = mlx5_cmd_create_flow_group(dev, ft, in, &fg->id);
+       err = root->cmds->create_flow_group(dev, ft, in, &fg->id);
        if (!err) {
                fg->node.active = true;
                trace_mlx5_fs_add_fg(fg);
@@ -1438,16 +1447,17 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
 
 static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act)
 {
-       if (check_conflicting_actions(flow_act->action, fte->action)) {
+       if (check_conflicting_actions(flow_act->action, fte->action.action)) {
                mlx5_core_warn(get_dev(&fte->node),
                               "Found two FTEs with conflicting actions\n");
                return -EEXIST;
        }
 
-       if (fte->flow_tag != flow_act->flow_tag) {
+       if (flow_act->has_flow_tag &&
+           fte->action.flow_tag != flow_act->flow_tag) {
                mlx5_core_warn(get_dev(&fte->node),
                               "FTE flow tag %u already exists with different flow tag %u\n",
-                              fte->flow_tag,
+                              fte->action.flow_tag,
                               flow_act->flow_tag);
                return -EEXIST;
        }
@@ -1471,12 +1481,12 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
        if (ret)
                return ERR_PTR(ret);
 
-       old_action = fte->action;
-       fte->action |= flow_act->action;
+       old_action = fte->action.action;
+       fte->action.action |= flow_act->action;
        handle = add_rule_fte(fte, fg, dest, dest_num,
                              old_action != flow_act->action);
        if (IS_ERR(handle)) {
-               fte->action = old_action;
+               fte->action.action = old_action;
                return handle;
        }
        trace_mlx5_fs_set_fte(fte, false);
@@ -1637,7 +1647,6 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
 
        list_for_each_entry(iter, match_head, list) {
                nested_down_read_ref_node(&iter->g->node, FS_LOCK_PARENT);
-               ida_pre_get(&iter->g->fte_allocator, GFP_KERNEL);
        }
 
 search_again_locked:
@@ -1919,7 +1928,6 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft)
                return 0;
 
        new_root_ft = find_next_ft(ft);
-
        if (!new_root_ft) {
                root->root_ft = NULL;
                return 0;
@@ -1928,13 +1936,14 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft)
        if (list_empty(&root->underlay_qpns)) {
                /* Don't set any QPN (zero) in case QPN list is empty */
                qpn = 0;
-               err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, qpn,
-                                             false);
+               err = root->cmds->update_root_ft(root->dev, new_root_ft,
+                                                qpn, false);
        } else {
                list_for_each_entry(uqp, &root->underlay_qpns, list) {
                        qpn = uqp->qpn;
-                       err = mlx5_cmd_update_root_ft(root->dev, new_root_ft,
-                                                     qpn, false);
+                       err = root->cmds->update_root_ft(root->dev,
+                                                        new_root_ft, qpn,
+                                                        false);
                        if (err)
                                break;
                }
@@ -2046,6 +2055,11 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
                        return &steering->sniffer_tx_root_ns->ns;
                else
                        return NULL;
+       case MLX5_FLOW_NAMESPACE_EGRESS:
+               if (steering->egress_root_ns)
+                       return &steering->egress_root_ns->ns;
+               else
+                       return NULL;
        default:
                return NULL;
        }
@@ -2236,13 +2250,18 @@ static int init_root_tree(struct mlx5_flow_steering *steering,
        return 0;
 }
 
-static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering *steering,
-                                                      enum fs_flow_table_type
-                                                      table_type)
+static struct mlx5_flow_root_namespace
+*create_root_ns(struct mlx5_flow_steering *steering,
+               enum fs_flow_table_type table_type)
 {
+       const struct mlx5_flow_cmds *cmds = mlx5_fs_cmd_get_default(table_type);
        struct mlx5_flow_root_namespace *root_ns;
        struct mlx5_flow_namespace *ns;
 
+       if (mlx5_accel_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
+           (table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX))
+               cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type);
+
        /* Create the root namespace */
        root_ns = kvzalloc(sizeof(*root_ns), GFP_KERNEL);
        if (!root_ns)
@@ -2250,6 +2269,7 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering
 
        root_ns->dev = steering->dev;
        root_ns->table_type = table_type;
+       root_ns->cmds = cmds;
 
        INIT_LIST_HEAD(&root_ns->underlay_qpns);
 
@@ -2408,6 +2428,7 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
        cleanup_root_ns(steering->fdb_root_ns);
        cleanup_root_ns(steering->sniffer_rx_root_ns);
        cleanup_root_ns(steering->sniffer_tx_root_ns);
+       cleanup_root_ns(steering->egress_root_ns);
        mlx5_cleanup_fc_stats(dev);
        kmem_cache_destroy(steering->ftes_cache);
        kmem_cache_destroy(steering->fgs_cache);
@@ -2553,6 +2574,20 @@ static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev)
        return err;
 }
 
+static int init_egress_root_ns(struct mlx5_flow_steering *steering)
+{
+       struct fs_prio *prio;
+
+       steering->egress_root_ns = create_root_ns(steering,
+                                                 FS_FT_NIC_TX);
+       if (!steering->egress_root_ns)
+               return -ENOMEM;
+
+       /* create 1 prio*/
+       prio = fs_create_prio(&steering->egress_root_ns->ns, 0, 1);
+       return PTR_ERR_OR_ZERO(prio);
+}
+
 int mlx5_init_fs(struct mlx5_core_dev *dev)
 {
        struct mlx5_flow_steering *steering;
@@ -2618,6 +2653,12 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
                        goto err;
        }
 
+       if (MLX5_IPSEC_DEV(dev)) {
+               err = init_egress_root_ns(steering);
+               if (err)
+                       goto err;
+       }
+
        return 0;
 err:
        mlx5_cleanup_fs(dev);
@@ -2641,7 +2682,8 @@ int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
                goto update_ft_fail;
        }
 
-       err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, false);
+       err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn,
+                                        false);
        if (err) {
                mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n",
                               underlay_qpn, err);
@@ -2684,7 +2726,8 @@ int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
                goto out;
        }
 
-       err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, true);
+       err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn,
+                                        true);
        if (err)
                mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n",
                               underlay_qpn, err);
index 05262708f14b1863293cb6c6d561cdef935ca227..e26d3e9d5f9f9b7210a0f1dae9b041a2686c415a 100644 (file)
@@ -48,6 +48,7 @@ enum fs_node_type {
 
 enum fs_flow_table_type {
        FS_FT_NIC_RX          = 0x0,
+       FS_FT_NIC_TX          = 0x1,
        FS_FT_ESW_EGRESS_ACL  = 0x2,
        FS_FT_ESW_INGRESS_ACL = 0x3,
        FS_FT_FDB             = 0X4,
@@ -75,6 +76,7 @@ struct mlx5_flow_steering {
        struct mlx5_flow_root_namespace **esw_ingress_root_ns;
        struct mlx5_flow_root_namespace *sniffer_tx_root_ns;
        struct mlx5_flow_root_namespace *sniffer_rx_root_ns;
+       struct mlx5_flow_root_namespace *egress_root_ns;
 };
 
 struct fs_node {
@@ -174,11 +176,8 @@ struct fs_fte {
        struct fs_node                  node;
        u32                             val[MLX5_ST_SZ_DW_MATCH_PARAM];
        u32                             dests_size;
-       u32                             flow_tag;
        u32                             index;
-       u32                             action;
-       u32                             encap_id;
-       u32                             modify_id;
+       struct mlx5_flow_act            action;
        enum fs_fte_status              status;
        struct mlx5_fc                  *counter;
        struct rhash_head               hash;
@@ -224,6 +223,7 @@ struct mlx5_flow_root_namespace {
        /* Should be held when chaining flow tables */
        struct mutex                    chain_lock;
        struct list_head                underlay_qpns;
+       const struct mlx5_flow_cmds     *cmds;
 };
 
 int mlx5_init_fc_stats(struct mlx5_core_dev *dev);
index 21d29f7936f6c5d1e26c6e0d3f10644fd0f096c8..d39b0b7011b2d9cf194180813a5a30d9fe226b6d 100644 (file)
@@ -124,7 +124,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
                trigger_cmd_completions(dev);
        }
 
-       mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0);
+       mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1);
        mlx5_core_err(dev, "end\n");
 
 unlock:
index 7142c90d466947f5ea127f0850a5ad5934b4e260..13b6f66310c98495304ae4062f4b9129a54dfba8 100644 (file)
@@ -58,6 +58,7 @@
 #include "eswitch.h"
 #include "lib/mlx5.h"
 #include "fpga/core.h"
+#include "fpga/ipsec.h"
 #include "accel/ipsec.h"
 #include "lib/clock.h"
 
@@ -1173,6 +1174,18 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                goto err_affinity_hints;
        }
 
+       err = mlx5_fpga_device_start(dev);
+       if (err) {
+               dev_err(&pdev->dev, "fpga device start failed %d\n", err);
+               goto err_fpga_start;
+       }
+
+       err = mlx5_accel_ipsec_init(dev);
+       if (err) {
+               dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
+               goto err_ipsec_start;
+       }
+
        err = mlx5_init_fs(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to init flow steering\n");
@@ -1191,17 +1204,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                goto err_sriov;
        }
 
-       err = mlx5_fpga_device_start(dev);
-       if (err) {
-               dev_err(&pdev->dev, "fpga device start failed %d\n", err);
-               goto err_fpga_start;
-       }
-       err = mlx5_accel_ipsec_init(dev);
-       if (err) {
-               dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
-               goto err_ipsec_start;
-       }
-
        if (mlx5_device_registered(dev)) {
                mlx5_attach_device(dev);
        } else {
@@ -1219,17 +1221,18 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
        return 0;
 
 err_reg_dev:
-       mlx5_accel_ipsec_cleanup(dev);
-err_ipsec_start:
-       mlx5_fpga_device_stop(dev);
-
-err_fpga_start:
        mlx5_sriov_detach(dev);
 
 err_sriov:
        mlx5_cleanup_fs(dev);
 
 err_fs:
+       mlx5_accel_ipsec_cleanup(dev);
+
+err_ipsec_start:
+       mlx5_fpga_device_stop(dev);
+
+err_fpga_start:
        mlx5_irq_clear_affinity_hints(dev);
 
 err_affinity_hints:
@@ -1296,11 +1299,10 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
        if (mlx5_device_registered(dev))
                mlx5_detach_device(dev);
 
-       mlx5_accel_ipsec_cleanup(dev);
-       mlx5_fpga_device_stop(dev);
-
        mlx5_sriov_detach(dev);
        mlx5_cleanup_fs(dev);
+       mlx5_accel_ipsec_cleanup(dev);
+       mlx5_fpga_device_stop(dev);
        mlx5_irq_clear_affinity_hints(dev);
        free_comp_eqs(dev);
        mlx5_stop_eqs(dev);
@@ -1657,6 +1659,7 @@ static int __init init(void)
        get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
 
        mlx5_core_verify_params();
+       mlx5_fpga_ipsec_build_fs_cmds();
        mlx5_register_debugfs();
 
        err = pci_register_driver(&mlx5_core_driver);
index 23e17ac0cba5e852b0cc79c737ab9d43990c8df8..4e25f2b2e0bc46b86a4715b055fe69633d1f92f9 100644 (file)
 #define DRIVER_NAME "mlx5_core"
 #define DRIVER_VERSION "5.0-0"
 
-#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev))
-#define MLX5_VPORT_MANAGER(mdev) \
-       (MLX5_CAP_GEN(mdev, vport_group_manager) && \
-       (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
-        mlx5_core_is_pf(mdev))
-
 extern uint mlx5_core_debug_mask;
 
 #define mlx5_core_dbg(__dev, format, ...)                              \
@@ -207,4 +201,5 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
 int mlx5_lag_allow(struct mlx5_core_dev *dev);
 int mlx5_lag_forbid(struct mlx5_core_dev *dev);
 
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
 #endif /* __MLX5_CORE_H__ */
index d56eea3105090051aad3138a837f7ef67c850b45..f4d9c9975ac3d857f50ef255756ea23a7a11fdb5 100644 (file)
@@ -76,6 +76,8 @@ config MLXSW_SPECTRUM
        depends on PSAMPLE || PSAMPLE=n
        depends on BRIDGE || BRIDGE=n
        depends on IPV6 || IPV6=n
+       depends on NET_IPGRE || NET_IPGRE=n
+       depends on IPV6_GRE || IPV6_GRE=n
        select PARMAN
        select MLXFW
        default m
index b698fb481b2ecb4d7f79da60775b063f7da58dfc..3c0d882ba18380ae6d2f63d77f52163a603f1e47 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
@@ -443,6 +443,17 @@ int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id)
 }
 EXPORT_SYMBOL(mlxsw_afa_block_jump);
 
+int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block)
+{
+       if (block->finished)
+               return -EINVAL;
+       mlxsw_afa_set_goto_set(block->cur_set,
+                              MLXSW_AFA_SET_GOTO_BINDING_CMD_TERM, 0);
+       block->finished = true;
+       return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_terminate);
+
 static struct mlxsw_afa_fwd_entry *
 mlxsw_afa_fwd_entry_create(struct mlxsw_afa *mlxsw_afa, u8 local_port)
 {
@@ -838,7 +849,6 @@ struct mlxsw_afa_mirror {
        struct mlxsw_afa_resource resource;
        int span_id;
        u8 local_in_port;
-       u8 local_out_port;
        bool ingress;
 };
 
@@ -848,7 +858,7 @@ mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block,
 {
        block->afa->ops->mirror_del(block->afa->ops_priv,
                                    mirror->local_in_port,
-                                   mirror->local_out_port,
+                                   mirror->span_id,
                                    mirror->ingress);
        kfree(mirror);
 }
@@ -864,9 +874,8 @@ mlxsw_afa_mirror_destructor(struct mlxsw_afa_block *block,
 }
 
 static struct mlxsw_afa_mirror *
-mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
-                       u8 local_in_port, u8 local_out_port,
-                       bool ingress)
+mlxsw_afa_mirror_create(struct mlxsw_afa_block *block, u8 local_in_port,
+                       const struct net_device *out_dev, bool ingress)
 {
        struct mlxsw_afa_mirror *mirror;
        int err;
@@ -876,13 +885,12 @@ mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
                return ERR_PTR(-ENOMEM);
 
        err = block->afa->ops->mirror_add(block->afa->ops_priv,
-                                         local_in_port, local_out_port,
+                                         local_in_port, out_dev,
                                          ingress, &mirror->span_id);
        if (err)
                goto err_mirror_add;
 
        mirror->ingress = ingress;
-       mirror->local_out_port = local_out_port;
        mirror->local_in_port = local_in_port;
        mirror->resource.destructor = mlxsw_afa_mirror_destructor;
        mlxsw_afa_resource_add(block, &mirror->resource);
@@ -909,13 +917,13 @@ mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block,
 }
 
 int
-mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
-                             u8 local_in_port, u8 local_out_port, bool ingress)
+mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, u8 local_in_port,
+                             const struct net_device *out_dev, bool ingress)
 {
        struct mlxsw_afa_mirror *mirror;
        int err;
 
-       mirror = mlxsw_afa_mirror_create(block, local_in_port, local_out_port,
+       mirror = mlxsw_afa_mirror_create(block, local_in_port, out_dev,
                                         ingress);
        if (IS_ERR(mirror))
                return PTR_ERR(mirror);
index 43132293475ce9ce916eac0b49b8a9c822ffb214..3a155d1043845208e0809f83cee4227fcfe964c1 100644 (file)
@@ -36,6 +36,7 @@
 #define _MLXSW_CORE_ACL_FLEX_ACTIONS_H
 
 #include <linux/types.h>
+#include <linux/netdevice.h>
 
 struct mlxsw_afa;
 struct mlxsw_afa_block;
@@ -48,9 +49,10 @@ struct mlxsw_afa_ops {
        void (*kvdl_fwd_entry_del)(void *priv, u32 kvdl_index);
        int (*counter_index_get)(void *priv, unsigned int *p_counter_index);
        void (*counter_index_put)(void *priv, unsigned int counter_index);
-       int (*mirror_add)(void *priv, u8 locol_in_port, u8 local_out_port,
+       int (*mirror_add)(void *priv, u8 local_in_port,
+                         const struct net_device *out_dev,
                          bool ingress, int *p_span_id);
-       void (*mirror_del)(void *priv, u8 locol_in_port, u8 local_out_port,
+       void (*mirror_del)(void *priv, u8 local_in_port, int span_id,
                           bool ingress);
 };
 
@@ -65,12 +67,14 @@ char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block);
 u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block);
 int mlxsw_afa_block_continue(struct mlxsw_afa_block *block);
 int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id);
+int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block);
 int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block);
 int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id);
 int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
                                            u16 trap_id);
 int mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
-                                 u8 local_in_port, u8 local_out_port,
+                                 u8 local_in_port,
+                                 const struct net_device *out_dev,
                                  bool ingress);
 int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
                               u8 local_port, bool in_port);
index f6963b0b4a550cc2997183d980b8a4300f637baa..122506daa586070321c079d53c9d9ff6a9037c45 100644 (file)
@@ -107,20 +107,20 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
        MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12),
        MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3),
        MLXSW_AFK_ELEMENT_INFO_U32(TCP_FLAGS, 0x10, 23, 9),
-       MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x14, 0, 8),
-       MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x14, 9, 2),
-       MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x14, 11, 6),
-       MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x18, 0, 32),
-       MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x1C, 0, 32),
-       MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x18, 8),
-       MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_LO, 0x20, 8),
-       MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_HI, 0x28, 8),
-       MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_LO, 0x30, 8),
        MLXSW_AFK_ELEMENT_INFO_U32(DST_L4_PORT, 0x14, 0, 16),
        MLXSW_AFK_ELEMENT_INFO_U32(SRC_L4_PORT, 0x14, 16, 16),
+       MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8),
+       MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2),
+       MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6),
+       MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x20, 0, 32),
+       MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x24, 0, 32),
+       MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x20, 8),
+       MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_LO, 0x28, 8),
+       MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_HI, 0x30, 8),
+       MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_LO, 0x38, 8),
 };
 
-#define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x38
+#define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x40
 
 struct mlxsw_afk_element_inst { /* element instance in actual block */
        const struct mlxsw_afk_element_info *info;
index 85faa87bf42d5a88a6b7ed173ee675e093e72436..e30c6ce3dcb422b32dce48421324c2e88d600c92 100644 (file)
@@ -1519,8 +1519,7 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
                              u8 *p_status)
 {
        struct mlxsw_pci *mlxsw_pci = bus_priv;
-       dma_addr_t in_mapaddr = mlxsw_pci->cmd.in_mbox.mapaddr;
-       dma_addr_t out_mapaddr = mlxsw_pci->cmd.out_mbox.mapaddr;
+       dma_addr_t in_mapaddr = 0, out_mapaddr = 0;
        bool evreq = mlxsw_pci->cmd.nopoll;
        unsigned long timeout = msecs_to_jiffies(MLXSW_PCI_CIR_TIMEOUT_MSECS);
        bool *p_wait_done = &mlxsw_pci->cmd.wait_done;
@@ -1532,11 +1531,15 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
        if (err)
                return err;
 
-       if (in_mbox)
+       if (in_mbox) {
                memcpy(mlxsw_pci->cmd.in_mbox.buf, in_mbox, in_mbox_size);
+               in_mapaddr = mlxsw_pci->cmd.in_mbox.mapaddr;
+       }
        mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_HI, upper_32_bits(in_mapaddr));
        mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_LO, lower_32_bits(in_mapaddr));
 
+       if (out_mbox)
+               out_mapaddr = mlxsw_pci->cmd.out_mbox.mapaddr;
        mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_HI, upper_32_bits(out_mapaddr));
        mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_LO, lower_32_bits(out_mapaddr));
 
index 0e08be41c8e093fd4390b5c7bff6c177260c413a..e002398364c858430531dd3efb1f78c44216e23d 100644 (file)
@@ -1,11 +1,11 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/reg.h
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2015-2016 Ido Schimmel <idosch@mellanox.com>
  * Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
  * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -2872,6 +2872,14 @@ static inline void mlxsw_reg_pmtu_pack(char *payload, u8 local_port,
 
 MLXSW_REG_DEFINE(ptys, MLXSW_REG_PTYS_ID, MLXSW_REG_PTYS_LEN);
 
+/* an_disable_admin
+ * Auto negotiation disable administrative configuration
+ * 0 - Device doesn't support AN disable.
+ * 1 - Device supports AN disable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptys, an_disable_admin, 0x00, 30, 1);
+
 /* reg_ptys_local_port
  * Local port number.
  * Access: Index
@@ -3000,12 +3008,13 @@ MLXSW_ITEM32(reg, ptys, ib_proto_oper, 0x28, 0, 16);
 MLXSW_ITEM32(reg, ptys, eth_proto_lp_advertise, 0x30, 0, 32);
 
 static inline void mlxsw_reg_ptys_eth_pack(char *payload, u8 local_port,
-                                          u32 proto_admin)
+                                          u32 proto_admin, bool autoneg)
 {
        MLXSW_REG_ZERO(ptys, payload);
        mlxsw_reg_ptys_local_port_set(payload, local_port);
        mlxsw_reg_ptys_proto_mask_set(payload, MLXSW_REG_PTYS_PROTO_MASK_ETH);
        mlxsw_reg_ptys_eth_proto_admin_set(payload, proto_admin);
+       mlxsw_reg_ptys_an_disable_admin_set(payload, !autoneg);
 }
 
 static inline void mlxsw_reg_ptys_eth_unpack(char *payload,
@@ -6772,8 +6781,104 @@ MLXSW_ITEM32(reg, mpat, qos, 0x04, 26, 1);
  */
 MLXSW_ITEM32(reg, mpat, be, 0x04, 25, 1);
 
+enum mlxsw_reg_mpat_span_type {
+       /* Local SPAN Ethernet.
+        * The original packet is not encapsulated.
+        */
+       MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH = 0x0,
+
+       /* Encapsulated Remote SPAN Ethernet L3 GRE.
+        * The packet is encapsulated with GRE header.
+        */
+       MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3 = 0x3,
+};
+
+/* reg_mpat_span_type
+ * SPAN type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, span_type, 0x04, 0, 4);
+
+/* Remote SPAN - Ethernet VLAN
+ * - - - - - - - - - - - - - -
+ */
+
+/* reg_mpat_eth_rspan_vid
+ * Encapsulation header VLAN ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_vid, 0x18, 0, 12);
+
+/* Encapsulated Remote SPAN - Ethernet L2
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_version {
+       MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER = 15,
+};
+
+/* reg_mpat_eth_rspan_version
+ * RSPAN mirror header version.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_version, 0x10, 18, 4);
+
+/* reg_mpat_eth_rspan_mac
+ * Destination MAC address.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_mac, 0x12, 6);
+
+/* reg_mpat_eth_rspan_tp
+ * Tag Packet. Indicates whether the mirroring header should be VLAN tagged.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_tp, 0x18, 16, 1);
+
+/* Encapsulated Remote SPAN - Ethernet L3
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_protocol {
+       MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4,
+       MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6,
+};
+
+/* reg_mpat_eth_rspan_protocol
+ * SPAN encapsulation protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_protocol, 0x18, 24, 4);
+
+/* reg_mpat_eth_rspan_ttl
+ * Encapsulation header Time-to-Live/HopLimit.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_ttl, 0x1C, 4, 8);
+
+/* reg_mpat_eth_rspan_smac
+ * Source MAC address
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_smac, 0x22, 6);
+
+/* reg_mpat_eth_rspan_dip*
+ * Destination IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_dip4, 0x4C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_dip6, 0x40, 16);
+
+/* reg_mpat_eth_rspan_sip*
+ * Source IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_sip4, 0x5C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_sip6, 0x50, 16);
+
 static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
-                                      u16 system_port, bool e)
+                                      u16 system_port, bool e,
+                                      enum mlxsw_reg_mpat_span_type span_type)
 {
        MLXSW_REG_ZERO(mpat, payload);
        mlxsw_reg_mpat_pa_id_set(payload, pa_id);
@@ -6781,6 +6886,49 @@ static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
        mlxsw_reg_mpat_e_set(payload, e);
        mlxsw_reg_mpat_qos_set(payload, 1);
        mlxsw_reg_mpat_be_set(payload, 1);
+       mlxsw_reg_mpat_span_type_set(payload, span_type);
+}
+
+static inline void mlxsw_reg_mpat_eth_rspan_pack(char *payload, u16 vid)
+{
+       mlxsw_reg_mpat_eth_rspan_vid_set(payload, vid);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l2_pack(char *payload,
+                                enum mlxsw_reg_mpat_eth_rspan_version version,
+                                const char *mac,
+                                bool tp)
+{
+       mlxsw_reg_mpat_eth_rspan_version_set(payload, version);
+       mlxsw_reg_mpat_eth_rspan_mac_memcpy_to(payload, mac);
+       mlxsw_reg_mpat_eth_rspan_tp_set(payload, tp);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(char *payload, u8 ttl,
+                                     const char *smac,
+                                     u32 sip, u32 dip)
+{
+       mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+       mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+       mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+                                   MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4);
+       mlxsw_reg_mpat_eth_rspan_sip4_set(payload, sip);
+       mlxsw_reg_mpat_eth_rspan_dip4_set(payload, dip);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(char *payload, u8 ttl,
+                                     const char *smac,
+                                     struct in6_addr sip, struct in6_addr dip)
+{
+       mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+       mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+       mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+                                   MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6);
+       mlxsw_reg_mpat_eth_rspan_sip6_memcpy_to(payload, (void *)&sip);
+       mlxsw_reg_mpat_eth_rspan_dip6_memcpy_to(payload, (void *)&dip);
 }
 
 /* MPAR - Monitoring Port Analyzer Register
index bfde93910f8248149ccb80035d5b2dc0139d9b1b..7885fc475f7e85cce875833facb79a18ef4e475a 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum.c
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com>
  * Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
@@ -75,8 +75,8 @@
 #include "../mlxfw/mlxfw.h"
 
 #define MLXSW_FWREV_MAJOR 13
-#define MLXSW_FWREV_MINOR 1530
-#define MLXSW_FWREV_SUBMINOR 152
+#define MLXSW_FWREV_MINOR 1620
+#define MLXSW_FWREV_SUBMINOR 192
 #define MLXSW_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100)
 
 #define MLXSW_SP_FW_FILENAME \
@@ -1040,6 +1040,16 @@ mlxsw_sp_port_get_hw_xstats(struct net_device *dev,
                xstats->tail_drop[i] =
                        mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl);
        }
+
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+               err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_PRIO_CNT,
+                                                 i, ppcnt_pl);
+               if (err)
+                       continue;
+
+               xstats->tx_packets[i] = mlxsw_reg_ppcnt_tx_frames_get(ppcnt_pl);
+               xstats->tx_bytes[i] = mlxsw_reg_ppcnt_tx_octets_get(ppcnt_pl);
+       }
 }
 
 static void update_stats_cache(struct work_struct *work)
@@ -1139,6 +1149,7 @@ mlxsw_sp_port_vlan_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
        }
 
        mlxsw_sp_port_vlan->mlxsw_sp_port = mlxsw_sp_port;
+       mlxsw_sp_port_vlan->ref_count = 1;
        mlxsw_sp_port_vlan->vid = vid;
        list_add(&mlxsw_sp_port_vlan->list, &mlxsw_sp_port->vlans_list);
 
@@ -1166,8 +1177,10 @@ mlxsw_sp_port_vlan_get(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
        struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
 
        mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
-       if (mlxsw_sp_port_vlan)
+       if (mlxsw_sp_port_vlan) {
+               mlxsw_sp_port_vlan->ref_count++;
                return mlxsw_sp_port_vlan;
+       }
 
        return mlxsw_sp_port_vlan_create(mlxsw_sp_port, vid);
 }
@@ -1176,6 +1189,9 @@ void mlxsw_sp_port_vlan_put(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
 {
        struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
 
+       if (--mlxsw_sp_port_vlan->ref_count != 0)
+               return;
+
        if (mlxsw_sp_port_vlan->bridge_port)
                mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan);
        else if (fid)
@@ -1258,7 +1274,6 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
                                      bool ingress)
 {
        enum mlxsw_sp_span_type span_type;
-       struct mlxsw_sp_port *to_port;
        struct net_device *to_dev;
 
        to_dev = tcf_mirred_dev(a);
@@ -1267,17 +1282,10 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
                return -EINVAL;
        }
 
-       if (!mlxsw_sp_port_dev_check(to_dev)) {
-               netdev_err(mlxsw_sp_port->dev, "Cannot mirror to a non-spectrum port");
-               return -EOPNOTSUPP;
-       }
-       to_port = netdev_priv(to_dev);
-
-       mirror->to_local_port = to_port->local_port;
        mirror->ingress = ingress;
        span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-       return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_port, span_type,
-                                       true);
+       return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_dev, span_type,
+                                       true, &mirror->span_id);
 }
 
 static void
@@ -1288,7 +1296,7 @@ mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
 
        span_type = mirror->ingress ?
                        MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-       mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->to_local_port,
+       mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->span_id,
                                 span_type, true);
 }
 
@@ -2382,7 +2390,7 @@ static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev,
        int err;
 
        autoneg = mlxsw_sp_port->link.autoneg;
-       mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
+       mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0, false);
        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
        if (err)
                return err;
@@ -2416,7 +2424,7 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
        bool autoneg;
        int err;
 
-       mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
+       mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0, false);
        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
        if (err)
                return err;
@@ -2434,7 +2442,7 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
        }
 
        mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port,
-                               eth_proto_new);
+                               eth_proto_new, autoneg);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
        if (err)
                return err;
@@ -2645,7 +2653,7 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width)
 
        eth_proto_admin = mlxsw_sp_to_ptys_upper_speed(upper_speed);
        mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port,
-                               eth_proto_admin);
+                               eth_proto_admin, mlxsw_sp_port->link.autoneg);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
 }
 
@@ -3675,14 +3683,24 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
                goto err_afa_init;
        }
 
+       err = mlxsw_sp_span_init(mlxsw_sp);
+       if (err) {
+               dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
+               goto err_span_init;
+       }
+
+       /* Initialize router after SPAN is initialized, so that the FIB and
+        * neighbor event handlers can issue SPAN respin.
+        */
        err = mlxsw_sp_router_init(mlxsw_sp);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
                goto err_router_init;
        }
 
-       /* Initialize netdevice notifier after router is initialized, so that
-        * the event handler can use router structures.
+       /* Initialize netdevice notifier after router and SPAN is initialized,
+        * so that the event handler can use router structures and call SPAN
+        * respin.
         */
        mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event;
        err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb);
@@ -3691,12 +3709,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
                goto err_netdev_notifier;
        }
 
-       err = mlxsw_sp_span_init(mlxsw_sp);
-       if (err) {
-               dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
-               goto err_span_init;
-       }
-
        err = mlxsw_sp_acl_init(mlxsw_sp);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n");
@@ -3722,12 +3734,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 err_dpipe_init:
        mlxsw_sp_acl_fini(mlxsw_sp);
 err_acl_init:
-       mlxsw_sp_span_fini(mlxsw_sp);
-err_span_init:
        unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
 err_netdev_notifier:
        mlxsw_sp_router_fini(mlxsw_sp);
 err_router_init:
+       mlxsw_sp_span_fini(mlxsw_sp);
+err_span_init:
        mlxsw_sp_afa_fini(mlxsw_sp);
 err_afa_init:
        mlxsw_sp_counter_pool_fini(mlxsw_sp);
@@ -3753,9 +3765,9 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
        mlxsw_sp_ports_remove(mlxsw_sp);
        mlxsw_sp_dpipe_fini(mlxsw_sp);
        mlxsw_sp_acl_fini(mlxsw_sp);
-       mlxsw_sp_span_fini(mlxsw_sp);
        unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
        mlxsw_sp_router_fini(mlxsw_sp);
+       mlxsw_sp_span_fini(mlxsw_sp);
        mlxsw_sp_afa_fini(mlxsw_sp);
        mlxsw_sp_counter_pool_fini(mlxsw_sp);
        mlxsw_sp_switchdev_fini(mlxsw_sp);
@@ -3767,12 +3779,8 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
 }
 
 static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
-       .used_max_vepa_channels         = 1,
-       .max_vepa_channels              = 0,
        .used_max_mid                   = 1,
        .max_mid                        = MLXSW_SP_MID_MAX,
-       .used_max_pgt                   = 1,
-       .max_pgt                        = 0,
        .used_flood_tables              = 1,
        .used_flood_mode                = 1,
        .flood_mode                     = 3,
@@ -3810,13 +3818,12 @@ static struct devlink_resource_ops mlxsw_sp_resource_kvd_linear_ops = {
        .occ_get = mlxsw_sp_resource_kvd_linear_occ_get,
 };
 
-static struct devlink_resource_size_params mlxsw_sp_kvd_size_params;
-static struct devlink_resource_size_params mlxsw_sp_linear_size_params;
-static struct devlink_resource_size_params mlxsw_sp_hash_single_size_params;
-static struct devlink_resource_size_params mlxsw_sp_hash_double_size_params;
-
 static void
-mlxsw_sp_resource_size_params_prepare(struct mlxsw_core *mlxsw_core)
+mlxsw_sp_resource_size_params_prepare(struct mlxsw_core *mlxsw_core,
+                                     struct devlink_resource_size_params *kvd_size_params,
+                                     struct devlink_resource_size_params *linear_size_params,
+                                     struct devlink_resource_size_params *hash_double_size_params,
+                                     struct devlink_resource_size_params *hash_single_size_params)
 {
        u32 single_size_min = MLXSW_CORE_RES_GET(mlxsw_core,
                                                 KVD_SINGLE_MIN_SIZE);
@@ -3825,37 +3832,35 @@ mlxsw_sp_resource_size_params_prepare(struct mlxsw_core *mlxsw_core)
        u32 kvd_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE);
        u32 linear_size_min = 0;
 
-       /* KVD top resource */
-       mlxsw_sp_kvd_size_params.size_min = kvd_size;
-       mlxsw_sp_kvd_size_params.size_max = kvd_size;
-       mlxsw_sp_kvd_size_params.size_granularity = MLXSW_SP_KVD_GRANULARITY;
-       mlxsw_sp_kvd_size_params.unit = DEVLINK_RESOURCE_UNIT_ENTRY;
-
-       /* Linear part init */
-       mlxsw_sp_linear_size_params.size_min = linear_size_min;
-       mlxsw_sp_linear_size_params.size_max = kvd_size - single_size_min -
-                                              double_size_min;
-       mlxsw_sp_linear_size_params.size_granularity = MLXSW_SP_KVD_GRANULARITY;
-       mlxsw_sp_linear_size_params.unit = DEVLINK_RESOURCE_UNIT_ENTRY;
-
-       /* Hash double part init */
-       mlxsw_sp_hash_double_size_params.size_min = double_size_min;
-       mlxsw_sp_hash_double_size_params.size_max = kvd_size - single_size_min -
-                                                   linear_size_min;
-       mlxsw_sp_hash_double_size_params.size_granularity = MLXSW_SP_KVD_GRANULARITY;
-       mlxsw_sp_hash_double_size_params.unit = DEVLINK_RESOURCE_UNIT_ENTRY;
-
-       /* Hash single part init */
-       mlxsw_sp_hash_single_size_params.size_min = single_size_min;
-       mlxsw_sp_hash_single_size_params.size_max = kvd_size - double_size_min -
-                                                   linear_size_min;
-       mlxsw_sp_hash_single_size_params.size_granularity = MLXSW_SP_KVD_GRANULARITY;
-       mlxsw_sp_hash_single_size_params.unit = DEVLINK_RESOURCE_UNIT_ENTRY;
+       devlink_resource_size_params_init(kvd_size_params, kvd_size, kvd_size,
+                                         MLXSW_SP_KVD_GRANULARITY,
+                                         DEVLINK_RESOURCE_UNIT_ENTRY);
+       devlink_resource_size_params_init(linear_size_params, linear_size_min,
+                                         kvd_size - single_size_min -
+                                         double_size_min,
+                                         MLXSW_SP_KVD_GRANULARITY,
+                                         DEVLINK_RESOURCE_UNIT_ENTRY);
+       devlink_resource_size_params_init(hash_double_size_params,
+                                         double_size_min,
+                                         kvd_size - single_size_min -
+                                         linear_size_min,
+                                         MLXSW_SP_KVD_GRANULARITY,
+                                         DEVLINK_RESOURCE_UNIT_ENTRY);
+       devlink_resource_size_params_init(hash_single_size_params,
+                                         single_size_min,
+                                         kvd_size - double_size_min -
+                                         linear_size_min,
+                                         MLXSW_SP_KVD_GRANULARITY,
+                                         DEVLINK_RESOURCE_UNIT_ENTRY);
 }
 
 static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
 {
        struct devlink *devlink = priv_to_devlink(mlxsw_core);
+       struct devlink_resource_size_params hash_single_size_params;
+       struct devlink_resource_size_params hash_double_size_params;
+       struct devlink_resource_size_params linear_size_params;
+       struct devlink_resource_size_params kvd_size_params;
        u32 kvd_size, single_size, double_size, linear_size;
        const struct mlxsw_config_profile *profile;
        int err;
@@ -3864,23 +3869,26 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
        if (!MLXSW_CORE_RES_VALID(mlxsw_core, KVD_SIZE))
                return -EIO;
 
-       mlxsw_sp_resource_size_params_prepare(mlxsw_core);
+       mlxsw_sp_resource_size_params_prepare(mlxsw_core, &kvd_size_params,
+                                             &linear_size_params,
+                                             &hash_double_size_params,
+                                             &hash_single_size_params);
+
        kvd_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE);
        err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD,
-                                       true, kvd_size,
-                                       MLXSW_SP_RESOURCE_KVD,
+                                       kvd_size, MLXSW_SP_RESOURCE_KVD,
                                        DEVLINK_RESOURCE_ID_PARENT_TOP,
-                                       &mlxsw_sp_kvd_size_params,
+                                       &kvd_size_params,
                                        NULL);
        if (err)
                return err;
 
        linear_size = profile->kvd_linear_size;
        err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR,
-                                       false, linear_size,
+                                       linear_size,
                                        MLXSW_SP_RESOURCE_KVD_LINEAR,
                                        MLXSW_SP_RESOURCE_KVD,
-                                       &mlxsw_sp_linear_size_params,
+                                       &linear_size_params,
                                        &mlxsw_sp_resource_kvd_linear_ops);
        if (err)
                return err;
@@ -3895,20 +3903,20 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
                       profile->kvd_hash_single_parts;
        double_size = rounddown(double_size, profile->kvd_hash_granularity);
        err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_HASH_DOUBLE,
-                                       false, double_size,
+                                       double_size,
                                        MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
                                        MLXSW_SP_RESOURCE_KVD,
-                                       &mlxsw_sp_hash_double_size_params,
+                                       &hash_double_size_params,
                                        NULL);
        if (err)
                return err;
 
        single_size = kvd_size - double_size - linear_size;
        err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_HASH_SINGLE,
-                                       false, single_size,
+                                       single_size,
                                        MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
                                        MLXSW_SP_RESOURCE_KVD,
-                                       &mlxsw_sp_hash_single_size_params,
+                                       &hash_single_size_params,
                                        NULL);
        if (err)
                return err;
@@ -4639,10 +4647,18 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
                                    unsigned long event, void *ptr)
 {
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+       struct mlxsw_sp_span_entry *span_entry;
        struct mlxsw_sp *mlxsw_sp;
        int err = 0;
 
        mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
+       if (event == NETDEV_UNREGISTER) {
+               span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, dev);
+               if (span_entry)
+                       mlxsw_sp_span_entry_invalidate(mlxsw_sp, span_entry);
+       }
+       mlxsw_sp_span_respin(mlxsw_sp);
+
        if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
                err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
                                                       event, ptr);
index 675e03a892ed390358eda831c1ba518b98b976be..21bee8f1989427f7caa2237a45cb7681961d9de8 100644 (file)
@@ -124,7 +124,7 @@ enum mlxsw_sp_port_mall_action_type {
 };
 
 struct mlxsw_sp_port_mall_mirror_tc_entry {
-       u8 to_local_port;
+       int span_id;
        bool ingress;
 };
 
@@ -199,6 +199,7 @@ struct mlxsw_sp_port_vlan {
        struct list_head list;
        struct mlxsw_sp_port *mlxsw_sp_port;
        struct mlxsw_sp_fid *fid;
+       unsigned int ref_count;
        u16 vid;
        struct mlxsw_sp_bridge_port *bridge_port;
        struct list_head bridge_vlan_node;
@@ -210,6 +211,8 @@ struct mlxsw_sp_port_xstats {
        u64 wred_drop[TC_MAX_QUEUE];
        u64 tail_drop[TC_MAX_QUEUE];
        u64 backlog[TC_MAX_QUEUE];
+       u64 tx_bytes[IEEE_8021QAZ_MAX_TCS];
+       u64 tx_packets[IEEE_8021QAZ_MAX_TCS];
 };
 
 struct mlxsw_sp_port {
@@ -247,6 +250,7 @@ struct mlxsw_sp_port {
        struct mlxsw_sp_port_sample *sample;
        struct list_head vlans_list;
        struct mlxsw_sp_qdisc *root_qdisc;
+       struct mlxsw_sp_qdisc *tclass_qdiscs;
        unsigned acl_rule_count;
        struct mlxsw_sp_acl_block *ing_acl_block;
        struct mlxsw_sp_acl_block *eg_acl_block;
@@ -531,6 +535,7 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei,
 int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei);
 int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
                                u16 group_id);
+int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei);
 int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei);
 int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei);
 int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
index 0897a5435cc2e205957d2158d184b3bc465a71ac..79b1fa27a9a439301a544f621367f8b925cdf52d 100644 (file)
@@ -160,6 +160,13 @@ bool mlxsw_sp_acl_block_disabled(struct mlxsw_sp_acl_block *block)
        return block->disable_count;
 }
 
+static bool
+mlxsw_sp_acl_ruleset_is_singular(const struct mlxsw_sp_acl_ruleset *ruleset)
+{
+       /* We hold a reference on ruleset ourselves */
+       return ruleset->ref_count == 2;
+}
+
 static int
 mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp,
                          struct mlxsw_sp_acl_block *block,
@@ -341,21 +348,8 @@ mlxsw_sp_acl_ruleset_create(struct mlxsw_sp *mlxsw_sp,
        if (err)
                goto err_ht_insert;
 
-       if (!chain_index) {
-               /* We only need ruleset with chain index 0, the implicit one,
-                * to be directly bound to device. The rest of the rulesets
-                * are bound by "Goto action set".
-                */
-               err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, block);
-               if (err)
-                       goto err_ruleset_bind;
-       }
-
        return ruleset;
 
-err_ruleset_bind:
-       rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node,
-                              mlxsw_sp_acl_ruleset_ht_params);
 err_ht_insert:
        ops->ruleset_del(mlxsw_sp, ruleset->priv);
 err_ops_ruleset_add:
@@ -369,12 +363,8 @@ static void mlxsw_sp_acl_ruleset_destroy(struct mlxsw_sp *mlxsw_sp,
                                         struct mlxsw_sp_acl_ruleset *ruleset)
 {
        const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
-       struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
-       u32 chain_index = ruleset->ht_key.chain_index;
        struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
 
-       if (!chain_index)
-               mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset, block);
        rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node,
                               mlxsw_sp_acl_ruleset_ht_params);
        ops->ruleset_del(mlxsw_sp, ruleset->priv);
@@ -528,6 +518,11 @@ int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
        return mlxsw_afa_block_jump(rulei->act_block, group_id);
 }
 
+int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei)
+{
+       return mlxsw_afa_block_terminate(rulei->act_block);
+}
+
 int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei)
 {
        return mlxsw_afa_block_append_drop(rulei->act_block);
@@ -572,7 +567,6 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
                                  struct net_device *out_dev)
 {
        struct mlxsw_sp_acl_block_binding *binding;
-       struct mlxsw_sp_port *out_port;
        struct mlxsw_sp_port *in_port;
 
        if (!list_is_singular(&block->binding_list))
@@ -581,16 +575,10 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
        binding = list_first_entry(&block->binding_list,
                                   struct mlxsw_sp_acl_block_binding, list);
        in_port = binding->mlxsw_sp_port;
-       if (!mlxsw_sp_port_dev_check(out_dev))
-               return -EINVAL;
-
-       out_port = netdev_priv(out_dev);
-       if (out_port->mlxsw_sp != mlxsw_sp)
-               return -EINVAL;
 
        return mlxsw_afa_block_append_mirror(rulei->act_block,
                                             in_port->local_port,
-                                            out_port->local_port,
+                                            out_dev,
                                             binding->ingress);
 }
 
@@ -695,10 +683,25 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp,
        if (err)
                goto err_rhashtable_insert;
 
+       if (!ruleset->ht_key.chain_index &&
+           mlxsw_sp_acl_ruleset_is_singular(ruleset)) {
+               /* We only need ruleset with chain index 0, the implicit
+                * one, to be directly bound to device. The rest of the
+                * rulesets are bound by "Goto action set".
+                */
+               err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset,
+                                                     ruleset->ht_key.block);
+               if (err)
+                       goto err_ruleset_block_bind;
+       }
+
        list_add_tail(&rule->list, &mlxsw_sp->acl->rules);
        ruleset->ht_key.block->rule_count++;
        return 0;
 
+err_ruleset_block_bind:
+       rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node,
+                              mlxsw_sp_acl_rule_ht_params);
 err_rhashtable_insert:
        ops->rule_del(mlxsw_sp, rule->priv);
        return err;
@@ -712,6 +715,10 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp,
 
        ruleset->ht_key.block->rule_count--;
        list_del(&rule->list);
+       if (!ruleset->ht_key.chain_index &&
+           mlxsw_sp_acl_ruleset_is_singular(ruleset))
+               mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset,
+                                                 ruleset->ht_key.block);
        rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node,
                               mlxsw_sp_acl_rule_ht_params);
        ops->rule_del(mlxsw_sp, rule->priv);
index f7e61cecc42b103804391370e7fadef7dd980933..510ce48d87f7470fff02d524fe13416af2ae3c5f 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
  *
@@ -126,40 +126,23 @@ mlxsw_sp_act_counter_index_put(void *priv, unsigned int counter_index)
 }
 
 static int
-mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port, u8 local_out_port,
+mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port,
+                       const struct net_device *out_dev,
                        bool ingress, int *p_span_id)
 {
-       struct mlxsw_sp_port *in_port, *out_port;
-       struct mlxsw_sp_span_entry *span_entry;
+       struct mlxsw_sp_port *in_port;
        struct mlxsw_sp *mlxsw_sp = priv;
        enum mlxsw_sp_span_type type;
-       int err;
 
        type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-       out_port = mlxsw_sp->ports[local_out_port];
        in_port = mlxsw_sp->ports[local_in_port];
 
-       err = mlxsw_sp_span_mirror_add(in_port, out_port, type, false);
-       if (err)
-               return err;
-
-       span_entry = mlxsw_sp_span_entry_find(mlxsw_sp, local_out_port);
-       if (!span_entry) {
-               err = -ENOENT;
-               goto err_span_entry_find;
-       }
-
-       *p_span_id = span_entry->id;
-       return 0;
-
-err_span_entry_find:
-       mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
-       return err;
+       return mlxsw_sp_span_mirror_add(in_port, out_dev, type,
+                                       false, p_span_id);
 }
 
 static void
-mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port,
-                       bool ingress)
+mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, int span_id, bool ingress)
 {
        struct mlxsw_sp *mlxsw_sp = priv;
        struct mlxsw_sp_port *in_port;
@@ -168,7 +151,7 @@ mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port,
        type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
        in_port = mlxsw_sp->ports[local_in_port];
 
-       mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
+       mlxsw_sp_span_mirror_del(in_port, span_id, type, false);
 }
 
 static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = {
index c6e180c2be1e825e394662b38b8374c7c4c6e252..ad1b548e3cace26f149ec9b9ac7eea1e493f8b6e 100644 (file)
@@ -228,10 +228,6 @@ mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp *mlxsw_sp,
        if (err)
                return err;
 
-       err = mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group);
-       if (err)
-               goto err_group_update;
-
        err = rhashtable_init(&group->chunk_ht,
                              &mlxsw_sp_acl_tcam_chunk_ht_params);
        if (err)
@@ -240,7 +236,6 @@ mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp *mlxsw_sp,
        return 0;
 
 err_rhashtable_init:
-err_group_update:
        mlxsw_sp_acl_tcam_group_id_put(tcam, group->id);
        return err;
 }
index 93728c694e6df9985cadfccf72ab6f3a9f52f8da..0a9adc5962fb72b8dbeb3b61b3e0a28093982197 100644 (file)
@@ -385,13 +385,13 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = {
 
 static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = {
        MLXSW_SP_CPU_PORT_SB_CM,
+       MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+       MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+       MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+       MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+       MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
        MLXSW_SP_CPU_PORT_SB_CM,
-       MLXSW_SP_CPU_PORT_SB_CM,
-       MLXSW_SP_CPU_PORT_SB_CM,
-       MLXSW_SP_CPU_PORT_SB_CM,
-       MLXSW_SP_CPU_PORT_SB_CM,
-       MLXSW_SP_CPU_PORT_SB_CM,
-       MLXSW_SP_SB_CM(10000, 0, 0),
+       MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
        MLXSW_SP_CPU_PORT_SB_CM,
        MLXSW_SP_CPU_PORT_SB_CM,
        MLXSW_SP_CPU_PORT_SB_CM,
index bbd238e50f05488b3bcdfc133cb08d69bcec7c3b..54262af4e98f713b7533cb758c2dd6eda181eabc 100644 (file)
@@ -112,11 +112,11 @@ static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
        [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP]     = 1,
        [MLXSW_REG_SFGC_TYPE_IPV4_LINK_LOCAL]                   = 1,
        [MLXSW_REG_SFGC_TYPE_IPV6_ALL_HOST]                     = 1,
+       [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6]       = 1,
 };
 
 static const int mlxsw_sp_sfgc_mc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
        [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4]       = 1,
-       [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6]       = 1,
 };
 
 static const int *mlxsw_sp_packet_type_sfgc_types[] = {
index 6ce00e28d4eac8043abb8a77c86a0aabe30531b3..89dbf569dff50c0db7d97d3b4e80e8bd7cf494d6 100644 (file)
@@ -65,7 +65,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
        tcf_exts_to_list(exts, &actions);
        list_for_each_entry(a, &actions, list) {
                if (is_tcf_gact_ok(a)) {
-                       err = mlxsw_sp_acl_rulei_act_continue(rulei);
+                       err = mlxsw_sp_acl_rulei_act_terminate(rulei);
                        if (err)
                                return err;
                } else if (is_tcf_gact_shot(a)) {
index a1c4b1e63f8ddd58808021e7e28cc373402bda8e..98d896c14b87fdab950d3882443367fd06c80c1d 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -33,6 +33,7 @@
  */
 
 #include <net/ip_tunnels.h>
+#include <net/ip6_tunnel.h>
 
 #include "spectrum_ipip.h"
 
@@ -44,6 +45,14 @@ mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev)
        return tun->parms;
 }
 
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev)
+{
+       struct ip6_tnl *tun = netdev_priv(ol_dev);
+
+       return tun->parms;
+}
+
 static bool mlxsw_sp_ipip_parms4_has_ikey(struct ip_tunnel_parm parms)
 {
        return !!(parms.i_flags & TUNNEL_KEY);
@@ -72,24 +81,38 @@ mlxsw_sp_ipip_parms4_saddr(struct ip_tunnel_parm parms)
        return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.saddr };
 }
 
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms6_saddr(struct __ip6_tnl_parm parms)
+{
+       return (union mlxsw_sp_l3addr) { .addr6 = parms.laddr };
+}
+
 static union mlxsw_sp_l3addr
 mlxsw_sp_ipip_parms4_daddr(struct ip_tunnel_parm parms)
 {
        return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.daddr };
 }
 
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms6_daddr(struct __ip6_tnl_parm parms)
+{
+       return (union mlxsw_sp_l3addr) { .addr6 = parms.raddr };
+}
+
 union mlxsw_sp_l3addr
 mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
                           const struct net_device *ol_dev)
 {
        struct ip_tunnel_parm parms4;
+       struct __ip6_tnl_parm parms6;
 
        switch (proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
                return mlxsw_sp_ipip_parms4_saddr(parms4);
        case MLXSW_SP_L3_PROTO_IPV6:
-               break;
+               parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+               return mlxsw_sp_ipip_parms6_saddr(parms6);
        }
 
        WARN_ON(1);
@@ -109,19 +132,28 @@ mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
                           const struct net_device *ol_dev)
 {
        struct ip_tunnel_parm parms4;
+       struct __ip6_tnl_parm parms6;
 
        switch (proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
                return mlxsw_sp_ipip_parms4_daddr(parms4);
        case MLXSW_SP_L3_PROTO_IPV6:
-               break;
+               parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+               return mlxsw_sp_ipip_parms6_daddr(parms6);
        }
 
        WARN_ON(1);
        return (union mlxsw_sp_l3addr) {0};
 }
 
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr)
+{
+       union mlxsw_sp_l3addr naddr = {0};
+
+       return !memcmp(&addr, &naddr, sizeof(naddr));
+}
+
 static int
 mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
                                  struct mlxsw_sp_ipip_entry *ipip_entry)
@@ -215,15 +247,14 @@ static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto,
 {
        union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev);
        union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev);
-       union mlxsw_sp_l3addr naddr = {0};
 
        /* Tunnels with unset local or remote address are valid in Linux and
         * used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access
         * (NBMA) tunnels. In principle these can be offloaded, but the driver
         * currently doesn't support this. So punt.
         */
-       return memcmp(&saddr, &naddr, sizeof(naddr)) &&
-              memcmp(&daddr, &naddr, sizeof(naddr));
+       return !mlxsw_sp_l3addr_is_zero(saddr) &&
+              !mlxsw_sp_l3addr_is_zero(daddr);
 }
 
 static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp,
index a4ff5737ecccfd3ade11a76dfec9fbffc9a5e6fa..6909d867bb592cf9638efcca6b60adc6018cf9f5 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
 
 struct ip_tunnel_parm
 mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev);
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev);
 
 union mlxsw_sp_l3addr
 mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
                           const struct net_device *ol_dev);
 
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr);
+
 enum mlxsw_sp_ipip_type {
        MLXSW_SP_IPIP_TYPE_GRE4,
        MLXSW_SP_IPIP_TYPE_MAX,
index d27fa57ad3c36f83f7b641e0a958ffefab059f82..85503e93b93fb68c16b55616fc2ce78a47300099 100644 (file)
@@ -270,6 +270,8 @@ static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
        case MLXSW_SP_KVDL_PART_LARGE_CHUNKS:
                resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS;
                break;
+       default:
+               return -EINVAL;
        }
 
        err = devlink_resource_size_get(devlink, resource_id, &resource_size);
@@ -278,7 +280,7 @@ static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
                resource_size = info->end_index - info->start_index + 1;
        }
 
-       nr_entries = resource_size / info->alloc_size;
+       nr_entries = div_u64(resource_size, info->alloc_size);
        usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
        part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
        if (!part)
@@ -366,7 +368,7 @@ u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp)
        return occ;
 }
 
-u64 mlxsw_sp_kvdl_single_occ_get(struct devlink *devlink)
+static u64 mlxsw_sp_kvdl_single_occ_get(struct devlink *devlink)
 {
        struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
        struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
@@ -379,7 +381,7 @@ u64 mlxsw_sp_kvdl_single_occ_get(struct devlink *devlink)
        return mlxsw_sp_kvdl_part_occ(part);
 }
 
-u64 mlxsw_sp_kvdl_chunks_occ_get(struct devlink *devlink)
+static u64 mlxsw_sp_kvdl_chunks_occ_get(struct devlink *devlink)
 {
        struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
        struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
@@ -392,7 +394,7 @@ u64 mlxsw_sp_kvdl_chunks_occ_get(struct devlink *devlink)
        return mlxsw_sp_kvdl_part_occ(part);
 }
 
-u64 mlxsw_sp_kvdl_large_chunks_occ_get(struct devlink *devlink)
+static u64 mlxsw_sp_kvdl_large_chunks_occ_get(struct devlink *devlink)
 {
        struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
        struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
@@ -457,7 +459,7 @@ int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
 
        mlxsw_sp_kvdl_resource_size_params_prepare(devlink);
        err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES,
-                                       false, MLXSW_SP_KVDL_SINGLE_SIZE,
+                                       MLXSW_SP_KVDL_SINGLE_SIZE,
                                        MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
                                        MLXSW_SP_RESOURCE_KVD_LINEAR,
                                        &mlxsw_sp_kvdl_single_size_params,
@@ -466,7 +468,7 @@ int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
                return err;
 
        err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS,
-                                       false, MLXSW_SP_KVDL_CHUNKS_SIZE,
+                                       MLXSW_SP_KVDL_CHUNKS_SIZE,
                                        MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
                                        MLXSW_SP_RESOURCE_KVD_LINEAR,
                                        &mlxsw_sp_kvdl_chunks_size_params,
@@ -475,7 +477,7 @@ int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
                return err;
 
        err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS,
-                                       false, MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE,
+                                       MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE,
                                        MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
                                        MLXSW_SP_RESOURCE_KVD_LINEAR,
                                        &mlxsw_sp_kvdl_large_chunks_size_params,
index d20b143de3b479166f7da662d7ebf4e754b8f9aa..978a3c70653ad60f7b1371a49d0866f5dfa7c0b9 100644 (file)
@@ -126,8 +126,8 @@ mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route)
 
        switch (mr_route->mr_table->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
-               ivif = mr_route->mfc4->mfc_parent;
-               return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255;
+               ivif = mr_route->mfc4->_c.mfc_parent;
+               return mr_route->mfc4->_c.mfc_un.res.ttls[ivif] != 255;
        case MLXSW_SP_L3_PROTO_IPV6:
                /* fall through */
        default:
@@ -364,7 +364,7 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
        mr_route->mfc4 = mfc;
        mr_route->mr_table = mr_table;
        for (i = 0; i < MAXVIFS; i++) {
-               if (mfc->mfc_un.res.ttls[i] != 255) {
+               if (mfc->_c.mfc_un.res.ttls[i] != 255) {
                        err = mlxsw_sp_mr_route_evif_link(mr_route,
                                                          &mr_table->vifs[i]);
                        if (err)
@@ -374,7 +374,8 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
                                mr_route->min_mtu = mr_table->vifs[i].dev->mtu;
                }
        }
-       mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]);
+       mlxsw_sp_mr_route_ivif_link(mr_route,
+                                   &mr_table->vifs[mfc->_c.mfc_parent]);
 
        mr_route->route_action = mlxsw_sp_mr_route_action(mr_route);
        return mr_route;
@@ -418,9 +419,9 @@ static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route,
        switch (mr_route->mr_table->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                if (offload)
-                       mr_route->mfc4->mfc_flags |= MFC_OFFLOAD;
+                       mr_route->mfc4->_c.mfc_flags |= MFC_OFFLOAD;
                else
-                       mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD;
+                       mr_route->mfc4->_c.mfc_flags &= ~MFC_OFFLOAD;
                break;
        case MLXSW_SP_L3_PROTO_IPV6:
                /* fall through */
@@ -943,10 +944,10 @@ static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp,
 
        switch (mr_route->mr_table->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
-               if (mr_route->mfc4->mfc_un.res.pkt != packets)
-                       mr_route->mfc4->mfc_un.res.lastuse = jiffies;
-               mr_route->mfc4->mfc_un.res.pkt = packets;
-               mr_route->mfc4->mfc_un.res.bytes = bytes;
+               if (mr_route->mfc4->_c.mfc_un.res.pkt != packets)
+                       mr_route->mfc4->_c.mfc_un.res.lastuse = jiffies;
+               mr_route->mfc4->_c.mfc_un.res.pkt = packets;
+               mr_route->mfc4->_c.mfc_un.res.bytes = bytes;
                break;
        case MLXSW_SP_L3_PROTO_IPV6:
                /* fall through */
index 0b76704590512a75c20152f2a5ee8e62470d4e49..91262b0573e395c982bc97186f9b699a2cf0bbd0 100644 (file)
@@ -42,6 +42,8 @@
 #include "reg.h"
 
 #define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1)
+#define MLXSW_SP_PRIO_CHILD_TO_TCLASS(child) \
+       MLXSW_SP_PRIO_BAND_TO_TCLASS((child - 1))
 
 enum mlxsw_sp_qdisc_type {
        MLXSW_SP_QDISC_NO_QDISC,
@@ -76,6 +78,7 @@ struct mlxsw_sp_qdisc_ops {
 struct mlxsw_sp_qdisc {
        u32 handle;
        u8 tclass_num;
+       u8 prio_bitmap;
        union {
                struct red_stats red;
        } xstats_base;
@@ -99,6 +102,44 @@ mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle,
               mlxsw_sp_qdisc->handle == handle;
 }
 
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent,
+                   bool root_only)
+{
+       int tclass, child_index;
+
+       if (parent == TC_H_ROOT)
+               return mlxsw_sp_port->root_qdisc;
+
+       if (root_only || !mlxsw_sp_port->root_qdisc ||
+           !mlxsw_sp_port->root_qdisc->ops ||
+           TC_H_MAJ(parent) != mlxsw_sp_port->root_qdisc->handle ||
+           TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS)
+               return NULL;
+
+       child_index = TC_H_MIN(parent);
+       tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index);
+       return &mlxsw_sp_port->tclass_qdiscs[tclass];
+}
+
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle)
+{
+       int i;
+
+       if (mlxsw_sp_port->root_qdisc->handle == handle)
+               return mlxsw_sp_port->root_qdisc;
+
+       if (mlxsw_sp_port->root_qdisc->handle == TC_H_UNSPEC)
+               return NULL;
+
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+               if (mlxsw_sp_port->tclass_qdiscs[i].handle == handle)
+                       return &mlxsw_sp_port->tclass_qdiscs[i];
+
+       return NULL;
+}
+
 static int
 mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
                       struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
@@ -185,6 +226,23 @@ mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port,
        return -EOPNOTSUPP;
 }
 
+static void
+mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats,
+                                      u8 prio_bitmap, u64 *tx_packets,
+                                      u64 *tx_bytes)
+{
+       int i;
+
+       *tx_packets = 0;
+       *tx_bytes = 0;
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+               if (prio_bitmap & BIT(i)) {
+                       *tx_packets += xstats->tx_packets[i];
+                       *tx_bytes += xstats->tx_bytes[i];
+               }
+       }
+}
+
 static int
 mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
                                  int tclass_num, u32 min, u32 max,
@@ -230,17 +288,16 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
        u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
        struct mlxsw_sp_qdisc_stats *stats_base;
        struct mlxsw_sp_port_xstats *xstats;
-       struct rtnl_link_stats64 *stats;
        struct red_stats *red_base;
 
        xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
-       stats = &mlxsw_sp_port->periodic_hw_stats.stats;
        stats_base = &mlxsw_sp_qdisc->stats_base;
        red_base = &mlxsw_sp_qdisc->xstats_base.red;
 
-       stats_base->tx_packets = stats->tx_packets;
-       stats_base->tx_bytes = stats->tx_bytes;
-
+       mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+                                              mlxsw_sp_qdisc->prio_bitmap,
+                                              &stats_base->tx_packets,
+                                              &stats_base->tx_bytes);
        red_base->prob_mark = xstats->ecn;
        red_base->prob_drop = xstats->wred_drop[tclass_num];
        red_base->pdrop = xstats->tail_drop[tclass_num];
@@ -255,6 +312,12 @@ static int
 mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
                           struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
 {
+       struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc;
+
+       if (root_qdisc != mlxsw_sp_qdisc)
+               root_qdisc->stats_base.backlog -=
+                                       mlxsw_sp_qdisc->stats_base.backlog;
+
        return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port,
                                                  mlxsw_sp_qdisc->tclass_num);
 }
@@ -319,6 +382,7 @@ mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
        backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
                                       mlxsw_sp_qdisc->stats_base.backlog);
        p->qstats->backlog -= backlog;
+       mlxsw_sp_qdisc->stats_base.backlog = 0;
 }
 
 static int
@@ -357,14 +421,16 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port,
        u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
        struct mlxsw_sp_qdisc_stats *stats_base;
        struct mlxsw_sp_port_xstats *xstats;
-       struct rtnl_link_stats64 *stats;
 
        xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
-       stats = &mlxsw_sp_port->periodic_hw_stats.stats;
        stats_base = &mlxsw_sp_qdisc->stats_base;
 
-       tx_bytes = stats->tx_bytes - stats_base->tx_bytes;
-       tx_packets = stats->tx_packets - stats_base->tx_packets;
+       mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+                                              mlxsw_sp_qdisc->prio_bitmap,
+                                              &tx_packets, &tx_bytes);
+       tx_bytes = tx_bytes - stats_base->tx_bytes;
+       tx_packets = tx_packets - stats_base->tx_packets;
+
        overlimits = xstats->wred_drop[tclass_num] + xstats->ecn -
                     stats_base->overlimits;
        drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] -
@@ -406,11 +472,10 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
 {
        struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
 
-       if (p->parent != TC_H_ROOT)
+       mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
+       if (!mlxsw_sp_qdisc)
                return -EOPNOTSUPP;
 
-       mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
-
        if (p->command == TC_RED_REPLACE)
                return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
                                              mlxsw_sp_qdisc,
@@ -441,9 +506,13 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 {
        int i;
 
-       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
                mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
                                          MLXSW_SP_PORT_DEFAULT_TCLASS);
+               mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+                                      &mlxsw_sp_port->tclass_qdiscs[i]);
+               mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0;
+       }
 
        return 0;
 }
@@ -467,16 +536,41 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
                            void *params)
 {
        struct tc_prio_qopt_offload_params *p = params;
-       int tclass, i;
+       struct mlxsw_sp_qdisc *child_qdisc;
+       int tclass, i, band, backlog;
+       u8 old_priomap;
        int err;
 
-       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-               tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]);
-               err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass);
-               if (err)
-                       return err;
+       for (band = 0; band < p->bands; band++) {
+               tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+               child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+               old_priomap = child_qdisc->prio_bitmap;
+               child_qdisc->prio_bitmap = 0;
+               for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+                       if (p->priomap[i] == band) {
+                               child_qdisc->prio_bitmap |= BIT(i);
+                               if (BIT(i) & old_priomap)
+                                       continue;
+                               err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port,
+                                                               i, tclass);
+                               if (err)
+                                       return err;
+                       }
+               }
+               if (old_priomap != child_qdisc->prio_bitmap &&
+                   child_qdisc->ops && child_qdisc->ops->clean_stats) {
+                       backlog = child_qdisc->stats_base.backlog;
+                       child_qdisc->ops->clean_stats(mlxsw_sp_port,
+                                                     child_qdisc);
+                       child_qdisc->stats_base.backlog = backlog;
+               }
+       }
+       for (; band < IEEE_8021QAZ_MAX_TCS; band++) {
+               tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+               child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+               child_qdisc->prio_bitmap = 0;
+               mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc);
        }
-
        return 0;
 }
 
@@ -513,6 +607,7 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port,
 
        for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
                drops += xstats->tail_drop[i];
+               drops += xstats->wred_drop[i];
                backlog += xstats->backlog[i];
        }
        drops = drops - stats_base->drops;
@@ -548,8 +643,10 @@ mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
        stats_base->tx_bytes = stats->tx_bytes;
 
        stats_base->drops = 0;
-       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
                stats_base->drops += xstats->tail_drop[i];
+               stats_base->drops += xstats->wred_drop[i];
+       }
 
        mlxsw_sp_qdisc->stats_base.backlog = 0;
 }
@@ -564,15 +661,48 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
        .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
 };
 
+/* Grafting is not supported in mlxsw. It will result in un-offloading of the
+ * grafted qdisc as well as the qdisc in the qdisc's new location.
+ * (However, if the graft is to the location where the qdisc is already at, it
+ * will be ignored completely and won't cause un-offloading).
+ *
+ * Returns 0 when the graft is a no-op and -EOPNOTSUPP otherwise.
+ */
+static int
+mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port,
+                         struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+                         struct tc_prio_qopt_offload_graft_params *p)
+{
+       /* tclass_num is only a valid index into tclass_qdiscs[] when the band
+        * is within range, so every use of it below is gated on band_valid.
+        */
+       bool band_valid = p->band < IEEE_8021QAZ_MAX_TCS;
+       int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band);
+       struct mlxsw_sp_qdisc *old_qdisc;
+       struct mlxsw_sp_qdisc *new_slot;
+
+       /* Check if the grafted qdisc is already in its "new" location. If so -
+        * nothing needs to be done.
+        */
+       if (band_valid &&
+           mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle)
+               return 0;
+
+       /* See if the grafted qdisc is already offloaded on any tclass. If so,
+        * unoffload it.
+        */
+       old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port,
+                                                 p->child_handle);
+       if (old_qdisc)
+               mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc);
+
+       /* Unoffload whatever currently occupies the destination slot, but
+        * never index the array with an out-of-range band.
+        */
+       if (band_valid) {
+               new_slot = &mlxsw_sp_port->tclass_qdiscs[tclass_num];
+               mlxsw_sp_qdisc_destroy(mlxsw_sp_port, new_slot);
+       }
+       return -EOPNOTSUPP;
+}
+
 int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
                           struct tc_prio_qopt_offload *p)
 {
        struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
 
-       if (p->parent != TC_H_ROOT)
+       mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true);
+       if (!mlxsw_sp_qdisc)
                return -EOPNOTSUPP;
 
-       mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
        if (p->command == TC_PRIO_REPLACE)
                return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
                                              mlxsw_sp_qdisc,
@@ -589,6 +719,9 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
        case TC_PRIO_STATS:
                return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
                                                &p->stats);
+       case TC_PRIO_GRAFT:
+               return mlxsw_sp_qdisc_prio_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
+                                                &p->graft_params);
        default:
                return -EOPNOTSUPP;
        }
@@ -596,17 +729,36 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
 
+/* Allocate the port's qdisc bookkeeping: one root qdisc, whose prio_bitmap
+ * starts with all eight bits set, and an array of IEEE_8021QAZ_MAX_TCS
+ * per-traffic-class qdiscs, each tagged with its own tclass number.
+ * Returns 0 on success or -ENOMEM; on failure the root qdisc allocated here
+ * is freed again.
+ */
 int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
 {
-       mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc),
-                                           GFP_KERNEL);
-       if (!mlxsw_sp_port->root_qdisc)
-               return -ENOMEM;
+       struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
+       int i;
 
+       mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc), GFP_KERNEL);
+       if (!mlxsw_sp_qdisc)
+               goto err_root_qdisc_init;
+
+       mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc;
+       mlxsw_sp_port->root_qdisc->prio_bitmap = 0xff;
        mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
 
+       /* kcalloc() checks the count * size multiplication for overflow,
+        * unlike an open-coded product passed to kzalloc().
+        */
+       mlxsw_sp_qdisc = kcalloc(IEEE_8021QAZ_MAX_TCS, sizeof(*mlxsw_sp_qdisc),
+                                GFP_KERNEL);
+       if (!mlxsw_sp_qdisc)
+               goto err_tclass_qdiscs_init;
+
+       mlxsw_sp_port->tclass_qdiscs = mlxsw_sp_qdisc;
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+               mlxsw_sp_port->tclass_qdiscs[i].tclass_num = i;
+
        return 0;
+
+err_tclass_qdiscs_init:
+       kfree(mlxsw_sp_port->root_qdisc);
+err_root_qdisc_init:
+       return -ENOMEM;
 }
 
 void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port)
 {
+       kfree(mlxsw_sp_port->tclass_qdiscs);
        kfree(mlxsw_sp_port->root_qdisc);
 }
index 05146970c19cf829ad40267ad1e6ee622dff17ac..921bd1075edfff74241785779b39e3e8ada7e9ae 100644 (file)
@@ -70,6 +70,7 @@
 #include "spectrum_mr.h"
 #include "spectrum_mr_tcam.h"
 #include "spectrum_router.h"
+#include "spectrum_span.h"
 
 struct mlxsw_sp_fib;
 struct mlxsw_sp_vr;
@@ -2330,6 +2331,8 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
        read_unlock_bh(&n->lock);
 
        rtnl_lock();
+       mlxsw_sp_span_respin(mlxsw_sp);
+
        entry_connected = nud_state & NUD_VALID && !dead;
        neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
        if (!entry_connected && !neigh_entry)
@@ -2427,7 +2430,8 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
                mlxsw_core_schedule_work(&net_work->work);
                mlxsw_sp_port_dev_put(mlxsw_sp_port);
                break;
-       case NETEVENT_MULTIPATH_HASH_UPDATE:
+       case NETEVENT_IPV4_MPATH_HASH_UPDATE:
+       case NETEVENT_IPV6_MPATH_HASH_UPDATE:
                net = ptr;
 
                if (!net_eq(net, &init_net))
@@ -5589,6 +5593,8 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
 
        /* Protect internal structures from changes */
        rtnl_lock();
+       mlxsw_sp_span_respin(mlxsw_sp);
+
        switch (fib_work->event) {
        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
        case FIB_EVENT_ENTRY_APPEND: /* fall through */
@@ -5631,6 +5637,8 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
        int err;
 
        rtnl_lock();
+       mlxsw_sp_span_respin(mlxsw_sp);
+
        switch (fib_work->event) {
        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
        case FIB_EVENT_ENTRY_ADD:
@@ -7023,13 +7031,25 @@ static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
 
 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
 {
+       bool only_l3 = !ip6_multipath_hash_policy(&init_net);
+
        mlxsw_sp_mp_hash_header_set(recr2_pl,
                                    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
        mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
        mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
        mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
-       mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
        mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
+       if (only_l3) {
+               mlxsw_sp_mp_hash_field_set(recr2_pl,
+                                          MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
+       } else {
+               mlxsw_sp_mp_hash_header_set(recr2_pl,
+                                           MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
+               mlxsw_sp_mp_hash_field_set(recr2_pl,
+                                          MLXSW_REG_RECR2_TCP_UDP_SPORT);
+               mlxsw_sp_mp_hash_field_set(recr2_pl,
+                                          MLXSW_REG_RECR2_TCP_UDP_DPORT);
+       }
 }
 
 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
index c3bec37d71ed6dee0c23c274103f8fda602f77c6..65a77708ff617b8f4b0714cfffd9ff8fa87c4999 100644 (file)
@@ -1,6 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/mlxsw_span.c
  * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2018 Petr Machata <petrm@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  */
 
 #include <linux/list.h>
+#include <net/arp.h>
+#include <net/gre.h>
+#include <net/ndisc.h>
+#include <net/ip6_tunnel.h>
 
 #include "spectrum.h"
 #include "spectrum_span.h"
+#include "spectrum_ipip.h"
 
 int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
 {
@@ -51,8 +57,12 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
        if (!mlxsw_sp->span.entries)
                return -ENOMEM;
 
-       for (i = 0; i < mlxsw_sp->span.entries_count; i++)
-               INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list);
+       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+               struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+               INIT_LIST_HEAD(&curr->bound_ports_list);
+               curr->id = i;
+       }
 
        return 0;
 }
@@ -69,80 +79,468 @@ void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
        kfree(mlxsw_sp->span.entries);
 }
 
-static struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
+static int
+mlxsw_sp_span_entry_phys_parms(const struct net_device *to_dev,
+                              struct mlxsw_sp_span_parms *sparmsp)
 {
-       struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-       struct mlxsw_sp_span_entry *span_entry;
+       sparmsp->dest_port = netdev_priv(to_dev);
+       return 0;
+}
+
+/* Program the entry's port analyzer for local Ethernet mirroring to the
+ * destination port carried in @sparms. Returns the result of the MPAT
+ * register write.
+ */
+static int
+mlxsw_sp_span_entry_phys_configure(struct mlxsw_sp_span_entry *span_entry,
+                                  struct mlxsw_sp_span_parms sparms)
+{
+       struct mlxsw_sp_port *to_port = sparms.dest_port;
+       u8 to_local_port = to_port->local_port;
+       char mpat_pl[MLXSW_REG_MPAT_LEN];
+       int span_id = span_entry->id;
+
+       /* Create a new port analyzer entry for the destination local port. */
+       mlxsw_reg_mpat_pack(mpat_pl, span_id, to_local_port, true,
+                           MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+
+       return mlxsw_reg_write(to_port->mlxsw_sp->core, MLXSW_REG(mpat),
+                              mpat_pl);
+}
+
+/* Disable the entry's port analyzer of the given @span_type, using the
+ * destination port recorded in the entry. The register write result is not
+ * checked.
+ */
+static void
+mlxsw_sp_span_entry_deconfigure_common(struct mlxsw_sp_span_entry *span_entry,
+                                      enum mlxsw_reg_mpat_span_type span_type)
+{
+       struct mlxsw_sp_port *to_port = span_entry->parms.dest_port;
+       u8 to_local_port = to_port->local_port;
+       char mpat_pl[MLXSW_REG_MPAT_LEN];
+       int span_id = span_entry->id;
+
+       mlxsw_reg_mpat_pack(mpat_pl, span_id, to_local_port, false, span_type);
+       mlxsw_reg_write(to_port->mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+/* Tear down a local-Ethernet mirror by disabling its port analyzer. */
+static void
+mlxsw_sp_span_entry_phys_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+       mlxsw_sp_span_entry_deconfigure_common(span_entry,
+                                           MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+}
+
+/* SPAN entry operations for mirroring to a device accepted by
+ * mlxsw_sp_port_dev_check().
+ */
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = {
+       .can_handle = mlxsw_sp_port_dev_check,
+       .parms = mlxsw_sp_span_entry_phys_parms,
+       .configure = mlxsw_sp_span_entry_phys_configure,
+       .deconfigure = mlxsw_sp_span_entry_phys_deconfigure,
+};
+
+/* Resolve the link-layer address of neighbour @pkey on @l3edev using @tbl.
+ * A neighbour entry is created if none exists, and neigh_event_send() is
+ * called on it in either case. On success the address is copied to @dmac and
+ * 0 is returned. Returns -ENOENT if the neighbour is not in a NUD_VALID state
+ * or is dead, or the neigh_create() error if the entry cannot be created.
+ */
+static int mlxsw_sp_span_dmac(struct neigh_table *tbl,
+                             const void *pkey,
+                             struct net_device *l3edev,
+                             unsigned char dmac[ETH_ALEN])
+{
+       struct neighbour *neigh = neigh_lookup(tbl, pkey, l3edev);
+       int err = 0;
+
+       if (!neigh) {
+               neigh = neigh_create(tbl, pkey, l3edev);
+               if (IS_ERR(neigh))
+                       return PTR_ERR(neigh);
+       }
+
+       neigh_event_send(neigh, NULL);
+
+       /* Read the state and hardware address under the neighbour lock. */
+       read_lock_bh(&neigh->lock);
+       if ((neigh->nud_state & NUD_VALID) && !neigh->dead)
+               memcpy(dmac, neigh->ha, ETH_ALEN);
+       else
+               err = -ENOENT;
+       read_unlock_bh(&neigh->lock);
+
+       neigh_release(neigh);
+       return err;
+}
+
+/* Mark @sparmsp as not offloadable by clearing its destination port. Always
+ * returns 0.
+ */
+static int
+mlxsw_sp_span_entry_unoffloadable(struct mlxsw_sp_span_parms *sparmsp)
+{
+       sparmsp->dest_port = NULL;
+       return 0;
+}
+
+/* Fill @sparmsp for a tunnel mirror whose underlay egress device is @l3edev.
+ * A zero @gw is replaced by @daddr, and the result is resolved to a
+ * destination MAC through @tbl. If @l3edev is NULL, fails
+ * mlxsw_sp_port_dev_check(), or the MAC cannot be resolved, the parameters
+ * are marked unoffloadable instead. Always returns 0.
+ */
+static __maybe_unused int
+mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev,
+                                       union mlxsw_sp_l3addr saddr,
+                                       union mlxsw_sp_l3addr daddr,
+                                       union mlxsw_sp_l3addr gw,
+                                       __u8 ttl,
+                                       struct neigh_table *tbl,
+                                       struct mlxsw_sp_span_parms *sparmsp)
+{
+       unsigned char dmac[ETH_ALEN];
+
+       /* No gateway means the neighbour to resolve is the destination. */
+       if (mlxsw_sp_l3addr_is_zero(gw))
+               gw = daddr;
+
+       if (!l3edev || !mlxsw_sp_port_dev_check(l3edev) ||
+           mlxsw_sp_span_dmac(tbl, &gw, l3edev, dmac))
+               return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+       sparmsp->dest_port = netdev_priv(l3edev);
+       sparmsp->ttl = ttl;
+       memcpy(sparmsp->dmac, dmac, ETH_ALEN);
+       memcpy(sparmsp->smac, l3edev->dev_addr, ETH_ALEN);
+       sparmsp->saddr = saddr;
+       sparmsp->daddr = daddr;
+       return 0;
+}
+
+#if IS_ENABLED(CONFIG_NET_IPGRE)
+/* Look up the underlay route of the IPv4 gretap device @to_dev. On entry
+ * *@saddrp and *@daddrp supply the flow's source and destination. On success
+ * they are overwritten with the flow's source address after the lookup and
+ * the route's gateway, and the route's egress device is returned. Returns
+ * NULL, leaving both addresses untouched, if the lookup fails or the route
+ * is not RTN_UNICAST. Asserts that RTNL is held.
+ */
+static struct net_device *
+mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
+                           __be32 *saddrp, __be32 *daddrp)
+{
+       struct ip_tunnel *tun = netdev_priv(to_dev);
+       struct net_device *dev = NULL;
+       struct ip_tunnel_parm parms;
+       struct rtable *rt = NULL;
+       struct flowi4 fl4;
+
+       /* We assume "dev" stays valid after rt is put. */
+       ASSERT_RTNL();
+
+       parms = mlxsw_sp_ipip_netdev_parms4(to_dev);
+       ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp,
+                           0, 0, parms.link, tun->fwmark);
+
+       rt = ip_route_output_key(tun->net, &fl4);
+       if (IS_ERR(rt))
+               return NULL;
+
+       if (rt->rt_type != RTN_UNICAST)
+               goto out;
+
+       dev = rt->dst.dev;
+       *saddrp = fl4.saddr;
+       *daddrp = rt->rt_gateway;
+
+out:
+       ip_rt_put(rt);
+       return dev;
+}
+
+/* Derive mirroring parameters from the IPv4 gretap device @to_dev. The tunnel
+ * is treated as unoffloadable when it is down, has any input or output GRE
+ * flags set, inherits TTL, does not inherit TOS, or has a zero remote
+ * address. Otherwise the underlay route is looked up and the result is handed
+ * to the common tunnel helper with arp_tbl for neighbour resolution.
+ */
+static int
+mlxsw_sp_span_entry_gretap4_parms(const struct net_device *to_dev,
+                                 struct mlxsw_sp_span_parms *sparmsp)
+{
+       struct ip_tunnel_parm tparm = mlxsw_sp_ipip_netdev_parms4(to_dev);
+       union mlxsw_sp_l3addr saddr = { .addr4 = tparm.iph.saddr };
+       union mlxsw_sp_l3addr daddr = { .addr4 = tparm.iph.daddr };
+       bool inherit_tos = tparm.iph.tos & 0x1;
+       bool inherit_ttl = !tparm.iph.ttl;
+       union mlxsw_sp_l3addr gw = daddr;
+       struct net_device *l3edev;
+
+       if (!(to_dev->flags & IFF_UP) ||
+           /* Reject tunnels with GRE keys, checksums, etc. */
+           tparm.i_flags || tparm.o_flags ||
+           /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+           inherit_ttl || !inherit_tos ||
+           /* A destination address may not be "any". */
+           mlxsw_sp_l3addr_is_zero(daddr))
+               return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+       /* The lookup may rewrite saddr and gw; daddr stays the tunnel remote. */
+       l3edev = mlxsw_sp_span_gretap4_route(to_dev, &saddr.addr4, &gw.addr4);
+       return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+                                                      tparm.iph.ttl,
+                                                      &arp_tbl, sparmsp);
+}
+
+/* Program the entry's port analyzer for remote L3 mirroring over IPv4 using
+ * the resolved MACs, TTL and addresses in @sparms. Returns the result of the
+ * MPAT register write.
+ */
+static int
+mlxsw_sp_span_entry_gretap4_configure(struct mlxsw_sp_span_entry *span_entry,
+                                     struct mlxsw_sp_span_parms sparms)
+{
+       struct mlxsw_sp_port *dest_port = sparms.dest_port;
+       struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+       u8 local_port = dest_port->local_port;
+       char mpat_pl[MLXSW_REG_MPAT_LEN];
+       int pa_id = span_entry->id;
+
+       /* Create a new port analyzer entry for local_port. */
+       mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+                           MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+       mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+                                   MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+                                   sparms.dmac, false);
+       mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(mpat_pl,
+                                             sparms.ttl, sparms.smac,
+                                             be32_to_cpu(sparms.saddr.addr4),
+                                             be32_to_cpu(sparms.daddr.addr4));
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+/* Tear down an IPv4 gretap mirror by disabling its remote-L3 port analyzer. */
+static void
+mlxsw_sp_span_entry_gretap4_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+       mlxsw_sp_span_entry_deconfigure_common(span_entry,
+                                       MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+/* SPAN entry operations for mirroring to a device accepted by
+ * is_gretap_dev().
+ */
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = {
+       .can_handle = is_gretap_dev,
+       .parms = mlxsw_sp_span_entry_gretap4_parms,
+       .configure = mlxsw_sp_span_entry_gretap4_configure,
+       .deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure,
+};
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6_GRE)
+/* Look up the underlay route of the IPv6 gretap device @to_dev, using a copy
+ * of the tunnel's stored flow with the tunnel's fwmark. On success the flow's
+ * source address is written to *@saddrp, the route's gateway to *@daddrp, and
+ * the route's egress device is returned. Returns NULL, leaving both outputs
+ * untouched, if ip6_tnl_xmit_ctl() rejects the endpoints or the lookup yields
+ * no usable route. Asserts that RTNL is held.
+ */
+static struct net_device *
+mlxsw_sp_span_gretap6_route(const struct net_device *to_dev,
+                           struct in6_addr *saddrp,
+                           struct in6_addr *daddrp)
+{
+       struct ip6_tnl *t = netdev_priv(to_dev);
+       struct flowi6 fl6 = t->fl.u.ip6;
+       struct net_device *dev = NULL;
+       struct dst_entry *dst;
+       struct rt6_info *rt6;
+
+       /* We assume "dev" stays valid after dst is released. */
+       ASSERT_RTNL();
+
+       fl6.flowi6_mark = t->parms.fwmark;
+       if (!ip6_tnl_xmit_ctl(t, &fl6.saddr, &fl6.daddr))
+               return NULL;
+
+       dst = ip6_route_output(t->net, NULL, &fl6);
+       if (!dst || dst->error)
+               goto out;
+
+       rt6 = container_of(dst, struct rt6_info, dst);
+
+       dev = dst->dev;
+       *saddrp = fl6.saddr;
+       *daddrp = rt6->rt6i_gateway;
+
+out:
+       dst_release(dst);
+       return dev;
+}
+
+/* Derive mirroring parameters from the IPv6 gretap device @to_dev. The tunnel
+ * is treated as unoffloadable when it is down, has any input or output GRE
+ * flags set, has a zero hop limit, lacks IP6_TNL_F_USE_ORIG_TCLASS, or has a
+ * zero remote address. Otherwise the underlay route is looked up and the
+ * result is handed to the common tunnel helper with nd_tbl for neighbour
+ * resolution.
+ */
+static int
+mlxsw_sp_span_entry_gretap6_parms(const struct net_device *to_dev,
+                                 struct mlxsw_sp_span_parms *sparmsp)
+{
+       struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(to_dev);
+       bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS;
+       union mlxsw_sp_l3addr saddr = { .addr6 = tparm.laddr };
+       union mlxsw_sp_l3addr daddr = { .addr6 = tparm.raddr };
+       bool inherit_ttl = !tparm.hop_limit;
+       union mlxsw_sp_l3addr gw = daddr;
+       struct net_device *l3edev;
+
+       if (!(to_dev->flags & IFF_UP) ||
+           /* Reject tunnels with GRE keys, checksums, etc. */
+           tparm.i_flags || tparm.o_flags ||
+           /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+           inherit_ttl || !inherit_tos ||
+           /* A destination address may not be "any". */
+           mlxsw_sp_l3addr_is_zero(daddr))
+               return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+       /* The lookup may rewrite saddr and gw; daddr stays the tunnel remote. */
+       l3edev = mlxsw_sp_span_gretap6_route(to_dev, &saddr.addr6, &gw.addr6);
+       return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+                                                      tparm.hop_limit,
+                                                      &nd_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_configure(struct mlxsw_sp_span_entry *span_entry,
+                                     struct mlxsw_sp_span_parms sparms)
+{
+       struct mlxsw_sp_port *dest_port = sparms.dest_port;
+       struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+       u8 local_port = dest_port->local_port;
        char mpat_pl[MLXSW_REG_MPAT_LEN];
-       u8 local_port = port->local_port;
-       int index;
+       int pa_id = span_entry->id;
+
+       /* Create a new port analyzer entry for local_port. */
+       mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+                           MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+       mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+                                   MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+                                   sparms.dmac, false);
+       mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(mpat_pl, sparms.ttl, sparms.smac,
+                                             sparms.saddr.addr6,
+                                             sparms.daddr.addr6);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+/* Tear down an IPv6 gretap mirror by disabling its remote-L3 port analyzer. */
+static void
+mlxsw_sp_span_entry_gretap6_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+       mlxsw_sp_span_entry_deconfigure_common(span_entry,
+                                       MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+/* SPAN entry operations for mirroring to a device accepted by
+ * is_ip6gretap_dev().
+ */
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = {
+       .can_handle = is_ip6gretap_dev,
+       .parms = mlxsw_sp_span_entry_gretap6_parms,
+       .configure = mlxsw_sp_span_entry_gretap6_configure,
+       .deconfigure = mlxsw_sp_span_entry_gretap6_deconfigure,
+};
+#endif
+
+/* Table of the SPAN entry operation sets defined above. The gretap entries
+ * are only compiled in when the matching kernel option (CONFIG_NET_IPGRE,
+ * CONFIG_IPV6_GRE) is enabled.
+ */
+static const
+struct mlxsw_sp_span_entry_ops *const mlxsw_sp_span_entry_types[] = {
+       &mlxsw_sp_span_entry_ops_phys,
+#if IS_ENABLED(CONFIG_NET_IPGRE)
+       &mlxsw_sp_span_entry_ops_gretap4,
+#endif
+#if IS_ENABLED(CONFIG_IPV6_GRE)
+       &mlxsw_sp_span_entry_ops_gretap6,
+#endif
+};
+
+/* Parameters callback of the no-op entry type: ignores @to_dev and always
+ * marks the parameters as unoffloadable.
+ */
+static int
+mlxsw_sp_span_entry_nop_parms(const struct net_device *to_dev,
+                             struct mlxsw_sp_span_parms *sparmsp)
+{
+       return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+}
+
+/* Configure callback of the no-op entry type: programs nothing and reports
+ * success.
+ */
+static int
+mlxsw_sp_span_entry_nop_configure(struct mlxsw_sp_span_entry *span_entry,
+                                 struct mlxsw_sp_span_parms sparms)
+{
+       return 0;
+}
+
+/* Deconfigure callback of the no-op entry type: nothing to undo. */
+static void
+mlxsw_sp_span_entry_nop_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+}
+
+/* Operations whose callbacks do nothing; installed on an entry by
+ * mlxsw_sp_span_entry_invalidate(). No can_handle callback is set.
+ */
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_nop = {
+       .parms = mlxsw_sp_span_entry_nop_parms,
+       .configure = mlxsw_sp_span_entry_nop_configure,
+       .deconfigure = mlxsw_sp_span_entry_nop_deconfigure,
+};
+
+/* Try to offload @sparms onto @span_entry. If the destination port belongs to
+ * a different mlxsw instance, or the type-specific configure callback fails,
+ * an error is logged and the destination port is cleared, which leaves the
+ * entry unoffloaded. The (possibly adjusted) parameters are always stored in
+ * the entry.
+ */
+static void
+mlxsw_sp_span_entry_configure(struct mlxsw_sp *mlxsw_sp,
+                             struct mlxsw_sp_span_entry *span_entry,
+                             struct mlxsw_sp_span_parms sparms)
+{
+       if (sparms.dest_port) {
+               /* Log messages are newline-terminated so that they cannot be
+                * merged with whatever is printed next.
+                */
+               if (sparms.dest_port->mlxsw_sp != mlxsw_sp) {
+                       netdev_err(span_entry->to_dev, "Cannot mirror to %s, which belongs to a different mlxsw instance\n",
+                                  sparms.dest_port->dev->name);
+                       sparms.dest_port = NULL;
+               } else if (span_entry->ops->configure(span_entry, sparms)) {
+                       netdev_err(span_entry->to_dev, "Failed to offload mirror to %s\n",
+                                  sparms.dest_port->dev->name);
+                       sparms.dest_port = NULL;
+               }
+       }
+
+       span_entry->parms = sparms;
+}
+
+/* Run the type-specific deconfigure callback, but only if the entry has a
+ * destination port recorded, i.e. was actually offloaded.
+ */
+static void
+mlxsw_sp_span_entry_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+       if (span_entry->parms.dest_port)
+               span_entry->ops->deconfigure(span_entry);
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp,
+                          const struct net_device *to_dev,
+                          const struct mlxsw_sp_span_entry_ops *ops,
+                          struct mlxsw_sp_span_parms sparms)
+{
+       struct mlxsw_sp_span_entry *span_entry = NULL;
        int i;
-       int err;
 
        /* find a free entry to use */
-       index = -1;
        for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
                if (!mlxsw_sp->span.entries[i].ref_count) {
-                       index = i;
                        span_entry = &mlxsw_sp->span.entries[i];
                        break;
                }
        }
-       if (index < 0)
-               return NULL;
-
-       /* create a new port analayzer entry for local_port */
-       mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true);
-       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
-       if (err)
+       if (!span_entry)
                return NULL;
 
-       span_entry->id = index;
+       span_entry->ops = ops;
        span_entry->ref_count = 1;
-       span_entry->local_port = local_port;
+       span_entry->to_dev = to_dev;
+       mlxsw_sp_span_entry_configure(mlxsw_sp, span_entry, sparms);
+
        return span_entry;
 }
 
-static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
-                                       struct mlxsw_sp_span_entry *span_entry)
+static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp_span_entry *span_entry)
 {
-       u8 local_port = span_entry->local_port;
-       char mpat_pl[MLXSW_REG_MPAT_LEN];
-       int pa_id = span_entry->id;
-
-       mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false);
-       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+       mlxsw_sp_span_entry_deconfigure(span_entry);
 }
 
 struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+                                const struct net_device *to_dev)
 {
        int i;
 
        for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
                struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
 
-               if (curr->ref_count && curr->local_port == local_port)
+               if (curr->ref_count && curr->to_dev == to_dev)
                        return curr;
        }
        return NULL;
 }
 
+/* Deconfigure @span_entry and switch it to the no-op operations. The entry's
+ * reference count is not touched here; @mlxsw_sp is unused.
+ */
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+                                   struct mlxsw_sp_span_entry *span_entry)
+{
+       mlxsw_sp_span_entry_deconfigure(span_entry);
+       span_entry->ops = &mlxsw_sp_span_entry_ops_nop;
+}
+
 static struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
+mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id)
+{
+       int i;
+
+       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+               struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+               if (curr->ref_count && curr->id == span_id)
+                       return curr;
+       }
+       return NULL;
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_get(struct mlxsw_sp *mlxsw_sp,
+                       const struct net_device *to_dev,
+                       const struct mlxsw_sp_span_entry_ops *ops,
+                       struct mlxsw_sp_span_parms sparms)
 {
        struct mlxsw_sp_span_entry *span_entry;
 
-       span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp,
-                                             port->local_port);
+       span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, to_dev);
        if (span_entry) {
                /* Already exists, just take a reference */
                span_entry->ref_count++;
                return span_entry;
        }
 
-       return mlxsw_sp_span_entry_create(port);
+       return mlxsw_sp_span_entry_create(mlxsw_sp, to_dev, ops, sparms);
 }
 
 static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
@@ -150,7 +548,7 @@ static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
 {
        WARN_ON(!span_entry->ref_count);
        if (--span_entry->ref_count == 0)
-               mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
+               mlxsw_sp_span_entry_destroy(span_entry);
        return 0;
 }
 
@@ -202,13 +600,17 @@ int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
 }
 
 static struct mlxsw_sp_span_inspected_port *
-mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port,
-                                   struct mlxsw_sp_span_entry *span_entry)
+mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_span_entry *span_entry,
+                                   enum mlxsw_sp_span_type type,
+                                   struct mlxsw_sp_port *port,
+                                   bool bind)
 {
        struct mlxsw_sp_span_inspected_port *p;
 
        list_for_each_entry(p, &span_entry->bound_ports_list, list)
-               if (port->local_port == p->local_port)
+               if (type == p->type &&
+                   port->local_port == p->local_port &&
+                   bind == p->bound)
                        return p;
        return NULL;
 }
@@ -238,8 +640,22 @@ mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
        struct mlxsw_sp_span_inspected_port *inspected_port;
        struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
        char sbib_pl[MLXSW_REG_SBIB_LEN];
+       int i;
        int err;
 
+       /* A given (source port, direction) can only be bound to one analyzer,
+        * so if a binding is requested, check for conflicts.
+        */
+       if (bind)
+               for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+                       struct mlxsw_sp_span_entry *curr =
+                               &mlxsw_sp->span.entries[i];
+
+                       if (mlxsw_sp_span_entry_bound_port_find(curr, type,
+                                                               port, bind))
+                               return -EEXIST;
+               }
+
        /* if it is an egress SPAN, bind a shared buffer to it */
        if (type == MLXSW_SP_SPAN_EGRESS) {
                u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp,
@@ -267,6 +683,7 @@ mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
        }
        inspected_port->local_port = port->local_port;
        inspected_port->type = type;
+       inspected_port->bound = bind;
        list_add_tail(&inspected_port->list, &span_entry->bound_ports_list);
 
        return 0;
@@ -293,7 +710,8 @@ mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
        struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
        char sbib_pl[MLXSW_REG_SBIB_LEN];
 
-       inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry);
+       inspected_port = mlxsw_sp_span_entry_bound_port_find(span_entry, type,
+                                                            port, bind);
        if (!inspected_port)
                return;
 
@@ -312,15 +730,41 @@ mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
        kfree(inspected_port);
 }
 
+/* Pick the SPAN entry ops able to mirror to @to_dev.
+ *
+ * Walks mlxsw_sp_span_entry_types in order and returns the first element
+ * whose can_handle() callback accepts @to_dev, or NULL when none does.
+ */
+static const struct mlxsw_sp_span_entry_ops *
+mlxsw_sp_span_entry_ops(struct mlxsw_sp *mlxsw_sp,
+                       const struct net_device *to_dev)
+{
+       size_t idx;
+
+       for (idx = 0; idx < ARRAY_SIZE(mlxsw_sp_span_entry_types); idx++) {
+               const struct mlxsw_sp_span_entry_ops *candidate =
+                       mlxsw_sp_span_entry_types[idx];
+
+               if (!candidate->can_handle(to_dev))
+                       continue;
+               return candidate;
+       }
+
+       return NULL;
+}
+
 int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
-                            struct mlxsw_sp_port *to,
-                            enum mlxsw_sp_span_type type, bool bind)
+                            const struct net_device *to_dev,
+                            enum mlxsw_sp_span_type type, bool bind,
+                            int *p_span_id)
 {
        struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
+       const struct mlxsw_sp_span_entry_ops *ops;
+       struct mlxsw_sp_span_parms sparms = {NULL};
        struct mlxsw_sp_span_entry *span_entry;
        int err;
 
-       span_entry = mlxsw_sp_span_entry_get(to);
+       ops = mlxsw_sp_span_entry_ops(mlxsw_sp, to_dev);
+       if (!ops) {
+               netdev_err(to_dev, "Cannot mirror to %s", to_dev->name);
+               return -EOPNOTSUPP;
+       }
+
+       err = ops->parms(to_dev, &sparms);
+       if (err)
+               return err;
+
+       span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms);
        if (!span_entry)
                return -ENOENT;
 
@@ -331,6 +775,7 @@ int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
        if (err)
                goto err_port_bind;
 
+       *p_span_id = span_entry->id;
        return 0;
 
 err_port_bind:
@@ -338,13 +783,12 @@ int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
        return err;
 }
 
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
                              enum mlxsw_sp_span_type type, bool bind)
 {
        struct mlxsw_sp_span_entry *span_entry;
 
-       span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp,
-                                             destination_port);
+       span_entry = mlxsw_sp_span_entry_find_by_id(from->mlxsw_sp, span_id);
        if (!span_entry) {
                netdev_err(from->dev, "no span entry found\n");
                return;
@@ -354,3 +798,27 @@ void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
                   span_entry->id);
        mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
 }
+
+/* Re-evaluate every in-use SPAN entry and reconfigure those whose
+ * parameters changed. Asserts that RTNL is held.
+ */
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp)
+{
+       int i;
+       int err;
+
+       ASSERT_RTNL();
+       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+               struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+               struct mlxsw_sp_span_parms sparms = {NULL};
+
+               /* Skip slots that nobody holds a reference on. */
+               if (!curr->ref_count)
+                       continue;
+
+               /* Recompute the parameters for the entry's destination; an
+                * entry whose parms callback fails is left as it is.
+                */
+               err = curr->ops->parms(curr->to_dev, &sparms);
+               if (err)
+                       continue;
+
+               /* Byte-wise comparison against the stored parameters; only
+                * a difference triggers a deconfigure/configure cycle.
+                * NOTE(review): any result of the configure call is not
+                * checked here.
+                */
+               if (memcmp(&sparms, &curr->parms, sizeof(sparms))) {
+                       mlxsw_sp_span_entry_deconfigure(curr);
+                       mlxsw_sp_span_entry_configure(mlxsw_sp, curr, sparms);
+               }
+       }
+}
index 069050e385fff6d6b4fff2e8dbf10a9f96f4ee40..4b87ec20e65810b82ebcec18d65e73f98f54bb0e 100644 (file)
@@ -35,6 +35,9 @@
 #define _MLXSW_SPECTRUM_SPAN_H
 
 #include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "spectrum_router.h"
 
 struct mlxsw_sp;
 struct mlxsw_sp_port;
@@ -48,25 +51,56 @@ struct mlxsw_sp_span_inspected_port {
        struct list_head list;
        enum mlxsw_sp_span_type type;
        u8 local_port;
+
+       /* Whether this is a directly bound mirror (port-to-port) or an ACL. */
+       bool bound;
 };
 
+struct mlxsw_sp_span_parms {
+       struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. */
+       unsigned int ttl;
+       unsigned char dmac[ETH_ALEN];
+       unsigned char smac[ETH_ALEN];
+       union mlxsw_sp_l3addr daddr;
+       union mlxsw_sp_l3addr saddr;
+};
+
+struct mlxsw_sp_span_entry_ops;
+
 struct mlxsw_sp_span_entry {
-       u8 local_port;
+       const struct net_device *to_dev;
+       const struct mlxsw_sp_span_entry_ops *ops;
+       struct mlxsw_sp_span_parms parms;
        struct list_head bound_ports_list;
        int ref_count;
        int id;
 };
 
+struct mlxsw_sp_span_entry_ops {
+       bool (*can_handle)(const struct net_device *to_dev);
+       int (*parms)(const struct net_device *to_dev,
+                    struct mlxsw_sp_span_parms *sparmsp);
+       int (*configure)(struct mlxsw_sp_span_entry *span_entry,
+                        struct mlxsw_sp_span_parms sparms);
+       void (*deconfigure)(struct mlxsw_sp_span_entry *span_entry);
+};
+
 int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp);
 
 int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
-                            struct mlxsw_sp_port *to,
-                            enum mlxsw_sp_span_type type, bool bind);
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
+                            const struct net_device *to_dev,
+                            enum mlxsw_sp_span_type type,
+                            bool bind, int *p_span_id);
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
                              enum mlxsw_sp_span_type type, bool bind);
 struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port);
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+                                const struct net_device *to_dev);
+
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+                                   struct mlxsw_sp_span_entry *span_entry);
 
 int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu);
 
index f9f53af04fe189f858eae371ee66ed9b1bc43268..c11c9a635866a4eb3feb0c6a6761d3c2afabadff 100644 (file)
@@ -1203,6 +1203,7 @@ static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port,
                                     bool dynamic)
 {
        char *sfd_pl;
+       u8 num_rec;
        int err;
 
        sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
@@ -1212,9 +1213,16 @@ static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port,
        mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
        mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic),
                              mac, fid, action, local_port);
+       num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
-       kfree(sfd_pl);
+       if (err)
+               goto out;
+
+       if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
+               err = -EBUSY;
 
+out:
+       kfree(sfd_pl);
        return err;
 }
 
@@ -1239,6 +1247,7 @@ static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id,
                                       bool adding, bool dynamic)
 {
        char *sfd_pl;
+       u8 num_rec;
        int err;
 
        sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
@@ -1249,9 +1258,16 @@ static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id,
        mlxsw_reg_sfd_uc_lag_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic),
                                  mac, fid, MLXSW_REG_SFD_REC_ACTION_NOP,
                                  lag_vid, lag_id);
+       num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
-       kfree(sfd_pl);
+       if (err)
+               goto out;
+
+       if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
+               err = -EBUSY;
 
+out:
+       kfree(sfd_pl);
        return err;
 }
 
@@ -1296,6 +1312,7 @@ static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr,
                                u16 fid, u16 mid_idx, bool adding)
 {
        char *sfd_pl;
+       u8 num_rec;
        int err;
 
        sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
@@ -1305,7 +1322,15 @@ static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr,
        mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
        mlxsw_reg_sfd_mc_pack(sfd_pl, 0, addr, fid,
                              MLXSW_REG_SFD_REC_ACTION_NOP, mid_idx);
+       num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
+       if (err)
+               goto out;
+
+       if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
+               err = -EBUSY;
+
+out:
        kfree(sfd_pl);
        return err;
 }
@@ -1882,14 +1907,10 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
                                struct netlink_ext_ack *extack)
 {
        struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+       struct net_device *dev = bridge_port->dev;
        u16 vid;
 
-       if (!is_vlan_dev(bridge_port->dev)) {
-               NL_SET_ERR_MSG_MOD(extack, "Only VLAN devices can be enslaved to a VLAN-unaware bridge");
-               return -EINVAL;
-       }
-       vid = vlan_dev_vlan_id(bridge_port->dev);
-
+       vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
        mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
        if (WARN_ON(!mlxsw_sp_port_vlan))
                return -EINVAL;
@@ -1912,8 +1933,10 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
                                 struct mlxsw_sp_port *mlxsw_sp_port)
 {
        struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
-       u16 vid = vlan_dev_vlan_id(bridge_port->dev);
+       struct net_device *dev = bridge_port->dev;
+       u16 vid;
 
+       vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
        mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
        if (WARN_ON(!mlxsw_sp_port_vlan))
                return;
index f3c29bbf07e22e245492e7d456d89686770061d4..c87b0934a40526bb964bf747f2a91a93c18c7a21 100644 (file)
@@ -789,7 +789,7 @@ mlxsw_sx_port_get_link_ksettings(struct net_device *dev,
        u32 supported, advertising, lp_advertising;
        int err;
 
-       mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0);
+       mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0, false);
        err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
        if (err) {
                netdev_err(dev, "Failed to get proto");
@@ -879,7 +879,7 @@ mlxsw_sx_port_set_link_ksettings(struct net_device *dev,
                mlxsw_sx_to_ptys_advert_link(advertising) :
                mlxsw_sx_to_ptys_speed(speed);
 
-       mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0);
+       mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0, false);
        err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
        if (err) {
                netdev_err(dev, "Failed to get proto");
@@ -897,7 +897,7 @@ mlxsw_sx_port_set_link_ksettings(struct net_device *dev,
                return 0;
 
        mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port,
-                               eth_proto_new);
+                               eth_proto_new, true);
        err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
        if (err) {
                netdev_err(dev, "Failed to set proto admin");
@@ -1029,7 +1029,7 @@ mlxsw_sx_port_speed_by_width_set(struct mlxsw_sx_port *mlxsw_sx_port, u8 width)
 
        eth_proto_admin = mlxsw_sx_to_ptys_upper_speed(upper_speed);
        mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port,
-                               eth_proto_admin);
+                               eth_proto_admin, true);
        return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
 }
 
index 36a09d94b368ae18508cb46e6dbb0b7e03fa2c77..71dca8bd51acf4686aeb3f33ad7012e4d6a49985 100644 (file)
@@ -42,4 +42,14 @@ config ENCX24J600
       To compile this driver as a module, choose M here. The module will be
       called encx24j600.
 
+config LAN743X
+       tristate "LAN743x support"
+       depends on PCI
+       select PHYLIB
+       ---help---
+         Support for the Microchip LAN743x PCI Express Gigabit Ethernet chip
+
+         To compile this driver as a module, choose M here. The module will be
+         called lan743x.
+
 endif # NET_VENDOR_MICROCHIP
index ff78f621b59a7153ec9e3dabfd99d6b028de0b7a..2e982cc249fbb59b76b54057576f989d7be7ee0b 100644 (file)
@@ -4,3 +4,6 @@
 
 obj-$(CONFIG_ENC28J60) += enc28j60.o
 obj-$(CONFIG_ENCX24J600) += encx24j600.o encx24j600-regmap.o
+obj-$(CONFIG_LAN743X) += lan743x.o
+
+lan743x-objs := lan743x_main.o
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
new file mode 100644 (file)
index 0000000..dd947e4
--- /dev/null
@@ -0,0 +1,2771 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (C) 2018 Microchip Technology Inc. */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/crc32.h>
+#include <linux/microchipphy.h>
+#include <linux/net_tstamp.h>
+#include <linux/phy.h>
+#include <linux/rtnetlink.h>
+#include <linux/iopoll.h>
+#include "lan743x_main.h"
+
+/* Release the memory BAR regions of the adapter's PCI device, then
+ * disable the device.
+ */
+static void lan743x_pci_cleanup(struct lan743x_adapter *adapter)
+{
+       struct pci_dev *pdev = adapter->pdev;
+       int mem_bars = pci_select_bars(pdev, IORESOURCE_MEM);
+
+       pci_release_selected_regions(pdev, mem_bars);
+       pci_disable_device(pdev);
+}
+
+/* Enable the PCI device for memory access, request its memory BARs and
+ * enable bus mastering. Also records @pdev in adapter->pdev.
+ *
+ * Returns 0 on success or a negative errno. On every failure path the
+ * device has been disabled again (or was never enabled) and no regions
+ * are held.
+ */
+static int lan743x_pci_init(struct lan743x_adapter *adapter,
+                           struct pci_dev *pdev)
+{
+       unsigned long bars = 0;
+       int ret;
+
+       adapter->pdev = pdev;
+       ret = pci_enable_device_mem(pdev);
+       if (ret)
+               goto return_error;
+
+       netif_info(adapter, probe, adapter->netdev,
+                  "PCI: Vendor ID = 0x%04X, Device ID = 0x%04X\n",
+                  pdev->vendor, pdev->device);
+       bars = pci_select_bars(pdev, IORESOURCE_MEM);
+       if (!test_bit(0, &bars)) {
+               /* BAR 0 (mapped as the CSR window by lan743x_csr_init())
+                * is not a memory BAR. ret is still 0 from the successful
+                * enable above, so set an error explicitly instead of
+                * reporting success with the device disabled.
+                */
+               ret = -ENODEV;
+               goto disable_device;
+       }
+
+       ret = pci_request_selected_regions(pdev, bars, DRIVER_NAME);
+       if (ret)
+               goto disable_device;
+
+       pci_set_master(pdev);
+       return 0;
+
+disable_device:
+       pci_disable_device(adapter->pdev);
+
+return_error:
+       return ret;
+}
+
+/* Read a 32-bit CSR; @offset is used as an index into csr.csr_address. */
+static u32 lan743x_csr_read(struct lan743x_adapter *adapter, int offset)
+{
+       return ioread32(&adapter->csr.csr_address[offset]);
+}
+
+/* Write the 32-bit value @data to the CSR indexed by @offset. */
+static void lan743x_csr_write(struct lan743x_adapter *adapter, int offset,
+                             u32 data)
+{
+       iowrite32(data, &adapter->csr.csr_address[offset]);
+}
+
+/* Single-argument read op handed to readx_poll_timeout(). The expansion
+ * references a variable named "adapter", which must be in scope wherever
+ * the macro is used.
+ */
+#define LAN743X_CSR_READ_OP(offset)    lan743x_csr_read(adapter, offset)
+
+/* Set HW_CFG_LRST_ in HW_CFG (a light reset, going by the names) and then
+ * poll HW_CFG until that bit reads back as clear.
+ *
+ * Returns the result of readx_poll_timeout().
+ */
+static int lan743x_csr_light_reset(struct lan743x_adapter *adapter)
+{
+       u32 data;
+
+       data = lan743x_csr_read(adapter, HW_CFG);
+       data |= HW_CFG_LRST_;
+       lan743x_csr_write(adapter, HW_CFG, data);
+
+       /* NOTE(review): 100000 and 10000000 are presumably the sleep
+        * interval and the total timeout in microseconds - confirm against
+        * linux/iopoll.h.
+        */
+       return readx_poll_timeout(LAN743X_CSR_READ_OP, HW_CFG, data,
+                                 !(data & HW_CFG_LRST_), 100000, 10000000);
+}
+
+/* Poll the CSR at @offset until the bits selected by @bit_mask match
+ * @target_value: 1 matches when any masked bit is set, 0 matches when all
+ * masked bits are clear.
+ *
+ * NOTE(review): @usleep_max is passed as the 5th readx_poll_timeout()
+ * argument and @usleep_min * @count as the 6th - presumably the sleep
+ * interval and the overall timeout; confirm against linux/iopoll.h.
+ *
+ * Returns the result of readx_poll_timeout().
+ */
+static int lan743x_csr_wait_for_bit(struct lan743x_adapter *adapter,
+                                   int offset, u32 bit_mask,
+                                   int target_value, int usleep_min,
+                                   int usleep_max, int count)
+{
+       u32 data;
+
+       return readx_poll_timeout(LAN743X_CSR_READ_OP, offset, data,
+                                 target_value == ((data & bit_mask) ? 1 : 0),
+                                 usleep_max, usleep_min * count);
+}
+
+/* Map BAR 0 as the CSR window, read and log ID_REV and FPGA_REV, derive
+ * csr->flags from the chip revision and issue a light reset.
+ *
+ * Returns 0 on success, -ENOMEM if the mapping fails, -ENODEV if ID_REV is
+ * rejected by ID_REV_IS_VALID_CHIP_ID_(), or the error returned by
+ * lan743x_csr_light_reset().
+ */
+static int lan743x_csr_init(struct lan743x_adapter *adapter)
+{
+       struct lan743x_csr *csr = &adapter->csr;
+       resource_size_t bar_start, bar_length;
+       int result;
+
+       bar_start = pci_resource_start(adapter->pdev, 0);
+       bar_length = pci_resource_len(adapter->pdev, 0);
+       /* The mapping comes from devm_ioremap(); the clean_up path below
+        * does no explicit unmap.
+        */
+       csr->csr_address = devm_ioremap(&adapter->pdev->dev,
+                                       bar_start, bar_length);
+       if (!csr->csr_address) {
+               result = -ENOMEM;
+               goto clean_up;
+       }
+
+       csr->id_rev = lan743x_csr_read(adapter, ID_REV);
+       csr->fpga_rev = lan743x_csr_read(adapter, FPGA_REV);
+       netif_info(adapter, probe, adapter->netdev,
+                  "ID_REV = 0x%08X, FPGA_REV = %d.%d\n",
+                  csr->id_rev, FPGA_REV_GET_MAJOR_(csr->fpga_rev),
+                  FPGA_REV_GET_MINOR_(csr->fpga_rev));
+       if (!ID_REV_IS_VALID_CHIP_ID_(csr->id_rev)) {
+               result = -ENODEV;
+               goto clean_up;
+       }
+
+       /* Default: interrupt auto set/clear is supported. Revision A0 is
+        * flagged as such and has that capability removed again; B0 is only
+        * flagged. Any other revision keeps the default.
+        */
+       csr->flags = LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR;
+       switch (csr->id_rev & ID_REV_CHIP_REV_MASK_) {
+       case ID_REV_CHIP_REV_A0_:
+               csr->flags |= LAN743X_CSR_FLAG_IS_A0;
+               csr->flags &= ~LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR;
+               break;
+       case ID_REV_CHIP_REV_B0_:
+               csr->flags |= LAN743X_CSR_FLAG_IS_B0;
+               break;
+       }
+
+       result = lan743x_csr_light_reset(adapter);
+       if (result)
+               goto clean_up;
+       return 0;
+clean_up:
+       return result;
+}
+
+/* Handle the software general-purpose interrupt. @context is the
+ * struct lan743x_adapter.
+ *
+ * If INT_BIT_SW_GP_ is set in INT_STS, the bit is written back to INT_STS
+ * (presumably to acknowledge it) and intr->software_isr_flag is set; that
+ * flag is what lan743x_intr_test_isr() waits for.
+ */
+static void lan743x_intr_software_isr(void *context)
+{
+       struct lan743x_adapter *adapter = context;
+       struct lan743x_intr *intr = &adapter->intr;
+       u32 int_sts;
+
+       int_sts = lan743x_csr_read(adapter, INT_STS);
+       if (int_sts & INT_BIT_SW_GP_) {
+               lan743x_csr_write(adapter, INT_STS, INT_BIT_SW_GP_);
+               intr->software_isr_flag = 1;
+       }
+}
+
+/* Interrupt handler for one TX DMA channel.
+ * @context: the struct lan743x_tx of the channel.
+ * @int_sts: interrupt status bits to act on.
+ * @flags: LAN743X_VECTOR_FLAG_* bits controlling which registers are
+ *         consulted and cleared.
+ *
+ * Schedules the channel's NAPI instance when the TX IOC bit is both
+ * pending and enabled; otherwise re-enables the channel interrupt before
+ * returning.
+ */
+static void lan743x_tx_isr(void *context, u32 int_sts, u32 flags)
+{
+       struct lan743x_tx *tx = context;
+       struct lan743x_adapter *adapter = tx->adapter;
+       bool enable_flag = true;
+       u32 int_en = 0;
+
+       /* NOTE(review): int_en is never used after this read; whether the
+        * register read itself is required is not evident from this
+        * function.
+        */
+       int_en = lan743x_csr_read(adapter, INT_EN_SET);
+       if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR) {
+               lan743x_csr_write(adapter, INT_EN_CLR,
+                                 INT_BIT_DMA_TX_(tx->channel_number));
+       }
+
+       if (int_sts & INT_BIT_DMA_TX_(tx->channel_number)) {
+               u32 ioc_bit = DMAC_INT_BIT_TX_IOC_(tx->channel_number);
+               u32 dmac_int_sts;
+               u32 dmac_int_en;
+
+               /* Without the corresponding flag, the IOC bit is assumed
+                * pending / enabled rather than read from the DMAC
+                * registers.
+                */
+               if (flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ)
+                       dmac_int_sts = lan743x_csr_read(adapter, DMAC_INT_STS);
+               else
+                       dmac_int_sts = ioc_bit;
+               if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK)
+                       dmac_int_en = lan743x_csr_read(adapter,
+                                                      DMAC_INT_EN_SET);
+               else
+                       dmac_int_en = ioc_bit;
+
+               dmac_int_en &= ioc_bit;
+               dmac_int_sts &= dmac_int_en;
+               if (dmac_int_sts & ioc_bit) {
+                       napi_schedule(&tx->napi);
+                       enable_flag = false;/* poll func will enable later */
+               }
+       }
+
+       if (enable_flag)
+               /* enable isr */
+               lan743x_csr_write(adapter, INT_EN_SET,
+                                 INT_BIT_DMA_TX_(tx->channel_number));
+}
+
+/* Interrupt handler for one RX DMA channel.
+ * @context: the struct lan743x_rx of the channel.
+ * @int_sts: interrupt status bits to act on.
+ * @flags: LAN743X_VECTOR_FLAG_* bits controlling which registers are
+ *         consulted and cleared.
+ *
+ * Schedules the channel's NAPI instance when the RX frame bit is both
+ * pending and enabled; otherwise re-enables the channel interrupt before
+ * returning.
+ */
+static void lan743x_rx_isr(void *context, u32 int_sts, u32 flags)
+{
+       struct lan743x_rx *rx = context;
+       struct lan743x_adapter *adapter = rx->adapter;
+       bool enable_flag = true;
+
+       if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR) {
+               lan743x_csr_write(adapter, INT_EN_CLR,
+                                 INT_BIT_DMA_RX_(rx->channel_number));
+       }
+
+       if (int_sts & INT_BIT_DMA_RX_(rx->channel_number)) {
+               u32 rx_frame_bit = DMAC_INT_BIT_RXFRM_(rx->channel_number);
+               u32 dmac_int_sts;
+               u32 dmac_int_en;
+
+               /* Without the corresponding flag, the frame bit is assumed
+                * pending / enabled rather than read from the DMAC
+                * registers.
+                */
+               if (flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ)
+                       dmac_int_sts = lan743x_csr_read(adapter, DMAC_INT_STS);
+               else
+                       dmac_int_sts = rx_frame_bit;
+               if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK)
+                       dmac_int_en = lan743x_csr_read(adapter,
+                                                      DMAC_INT_EN_SET);
+               else
+                       dmac_int_en = rx_frame_bit;
+
+               dmac_int_en &= rx_frame_bit;
+               dmac_int_sts &= dmac_int_en;
+               if (dmac_int_sts & rx_frame_bit) {
+                       napi_schedule(&rx->napi);
+                       enable_flag = false;/* poll funct will enable later */
+               }
+       }
+
+       if (enable_flag) {
+               /* enable isr */
+               lan743x_csr_write(adapter, INT_EN_SET,
+                                 INT_BIT_DMA_RX_(rx->channel_number));
+       }
+}
+
+/* Handler for a vector that carries several interrupt sources.
+ * @context: the struct lan743x_adapter.
+ * @int_sts: pending status bits for this vector.
+ * @flags: vector flags, passed through to the per-channel handlers.
+ *
+ * Dispatches each pending RX channel bit, then each pending TX channel
+ * bit, then the software interrupt bit, clearing every bit from @int_sts
+ * as it is handled. Whatever remains set afterwards has no handler here
+ * and is disabled through INT_EN_CLR.
+ */
+static void lan743x_intr_shared_isr(void *context, u32 int_sts, u32 flags)
+{
+       struct lan743x_adapter *adapter = context;
+       unsigned int channel;
+
+       if (int_sts & INT_BIT_ALL_RX_) {
+               for (channel = 0; channel < LAN743X_USED_RX_CHANNELS;
+                       channel++) {
+                       u32 int_bit = INT_BIT_DMA_RX_(channel);
+
+                       if (int_sts & int_bit) {
+                               lan743x_rx_isr(&adapter->rx[channel],
+                                              int_bit, flags);
+                               int_sts &= ~int_bit;
+                       }
+               }
+       }
+       if (int_sts & INT_BIT_ALL_TX_) {
+               for (channel = 0; channel < LAN743X_USED_TX_CHANNELS;
+                       channel++) {
+                       u32 int_bit = INT_BIT_DMA_TX_(channel);
+
+                       if (int_sts & int_bit) {
+                               lan743x_tx_isr(&adapter->tx[channel],
+                                              int_bit, flags);
+                               int_sts &= ~int_bit;
+                       }
+               }
+       }
+       if (int_sts & INT_BIT_ALL_OTHER_) {
+               if (int_sts & INT_BIT_SW_GP_) {
+                       lan743x_intr_software_isr(adapter);
+                       int_sts &= ~INT_BIT_SW_GP_;
+               }
+       }
+       /* Unhandled sources: disable them. */
+       if (int_sts)
+               lan743x_csr_write(adapter, INT_EN_CLR, int_sts);
+}
+
+/* Top-level IRQ handler registered through request_irq() for each vector.
+ * @irq: IRQ number (unused in this body).
+ * @ptr: the struct lan743x_vector the IRQ was registered with.
+ *
+ * Determines the pending status according to the vector flags, optionally
+ * masks the vector and/or master interrupt, filters the status by the
+ * enabled sources and the vector's own mask, and calls the vector's
+ * handler. Returns IRQ_HANDLED only if some status bit survived the
+ * filtering, IRQ_NONE otherwise.
+ */
+static irqreturn_t lan743x_intr_entry_isr(int irq, void *ptr)
+{
+       struct lan743x_vector *vector = ptr;
+       struct lan743x_adapter *adapter = vector->adapter;
+       irqreturn_t result = IRQ_NONE;
+       u32 int_enables;
+       u32 int_sts;
+
+       if (vector->flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ) {
+               int_sts = lan743x_csr_read(adapter, INT_STS);
+       } else if (vector->flags &
+                  (LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C |
+                  LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C)) {
+               int_sts = lan743x_csr_read(adapter, INT_STS_R2C);
+       } else {
+               /* use mask as implied status */
+               int_sts = vector->int_mask | INT_BIT_MAS_;
+       }
+
+       /* Master bit not set in the status: nothing to do, report IRQ_NONE
+        * without touching any enable register.
+        */
+       if (!(int_sts & INT_BIT_MAS_))
+               goto irq_done;
+
+       if (vector->flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR)
+               /* disable vector interrupt */
+               lan743x_csr_write(adapter,
+                                 INT_VEC_EN_CLR,
+                                 INT_VEC_EN_(vector->vector_index));
+
+       if (vector->flags & LAN743X_VECTOR_FLAG_MASTER_ENABLE_CLEAR)
+               /* disable master interrupt */
+               lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_MAS_);
+
+       if (vector->flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK) {
+               int_enables = lan743x_csr_read(adapter, INT_EN_SET);
+       } else {
+               /*  use vector mask as implied enable mask */
+               int_enables = vector->int_mask;
+       }
+
+       /* Keep only sources that are enabled and belong to this vector. */
+       int_sts &= int_enables;
+       int_sts &= vector->int_mask;
+       if (int_sts) {
+               if (vector->handler) {
+                       vector->handler(vector->context,
+                                       int_sts, vector->flags);
+               } else {
+                       /* disable interrupts on this vector */
+                       lan743x_csr_write(adapter, INT_EN_CLR,
+                                         vector->int_mask);
+               }
+               result = IRQ_HANDLED;
+       }
+
+       if (vector->flags & LAN743X_VECTOR_FLAG_MASTER_ENABLE_SET)
+               /* enable master interrupt */
+               lan743x_csr_write(adapter, INT_EN_SET, INT_BIT_MAS_);
+
+       if (vector->flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET)
+               /* enable vector interrupt */
+               lan743x_csr_write(adapter,
+                                 INT_VEC_EN_SET,
+                                 INT_VEC_EN_(vector->vector_index));
+irq_done:
+       return result;
+}
+
+/* Self-test of interrupt delivery: enable and raise the software
+ * general-purpose interrupt, then wait for lan743x_intr_software_isr() to
+ * set intr->software_isr_flag.
+ *
+ * Waits for at most 10 iterations of usleep_range(1000, 20000). Returns 0
+ * if the flag was observed, -ENODEV otherwise. The software interrupt is
+ * disabled again in both cases.
+ */
+static int lan743x_intr_test_isr(struct lan743x_adapter *adapter)
+{
+       struct lan743x_intr *intr = &adapter->intr;
+       int result = -ENODEV;
+       int timeout = 10;
+
+       intr->software_isr_flag = 0;
+
+       /* enable interrupt */
+       lan743x_csr_write(adapter, INT_EN_SET, INT_BIT_SW_GP_);
+
+       /* activate interrupt here */
+       lan743x_csr_write(adapter, INT_SET, INT_BIT_SW_GP_);
+       while ((timeout > 0) && (!(intr->software_isr_flag))) {
+               usleep_range(1000, 20000);
+               timeout--;
+       }
+
+       if (intr->software_isr_flag)
+               result = 0;
+
+       /* disable interrupts */
+       lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_SW_GP_);
+       return result;
+}
+
+/* Populate vector_list[@vector_index] and request its IRQ.
+ * @flags: LAN743X_VECTOR_FLAG_* bits; LAN743X_VECTOR_FLAG_IRQ_SHARED makes
+ *         the IRQ be requested with IRQF_SHARED.
+ * @int_mask: interrupt sources served by this vector.
+ * @handler: called by lan743x_intr_entry_isr() with @context.
+ *
+ * vector->irq is read here, not assigned, so it must already hold the IRQ
+ * number. Returns the result of request_irq(); on failure the handler,
+ * context, int_mask and flags of the vector are reset.
+ */
+static int lan743x_intr_register_isr(struct lan743x_adapter *adapter,
+                                    int vector_index, u32 flags,
+                                    u32 int_mask,
+                                    lan743x_vector_handler handler,
+                                    void *context)
+{
+       struct lan743x_vector *vector = &adapter->intr.vector_list
+                                       [vector_index];
+       int ret;
+
+       /* Fill in the vector before request_irq(): the entry ISR reads
+        * these fields through its "ptr" argument.
+        */
+       vector->adapter = adapter;
+       vector->flags = flags;
+       vector->vector_index = vector_index;
+       vector->int_mask = int_mask;
+       vector->handler = handler;
+       vector->context = context;
+
+       ret = request_irq(vector->irq,
+                         lan743x_intr_entry_isr,
+                         (flags & LAN743X_VECTOR_FLAG_IRQ_SHARED) ?
+                         IRQF_SHARED : 0, DRIVER_NAME, vector);
+       if (ret) {
+               vector->handler = NULL;
+               vector->context = NULL;
+               vector->int_mask = 0;
+               vector->flags = 0;
+       }
+       return ret;
+}
+
+/* Free the IRQ of vector_list[@vector_index] and reset the vector's
+ * handler, context, int_mask and flags.
+ */
+static void lan743x_intr_unregister_isr(struct lan743x_adapter *adapter,
+                                       int vector_index)
+{
+       struct lan743x_vector *vector = &adapter->intr.vector_list
+                                       [vector_index];
+
+       free_irq(vector->irq, vector);
+       vector->handler = NULL;
+       vector->context = NULL;
+       vector->int_mask = 0;
+       vector->flags = 0;
+}
+
+/* Look up the flags of the vector serving @int_mask.
+ *
+ * Returns the flags of the first vector_list entry whose int_mask shares
+ * at least one bit with @int_mask, or 0 if no entry does.
+ */
+static u32 lan743x_intr_get_vector_flags(struct lan743x_adapter *adapter,
+                                        u32 int_mask)
+{
+       int i;
+
+       for (i = 0; i < LAN743X_MAX_VECTOR_COUNT; i++) {
+               struct lan743x_vector *vector = &adapter->intr.vector_list[i];
+
+               if (!(vector->int_mask & int_mask))
+                       continue;
+               return vector->flags;
+       }
+       return 0;
+}
+
+/* Tear down interrupt handling: mask the master interrupt and the vector
+ * enables, unregister every vector whose IRQ was requested, and disable
+ * MSI / MSI-X if they were enabled. State is tracked through intr->flags,
+ * and each step clears its own flag bit.
+ */
+static void lan743x_intr_close(struct lan743x_adapter *adapter)
+{
+       struct lan743x_intr *intr = &adapter->intr;
+       int index = 0;
+
+       lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_MAS_);
+       /* NOTE(review): 0x000000FF presumably covers the enable bits of
+        * vectors 0-7 - confirm against the register definition.
+        */
+       lan743x_csr_write(adapter, INT_VEC_EN_CLR, 0x000000FF);
+
+       for (index = 0; index < LAN743X_MAX_VECTOR_COUNT; index++) {
+               if (intr->flags & INTR_FLAG_IRQ_REQUESTED(index)) {
+                       lan743x_intr_unregister_isr(adapter, index);
+                       intr->flags &= ~INTR_FLAG_IRQ_REQUESTED(index);
+               }
+       }
+
+       if (intr->flags & INTR_FLAG_MSI_ENABLED) {
+               pci_disable_msi(adapter->pdev);
+               intr->flags &= ~INTR_FLAG_MSI_ENABLED;
+       }
+
+       if (intr->flags & INTR_FLAG_MSIX_ENABLED) {
+               pci_disable_msix(adapter->pdev);
+               intr->flags &= ~INTR_FLAG_MSIX_ENABLED;
+       }
+}
+/* Set up interrupt delivery for the adapter.
+ *
+ * Tries MSI-X first (between 1 and 1 + LAN743X_USED_TX_CHANNELS +
+ * LAN743X_USED_RX_CHANNELS vectors), then MSI (skipped when
+ * LAN743X_CSR_FLAG_IS_A0 is set), then the legacy IRQ in intr->irq.
+ * Vector 0 is registered with lan743x_intr_shared_isr for all RX, TX and
+ * other sources; when more vectors were granted, TX channels and then RX
+ * channels are moved onto vectors of their own.
+ *
+ * Returns 0 on success.  When an ISR registration or the interrupt test
+ * fails, lan743x_intr_close() is called and the error code is returned.
+ */
+static int lan743x_intr_open(struct lan743x_adapter *adapter)
+{
+       struct msix_entry msix_entries[LAN743X_MAX_VECTOR_COUNT];
+       struct lan743x_intr *intr = &adapter->intr;
+       u32 int_vec_en_auto_clr = 0;
+       u32 int_vec_map0 = 0;
+       u32 int_vec_map1 = 0;
+       int ret = -ENODEV;
+       int index = 0;
+       u32 flags = 0;
+
+       intr->number_of_vectors = 0;
+
+       /* Try to set up MSIX interrupts: every table entry index is filled
+        * in, but only up to one vector per used TX/RX channel plus one
+        * shared vector is requested
+        */
+       memset(&msix_entries[0], 0,
+              sizeof(struct msix_entry) * LAN743X_MAX_VECTOR_COUNT);
+       for (index = 0; index < LAN743X_MAX_VECTOR_COUNT; index++)
+               msix_entries[index].entry = index;
+       ret = pci_enable_msix_range(adapter->pdev,
+                                   msix_entries, 1,
+                                   1 + LAN743X_USED_TX_CHANNELS +
+                                   LAN743X_USED_RX_CHANNELS);
+
+       if (ret > 0) {
+               intr->flags |= INTR_FLAG_MSIX_ENABLED;
+               intr->number_of_vectors = ret;
+               intr->using_vectors = true;
+               for (index = 0; index < intr->number_of_vectors; index++)
+                       intr->vector_list[index].irq = msix_entries
+                                                      [index].vector;
+               netif_info(adapter, ifup, adapter->netdev,
+                          "using MSIX interrupts, number of vectors = %d\n",
+                          intr->number_of_vectors);
+       }
+
+       /* If MSIX failed try to setup using MSI interrupts (never attempted
+        * when LAN743X_CSR_FLAG_IS_A0 is set)
+        */
+       if (!intr->number_of_vectors) {
+               if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
+                       if (!pci_enable_msi(adapter->pdev)) {
+                               intr->flags |= INTR_FLAG_MSI_ENABLED;
+                               intr->number_of_vectors = 1;
+                               intr->using_vectors = true;
+                               intr->vector_list[0].irq =
+                                       adapter->pdev->irq;
+                               netif_info(adapter, ifup, adapter->netdev,
+                                          "using MSI interrupts, number of vectors = %d\n",
+                                          intr->number_of_vectors);
+                       }
+               }
+       }
+
+       /* If MSIX, and MSI failed, setup using legacy interrupt */
+       if (!intr->number_of_vectors) {
+               intr->number_of_vectors = 1;
+               intr->using_vectors = false;
+               intr->vector_list[0].irq = intr->irq;
+               netif_info(adapter, ifup, adapter->netdev,
+                          "using legacy interrupts\n");
+       }
+
+       /* At this point we must have at least one irq */
+       lan743x_csr_write(adapter, INT_VEC_EN_CLR, 0xFFFFFFFF);
+
+       /* map all interrupts to vector 0 */
+       lan743x_csr_write(adapter, INT_VEC_MAP0, 0x00000000);
+       lan743x_csr_write(adapter, INT_VEC_MAP1, 0x00000000);
+       lan743x_csr_write(adapter, INT_VEC_MAP2, 0x00000000);
+       flags = LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ |
+               LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C |
+               LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK |
+               LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR;
+       /* vectored modes add the VECTOR_ENABLE_ISR flags; the legacy mode
+        * adds the MASTER_ENABLE flags and marks the IRQ as shared
+        */
+       if (intr->using_vectors) {
+               flags |= LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR |
+                        LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET;
+       } else {
+               flags |= LAN743X_VECTOR_FLAG_MASTER_ENABLE_CLEAR |
+                        LAN743X_VECTOR_FLAG_MASTER_ENABLE_SET |
+                        LAN743X_VECTOR_FLAG_IRQ_SHARED;
+       }
+       /* with auto set/clear support, the explicit source status/enable
+        * flags are swapped for their R2C counterparts
+        */
+       if (adapter->csr.flags & LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR) {
+               flags &= ~LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ;
+               flags &= ~LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C;
+               flags &= ~LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR;
+               flags &= ~LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK;
+               flags |= LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C;
+               flags |= LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C;
+       }
+       /* vector 0 starts out owning every RX, TX and other source */
+       ret = lan743x_intr_register_isr(adapter, 0, flags,
+                                       INT_BIT_ALL_RX_ | INT_BIT_ALL_TX_ |
+                                       INT_BIT_ALL_OTHER_,
+                                       lan743x_intr_shared_isr, adapter);
+       if (ret)
+               goto clean_up;
+       intr->flags |= INTR_FLAG_IRQ_REQUESTED(0);
+
+       if (intr->using_vectors)
+               lan743x_csr_write(adapter, INT_VEC_EN_SET,
+                                 INT_VEC_EN_(0));
+
+       if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
+               lan743x_csr_write(adapter, INT_MOD_CFG0, LAN743X_INT_MOD);
+               lan743x_csr_write(adapter, INT_MOD_CFG1, LAN743X_INT_MOD);
+               lan743x_csr_write(adapter, INT_MOD_CFG2, LAN743X_INT_MOD);
+               lan743x_csr_write(adapter, INT_MOD_CFG3, LAN743X_INT_MOD);
+               lan743x_csr_write(adapter, INT_MOD_CFG4, LAN743X_INT_MOD);
+               lan743x_csr_write(adapter, INT_MOD_CFG5, LAN743X_INT_MOD);
+               lan743x_csr_write(adapter, INT_MOD_CFG6, LAN743X_INT_MOD);
+               lan743x_csr_write(adapter, INT_MOD_CFG7, LAN743X_INT_MOD);
+               lan743x_csr_write(adapter, INT_MOD_MAP0, 0x00005432);
+               lan743x_csr_write(adapter, INT_MOD_MAP1, 0x00000001);
+               lan743x_csr_write(adapter, INT_MOD_MAP2, 0x00FFFFFF);
+       }
+
+       /* enable interrupts, then check delivery with the interrupt test
+        * before any dedicated vector is set up
+        */
+       lan743x_csr_write(adapter, INT_EN_SET, INT_BIT_MAS_);
+       ret = lan743x_intr_test_isr(adapter);
+       if (ret)
+               goto clean_up;
+       /* spare vectors go to TX channels first, starting at vector 1 */
+       if (intr->number_of_vectors > 1) {
+               int number_of_tx_vectors = intr->number_of_vectors - 1;
+
+               if (number_of_tx_vectors > LAN743X_USED_TX_CHANNELS)
+                       number_of_tx_vectors = LAN743X_USED_TX_CHANNELS;
+               flags = LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ |
+                       LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C |
+                       LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK |
+                       LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR |
+                       LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR |
+                       LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET;
+
+               if (adapter->csr.flags &
+                  LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR) {
+                       flags = LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR |
+                               LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET |
+                               LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET |
+                               LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR |
+                               LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR;
+               }
+
+               for (index = 0; index < number_of_tx_vectors; index++) {
+                       u32 int_bit = INT_BIT_DMA_TX_(index);
+                       int vector = index + 1;
+
+                       /* map TX interrupt to vector; the accumulated map
+                        * is rewritten on every pass
+                        */
+                       int_vec_map1 |= INT_VEC_MAP1_TX_VEC_(index, vector);
+                       lan743x_csr_write(adapter, INT_VEC_MAP1, int_vec_map1);
+                       if (flags &
+                           LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR) {
+                               int_vec_en_auto_clr |= INT_VEC_EN_(vector);
+                               lan743x_csr_write(adapter, INT_VEC_EN_AUTO_CLR,
+                                                 int_vec_en_auto_clr);
+                       }
+
+                       /* Remove TX interrupt from shared mask */
+                       intr->vector_list[0].int_mask &= ~int_bit;
+                       ret = lan743x_intr_register_isr(adapter, vector, flags,
+                                                       int_bit, lan743x_tx_isr,
+                                                       &adapter->tx[index]);
+                       if (ret)
+                               goto clean_up;
+                       intr->flags |= INTR_FLAG_IRQ_REQUESTED(vector);
+                       if (!(flags &
+                           LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET))
+                               lan743x_csr_write(adapter, INT_VEC_EN_SET,
+                                                 INT_VEC_EN_(vector));
+               }
+       }
+       if ((intr->number_of_vectors - LAN743X_USED_TX_CHANNELS) > 1) {
+               int number_of_rx_vectors = intr->number_of_vectors -
+                                          LAN743X_USED_TX_CHANNELS - 1;
+
+               if (number_of_rx_vectors > LAN743X_USED_RX_CHANNELS)
+                       number_of_rx_vectors = LAN743X_USED_RX_CHANNELS;
+
+               flags = LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ |
+                       LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C |
+                       LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK |
+                       LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR |
+                       LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR |
+                       LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET;
+
+               if (adapter->csr.flags &
+                   LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR) {
+                       flags = LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR |
+                               LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET |
+                               LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET |
+                               LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR |
+                               LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR;
+               }
+               for (index = 0; index < number_of_rx_vectors; index++) {
+                       int vector = index + 1 + LAN743X_USED_TX_CHANNELS;
+                       u32 int_bit = INT_BIT_DMA_RX_(index);
+
+                       /* map RX interrupt to vector; RX vectors are
+                        * numbered after the TX vectors
+                        */
+                       int_vec_map0 |= INT_VEC_MAP0_RX_VEC_(index, vector);
+                       lan743x_csr_write(adapter, INT_VEC_MAP0, int_vec_map0);
+                       if (flags &
+                           LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR) {
+                               int_vec_en_auto_clr |= INT_VEC_EN_(vector);
+                               lan743x_csr_write(adapter, INT_VEC_EN_AUTO_CLR,
+                                                 int_vec_en_auto_clr);
+                       }
+
+                       /* Remove RX interrupt from shared mask */
+                       intr->vector_list[0].int_mask &= ~int_bit;
+                       ret = lan743x_intr_register_isr(adapter, vector, flags,
+                                                       int_bit, lan743x_rx_isr,
+                                                       &adapter->rx[index]);
+                       if (ret)
+                               goto clean_up;
+                       intr->flags |= INTR_FLAG_IRQ_REQUESTED(vector);
+                       /* NOTE(review): unlike the TX loop, the enable is
+                        * written even when VECTOR_ENABLE_AUTO_SET is in
+                        * flags - confirm this is intended
+                        */
+                       lan743x_csr_write(adapter, INT_VEC_EN_SET,
+                                         INT_VEC_EN_(vector));
+               }
+       }
+       return 0;
+
+clean_up:
+       lan743x_intr_close(adapter);
+       return ret;
+}
+
+/* Write @length 32-bit words from @buf into the data port RAM chosen by
+ * @select, starting at data port address @addr.
+ *
+ * Runs under adapter->dp_lock.  Returns 0 on success, or -EIO when the
+ * wait on DP_SEL_DPRDY_ fails before the transfer or after any word.
+ */
+static int lan743x_dp_write(struct lan743x_adapter *adapter,
+                           u32 select, u32 addr, u32 length, u32 *buf)
+{
+       int status = -EIO;
+       u32 sel_reg;
+       int word;
+
+       mutex_lock(&adapter->dp_lock);
+
+       if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_,
+                                    1, 40, 100, 100))
+               goto out_unlock;
+
+       /* replace the selector field, keep the remaining DP_SEL bits */
+       sel_reg = lan743x_csr_read(adapter, DP_SEL);
+       sel_reg = (sel_reg & ~DP_SEL_MASK_) | select;
+       lan743x_csr_write(adapter, DP_SEL, sel_reg);
+
+       for (word = 0; word < length; word++) {
+               lan743x_csr_write(adapter, DP_ADDR, addr + word);
+               lan743x_csr_write(adapter, DP_DATA_0, buf[word]);
+               lan743x_csr_write(adapter, DP_CMD, DP_CMD_WRITE_);
+
+               /* wait for DPRDY again before issuing the next word */
+               if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_,
+                                            1, 40, 100, 100))
+                       goto out_unlock;
+       }
+       status = 0;
+
+out_unlock:
+       mutex_unlock(&adapter->dp_lock);
+       return status;
+}
+
+/* Build the value to write to MAC_MII_ACC for one MII transaction: PHY
+ * address @id, register index @index, the read or write direction bit
+ * (read when @read is non-zero) and the busy bit.
+ */
+static u32 lan743x_mac_mii_access(u16 id, u16 index, int read)
+{
+       u32 phy_addr = (id << MAC_MII_ACC_PHY_ADDR_SHIFT_) &
+                      MAC_MII_ACC_PHY_ADDR_MASK_;
+       u32 reg_index = (index << MAC_MII_ACC_MIIRINDA_SHIFT_) &
+                       MAC_MII_ACC_MIIRINDA_MASK_;
+       u32 command = phy_addr | reg_index;
+
+       command |= read ? MAC_MII_ACC_MII_READ_ : MAC_MII_ACC_MII_WRITE_;
+       command |= MAC_MII_ACC_MII_BUSY_;
+       return command;
+}
+
+/* Poll MAC_MII_ACC until MAC_MII_ACC_MII_BUSY_ reads back clear.
+ *
+ * Polls without sleeping between reads, for at most 1000000 us.  Returns
+ * whatever readx_poll_timeout() reports.
+ */
+static int lan743x_mac_mii_wait_till_not_busy(struct lan743x_adapter *adapter)
+{
+       u32 mii_acc;
+       int err;
+
+       err = readx_poll_timeout(LAN743X_CSR_READ_OP, MAC_MII_ACC, mii_acc,
+                                !(mii_acc & MAC_MII_ACC_MII_BUSY_),
+                                0, 1000000);
+       return err;
+}
+
+/* mii_bus read handler: read register @index of the PHY at @phy_id.
+ *
+ * Returns the low 16 bits of MAC_MII_DATA on success, or the negative
+ * error from waiting for the MII interface to go idle.
+ */
+static int lan743x_mdiobus_read(struct mii_bus *bus, int phy_id, int index)
+{
+       struct lan743x_adapter *adapter = bus->priv;
+       u32 command;
+       int err;
+
+       /* confirm MII not busy */
+       err = lan743x_mac_mii_wait_till_not_busy(adapter);
+       if (err < 0)
+               return err;
+
+       /* issue the read command, then wait for it to finish */
+       command = lan743x_mac_mii_access(phy_id, index, MAC_MII_READ);
+       lan743x_csr_write(adapter, MAC_MII_ACC, command);
+       err = lan743x_mac_mii_wait_till_not_busy(adapter);
+       if (err < 0)
+               return err;
+
+       return (int)(lan743x_csr_read(adapter, MAC_MII_DATA) & 0xFFFF);
+}
+
+/* mii_bus write handler: write @regval to register @index of the PHY at
+ * @phy_id.
+ *
+ * Returns the result of the final wait for the MII interface to go idle,
+ * or the negative error from the initial wait.
+ */
+static int lan743x_mdiobus_write(struct mii_bus *bus,
+                                int phy_id, int index, u16 regval)
+{
+       struct lan743x_adapter *adapter = bus->priv;
+       u32 command;
+       int err;
+
+       /* confirm MII not busy */
+       err = lan743x_mac_mii_wait_till_not_busy(adapter);
+       if (err < 0)
+               return err;
+
+       /* the data register is loaded before the command is issued */
+       lan743x_csr_write(adapter, MAC_MII_DATA, (u32)regval);
+
+       command = lan743x_mac_mii_access(phy_id, index, MAC_MII_WRITE);
+       lan743x_csr_write(adapter, MAC_MII_ACC, command);
+
+       return lan743x_mac_mii_wait_till_not_busy(adapter);
+}
+
+/* Program @addr into the MAC_RX_ADDRL/MAC_RX_ADDRH registers, record it in
+ * adapter->mac_address and log it.
+ *
+ * addr[0] lands in the least significant byte of MAC_RX_ADDRL; addr[4] and
+ * addr[5] fill the low 16 bits of MAC_RX_ADDRH.
+ */
+static void lan743x_mac_set_address(struct lan743x_adapter *adapter,
+                                   u8 *addr)
+{
+       u32 addr_lo, addr_hi;
+
+       /* widen each byte to u32 before shifting: a u8 promotes to int, and
+        * shifting a byte with its top bit set left by 24 overflows int
+        */
+       addr_lo = ((u32)addr[0]) |
+                 (((u32)addr[1]) << 8) |
+                 (((u32)addr[2]) << 16) |
+                 (((u32)addr[3]) << 24);
+       addr_hi = ((u32)addr[4]) |
+                 (((u32)addr[5]) << 8);
+       lan743x_csr_write(adapter, MAC_RX_ADDRL, addr_lo);
+       lan743x_csr_write(adapter, MAC_RX_ADDRH, addr_hi);
+
+       ether_addr_copy(adapter->mac_address, addr);
+       netif_info(adapter, drv, adapter->netdev,
+                  "MAC address set to %pM\n", addr);
+}
+
+/* Reset the MAC, turn on automatic duplex and speed detection, and settle
+ * on a MAC address.
+ *
+ * The address found in MAC_RX_ADDRH/MAC_RX_ADDRL after the reset is kept
+ * when it is a valid Ethernet address; otherwise a random one is
+ * generated.  The result is programmed back into the MAC and copied to
+ * netdev->dev_addr.
+ *
+ * Returns 0 on success, or the error from waiting for MAC_CR_RST_ to
+ * clear.
+ */
+static int lan743x_mac_init(struct lan743x_adapter *adapter)
+{
+       struct net_device *netdev = adapter->netdev;
+       u8 *mac = adapter->mac_address;
+       u32 addr_hi, addr_lo;
+       u32 mac_cr;
+       int err;
+
+       lan743x_csr_write(adapter, MAC_CR, MAC_CR_RST_);
+       err = lan743x_csr_wait_for_bit(adapter, MAC_CR, MAC_CR_RST_,
+                                      0, 1000, 20000, 100);
+       if (err)
+               return err;
+
+       /* setup auto duplex, and speed detection */
+       mac_cr = lan743x_csr_read(adapter, MAC_CR);
+       mac_cr |= MAC_CR_ADD_ | MAC_CR_ASD_ | MAC_CR_CNTR_RST_;
+       lan743x_csr_write(adapter, MAC_CR, mac_cr);
+
+       /* unpack the address registers, least significant byte first */
+       addr_hi = lan743x_csr_read(adapter, MAC_RX_ADDRH);
+       addr_lo = lan743x_csr_read(adapter, MAC_RX_ADDRL);
+       mac[0] = addr_lo & 0xFF;
+       mac[1] = (addr_lo >> 8) & 0xFF;
+       mac[2] = (addr_lo >> 16) & 0xFF;
+       mac[3] = (addr_lo >> 24) & 0xFF;
+       mac[4] = addr_hi & 0xFF;
+       mac[5] = (addr_hi >> 8) & 0xFF;
+
+       /* an all-ones readback or any other invalid address is replaced */
+       if (((addr_hi & 0x0000FFFF) == 0x0000FFFF &&
+            addr_lo == 0xFFFFFFFF) ||
+           !is_valid_ether_addr(mac))
+               random_ether_addr(mac);
+
+       lan743x_mac_set_address(adapter, mac);
+       ether_addr_copy(netdev->dev_addr, mac);
+       return 0;
+}
+
+/* Set the enable bits in MAC_RX and then MAC_TX, leaving the other bits
+ * of both registers as read.  Always returns 0.
+ */
+static int lan743x_mac_open(struct lan743x_adapter *adapter)
+{
+       u32 reg;
+
+       reg = lan743x_csr_read(adapter, MAC_RX);
+       reg |= MAC_RX_RXEN_;
+       lan743x_csr_write(adapter, MAC_RX, reg);
+
+       reg = lan743x_csr_read(adapter, MAC_TX);
+       reg |= MAC_TX_TXEN_;
+       lan743x_csr_write(adapter, MAC_TX, reg);
+
+       return 0;
+}
+
+/* Clear the enable bit in MAC_TX and then in MAC_RX, each time waiting for
+ * the matching TXD/RXD bit to read as set.  The wait results are not
+ * checked.
+ */
+static void lan743x_mac_close(struct lan743x_adapter *adapter)
+{
+       u32 reg;
+
+       /* transmitter first */
+       reg = lan743x_csr_read(adapter, MAC_TX) & ~MAC_TX_TXEN_;
+       lan743x_csr_write(adapter, MAC_TX, reg);
+       lan743x_csr_wait_for_bit(adapter, MAC_TX, MAC_TX_TXD_,
+                                1, 1000, 20000, 100);
+
+       /* then the receiver */
+       reg = lan743x_csr_read(adapter, MAC_RX) & ~MAC_RX_RXEN_;
+       lan743x_csr_write(adapter, MAC_RX, reg);
+       lan743x_csr_wait_for_bit(adapter, MAC_RX, MAC_RX_RXD_,
+                                1, 1000, 20000, 100);
+}
+
+/* Write MAC_FLOW with the pause time field at its maximum and the TX/RX
+ * flow control enable bits set according to @tx_enable and @rx_enable.
+ */
+static void lan743x_mac_flow_ctrl_set_enables(struct lan743x_adapter *adapter,
+                                             bool tx_enable, bool rx_enable)
+{
+       /* set maximum pause time because when fifo space frees
+        * up a zero value pause frame will be sent to release the pause
+        */
+       u32 flow_cr = MAC_FLOW_CR_FCPT_MASK_;
+
+       flow_cr |= tx_enable ? MAC_FLOW_CR_TX_FCEN_ : 0;
+       flow_cr |= rx_enable ? MAC_FLOW_CR_RX_FCEN_ : 0;
+
+       lan743x_csr_write(adapter, MAC_FLOW, flow_cr);
+}
+
+/* Program the maximum frame size field of MAC_RX for @new_mtu.
+ *
+ * When the receiver is enabled on entry it is disabled for the update and
+ * re-enabled afterwards.  The size written is new_mtu + ETH_HLEN + 4.
+ * Always returns 0.
+ */
+static int lan743x_mac_set_mtu(struct lan743x_adapter *adapter, int new_mtu)
+{
+       bool rx_was_enabled;
+       u32 rx_reg;
+
+       rx_reg = lan743x_csr_read(adapter, MAC_RX);
+       rx_was_enabled = !!(rx_reg & MAC_RX_RXEN_);
+
+       if (rx_was_enabled) {
+               /* a pending RXD is written back first; presumably the bit
+                * is write-one-to-clear - TODO confirm against the datasheet
+                */
+               if (rx_reg & MAC_RX_RXD_) {
+                       lan743x_csr_write(adapter, MAC_RX, rx_reg);
+                       rx_reg &= ~MAC_RX_RXD_;
+               }
+
+               rx_reg &= ~MAC_RX_RXEN_;
+               lan743x_csr_write(adapter, MAC_RX, rx_reg);
+               lan743x_csr_wait_for_bit(adapter, MAC_RX, MAC_RX_RXD_,
+                                        1, 1000, 20000, 100);
+               lan743x_csr_write(adapter, MAC_RX, rx_reg | MAC_RX_RXD_);
+       }
+
+       rx_reg &= ~(MAC_RX_MAX_SIZE_MASK_);
+       rx_reg |= (((new_mtu + ETH_HLEN + 4) << MAC_RX_MAX_SIZE_SHIFT_) &
+                 MAC_RX_MAX_SIZE_MASK_);
+       lan743x_csr_write(adapter, MAC_RX, rx_reg);
+
+       if (rx_was_enabled)
+               lan743x_csr_write(adapter, MAC_RX, rx_reg | MAC_RX_RXEN_);
+
+       return 0;
+}
+
+/* PHY */
+/* Set PMT_CTL_ETH_PHY_RST_ in PMT_CTL, then poll every 50000 us, for up
+ * to 1000000 us, until that bit reads back clear and PMT_CTL_READY_ is
+ * set.  Returns whatever readx_poll_timeout() reports.
+ */
+static int lan743x_phy_reset(struct lan743x_adapter *adapter)
+{
+       u32 pmt_ctl;
+
+       /* Only called within probe, and before mdiobus_register */
+
+       pmt_ctl = lan743x_csr_read(adapter, PMT_CTL);
+       lan743x_csr_write(adapter, PMT_CTL, pmt_ctl | PMT_CTL_ETH_PHY_RST_);
+
+       return readx_poll_timeout(LAN743X_CSR_READ_OP, PMT_CTL, pmt_ctl,
+                                 (!(pmt_ctl & PMT_CTL_ETH_PHY_RST_) &&
+                                 (pmt_ctl & PMT_CTL_READY_)),
+                                 50000, 1000000);
+}
+
+/* Choose the flow control capabilities and apply them to the MAC.
+ *
+ * With phy->fc_autoneg set the capabilities are resolved from @local_adv
+ * and @remote_adv; otherwise phy->fc_request_control is used as is.
+ * @duplex is not consulted here.
+ */
+static void lan743x_phy_update_flowcontrol(struct lan743x_adapter *adapter,
+                                          u8 duplex, u16 local_adv,
+                                          u16 remote_adv)
+{
+       struct lan743x_phy *phy = &adapter->phy;
+       u8 pause = phy->fc_autoneg ?
+                  mii_resolve_flowctrl_fdx(local_adv, remote_adv) :
+                  phy->fc_request_control;
+
+       lan743x_mac_flow_ctrl_set_enables(adapter,
+                                         pause & FLOW_CTRL_TX,
+                                         pause & FLOW_CTRL_RX);
+}
+
+/* PHY initialisation consists solely of a PHY reset; its result is
+ * returned unchanged.
+ */
+static int lan743x_phy_init(struct lan743x_adapter *adapter)
+{
+       int err = lan743x_phy_reset(adapter);
+
+       return err;
+}
+
+/* Link change callback handed to phy_connect_direct().
+ *
+ * Prints the PHY status and, while the PHY is in PHY_RUNNING, reads the
+ * local (MII_ADVERTISE) and link partner (MII_LPA) advertisements and
+ * updates the MAC flow control from them.  A failed PHY read abandons the
+ * update.
+ */
+static void lan743x_phy_link_status_change(struct net_device *netdev)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+       struct phy_device *phydev = netdev->phydev;
+       struct ethtool_link_ksettings ksettings;
+       int lcl_adv;
+       int rmt_adv;
+
+       phy_print_status(phydev);
+       if (phydev->state != PHY_RUNNING)
+               return;
+
+       memset(&ksettings, 0, sizeof(ksettings));
+       phy_ethtool_get_link_ksettings(netdev, &ksettings);
+
+       lcl_adv = phy_read(phydev, MII_ADVERTISE);
+       if (lcl_adv < 0)
+               return;
+
+       rmt_adv = phy_read(phydev, MII_LPA);
+       if (rmt_adv < 0)
+               return;
+
+       lan743x_phy_update_flowcontrol(adapter, ksettings.base.duplex,
+                                      lcl_adv, rmt_adv);
+}
+
+/* Stop the PHY attached to the net device, disconnect it and clear
+ * netdev->phydev.
+ */
+static void lan743x_phy_close(struct lan743x_adapter *adapter)
+{
+       struct net_device *netdev = adapter->netdev;
+       struct phy_device *phydev = netdev->phydev;
+
+       phy_stop(phydev);
+       phy_disconnect(phydev);
+       netdev->phydev = NULL;
+}
+
+/* Attach the first PHY found on the adapter's MDIO bus and start it.
+ *
+ * 1000baseT half duplex is removed from the supported modes, both pause
+ * directions are requested and advertised, and autonegotiation is
+ * started.
+ *
+ * Returns 0 on success, -EIO when no PHY is found, or the error from
+ * phy_connect_direct().
+ */
+static int lan743x_phy_open(struct lan743x_adapter *adapter)
+{
+       struct net_device *netdev = adapter->netdev;
+       struct lan743x_phy *phy = &adapter->phy;
+       struct phy_device *phydev;
+       u32 pause_adv;
+       int err;
+
+       phydev = phy_find_first(adapter->mdiobus);
+       if (!phydev)
+               return -EIO;
+
+       err = phy_connect_direct(netdev, phydev,
+                                lan743x_phy_link_status_change,
+                                PHY_INTERFACE_MODE_GMII);
+       if (err)
+               return err;
+
+       /* MAC doesn't support 1000T Half */
+       phydev->supported &= ~SUPPORTED_1000baseT_Half;
+
+       /* support both flow controls */
+       phy->fc_request_control = (FLOW_CTRL_RX | FLOW_CTRL_TX);
+       pause_adv = (u32)mii_advertise_flowctrl(phy->fc_request_control);
+       phydev->advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
+       phydev->advertising |= mii_adv_to_ethtool_adv_t(pause_adv);
+       phy->fc_autoneg = phydev->autoneg;
+
+       phy_start(phydev);
+       phy_start_aneg(phydev);
+       return 0;
+}
+
+/* Load adapter->mac_address into receive filter slot 0 and mark the slot
+ * valid.
+ */
+static void lan743x_rfe_update_mac_address(struct lan743x_adapter *adapter)
+{
+       u8 *mac = adapter->mac_address;
+       u32 filt_lo;
+       u32 filt_hi;
+
+       /* Add mac address to perfect Filter */
+       filt_lo = (u32)mac[0];
+       filt_lo |= (u32)mac[1] << 8;
+       filt_lo |= (u32)mac[2] << 16;
+       filt_lo |= (u32)mac[3] << 24;
+
+       filt_hi = (u32)mac[4];
+       filt_hi |= (u32)mac[5] << 8;
+       filt_hi |= RFE_ADDR_FILT_HI_VALID_;
+
+       lan743x_csr_write(adapter, RFE_ADDR_FILT_LO(0), filt_lo);
+       lan743x_csr_write(adapter, RFE_ADDR_FILT_HI(0), filt_hi);
+}
+
+/* Reprogram the receive filter from the net device's flags and multicast
+ * list.
+ *
+ * Broadcast is always accepted.  IFF_PROMISC adds all unicast and all
+ * multicast; IFF_ALLMULTI adds all multicast.  The first 32 multicast
+ * addresses go into perfect filter slots 1..32 (slot 0 is written by
+ * lan743x_rfe_update_mac_address()); any further address sets one bit of
+ * the hash table, indexed by bits 31..23 of the address CRC.  The hash
+ * table is written to the data port on every call; the result of that
+ * write is not checked.
+ */
+static void lan743x_rfe_set_multicast(struct lan743x_adapter *adapter)
+{
+       struct net_device *netdev = adapter->netdev;
+       u32 hash_table[DP_SEL_VHF_HASH_LEN];
+       u32 rfctl;
+       u32 data;
+
+       rfctl = lan743x_csr_read(adapter, RFE_CTL);
+       rfctl &= ~(RFE_CTL_AU_ | RFE_CTL_AM_ |
+                RFE_CTL_DA_PERFECT_ | RFE_CTL_MCAST_HASH_);
+       rfctl |= RFE_CTL_AB_;
+       if (netdev->flags & IFF_PROMISC) {
+               rfctl |= RFE_CTL_AM_ | RFE_CTL_AU_;
+       } else {
+               if (netdev->flags & IFF_ALLMULTI)
+                       rfctl |= RFE_CTL_AM_;
+       }
+
+       memset(hash_table, 0, sizeof(hash_table));
+       if (netdev_mc_count(netdev)) {
+               struct netdev_hw_addr *ha;
+               int i;
+
+               rfctl |= RFE_CTL_DA_PERFECT_;
+               i = 1;
+               netdev_for_each_mc_addr(ha, netdev) {
+                       /* set first 32 into Perfect Filter */
+                       if (i < 33) {
+                               /* invalidate the slot while it is rewritten */
+                               lan743x_csr_write(adapter,
+                                                 RFE_ADDR_FILT_HI(i), 0);
+                               data = ha->addr[3];
+                               data = ha->addr[2] | (data << 8);
+                               data = ha->addr[1] | (data << 8);
+                               data = ha->addr[0] | (data << 8);
+                               lan743x_csr_write(adapter,
+                                                 RFE_ADDR_FILT_LO(i), data);
+                               data = ha->addr[5];
+                               data = ha->addr[4] | (data << 8);
+                               data |= RFE_ADDR_FILT_HI_VALID_;
+                               lan743x_csr_write(adapter,
+                                                 RFE_ADDR_FILT_HI(i), data);
+                       } else {
+                               u32 bitnum = (ether_crc(ETH_ALEN, ha->addr) >>
+                                            23) & 0x1FF;
+
+                               /* shift an unsigned one: bit 31 does not
+                                * fit in a signed int
+                                */
+                               hash_table[bitnum / 32] |=
+                                       ((u32)1 << (bitnum % 32));
+                               rfctl |= RFE_CTL_MCAST_HASH_;
+                       }
+                       i++;
+               }
+       }
+
+       lan743x_dp_write(adapter, DP_SEL_RFE_RAM,
+                        DP_SEL_VHF_VLAN_LEN,
+                        DP_SEL_VHF_HASH_LEN, hash_table);
+       lan743x_csr_write(adapter, RFE_CTL, rfctl);
+}
+
+/* Software-reset the DMA controller and write its configuration,
+ * coalescing and OBFF registers.
+ *
+ * Returns 0 on success, or -EPERM when DEFAULT_DMA_DESCRIPTOR_SPACING is
+ * not one of the handled spacings.  The result of waiting for the reset
+ * bit to clear is not checked.
+ */
+static int lan743x_dmac_init(struct lan743x_adapter *adapter)
+{
+       u32 dmac_cfg;
+       u32 coal_cfg;
+       u32 obff_cfg;
+
+       lan743x_csr_write(adapter, DMAC_CMD, DMAC_CMD_SWR_);
+       lan743x_csr_wait_for_bit(adapter, DMAC_CMD, DMAC_CMD_SWR_,
+                                0, 1000, 20000, 100);
+
+       switch (DEFAULT_DMA_DESCRIPTOR_SPACING) {
+       case DMA_DESCRIPTOR_SPACING_16:
+               dmac_cfg = DMAC_CFG_MAX_DSPACE_16_;
+               break;
+       case DMA_DESCRIPTOR_SPACING_32:
+               dmac_cfg = DMAC_CFG_MAX_DSPACE_32_;
+               break;
+       case DMA_DESCRIPTOR_SPACING_64:
+               dmac_cfg = DMAC_CFG_MAX_DSPACE_64_;
+               break;
+       case DMA_DESCRIPTOR_SPACING_128:
+               dmac_cfg = DMAC_CFG_MAX_DSPACE_128_;
+               break;
+       default:
+               return -EPERM;
+       }
+
+       /* coalescing is left off when LAN743X_CSR_FLAG_IS_A0 is set */
+       if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0))
+               dmac_cfg |= DMAC_CFG_COAL_EN_;
+       dmac_cfg |= DMAC_CFG_CH_ARB_SEL_RX_HIGH_ |
+                   DMAC_CFG_MAX_READ_REQ_SET_(6);
+       lan743x_csr_write(adapter, DMAC_CFG, dmac_cfg);
+
+       coal_cfg = DMAC_COAL_CFG_TIMER_LIMIT_SET_(1) |
+                  DMAC_COAL_CFG_TIMER_TX_START_ |
+                  DMAC_COAL_CFG_FLUSH_INTS_ |
+                  DMAC_COAL_CFG_INT_EXIT_COAL_ |
+                  DMAC_COAL_CFG_CSR_EXIT_COAL_ |
+                  DMAC_COAL_CFG_TX_THRES_SET_(0x0A) |
+                  DMAC_COAL_CFG_RX_THRES_SET_(0x0C);
+       lan743x_csr_write(adapter, DMAC_COAL_CFG, coal_cfg);
+
+       obff_cfg = DMAC_OBFF_TX_THRES_SET_(0x08) |
+                  DMAC_OBFF_RX_THRES_SET_(0x0A);
+       lan743x_csr_write(adapter, DMAC_OBFF_CFG, obff_cfg);
+
+       return 0;
+}
+
+/* Derive the state of TX DMA channel @tx_channel from the channel's start
+ * and stop bits in DMAC_CMD, combined by DMAC_CHANNEL_STATE_SET().
+ */
+static int lan743x_dmac_tx_get_state(struct lan743x_adapter *adapter,
+                                    int tx_channel)
+{
+       u32 cmd_reg = lan743x_csr_read(adapter, DMAC_CMD);
+
+       return DMAC_CHANNEL_STATE_SET((cmd_reg &
+                                     DMAC_CMD_START_T_(tx_channel)),
+                                     (cmd_reg &
+                                     DMAC_CMD_STOP_T_(tx_channel)));
+}
+
+/* Wait for TX DMA channel @tx_channel to leave the stop-pending state.
+ *
+ * The state is sampled up to 100 times with a 1000-20000 us sleep after
+ * each pending sample.  Returns the first state that is not
+ * DMAC_CHANNEL_STATE_STOP_PENDING, or -ENODEV when every sample was
+ * pending.
+ */
+static int lan743x_dmac_tx_wait_till_stopped(struct lan743x_adapter *adapter,
+                                            int tx_channel)
+{
+       int tries_left = 100;
+       int state;
+
+       do {
+               state = lan743x_dmac_tx_get_state(adapter, tx_channel);
+               if (state != DMAC_CHANNEL_STATE_STOP_PENDING)
+                       return state;
+               usleep_range(1000, 20000);
+       } while (--tries_left);
+
+       return -ENODEV;
+}
+
+/* Derive the state of RX DMA channel @rx_channel from the channel's start
+ * and stop bits in DMAC_CMD, combined by DMAC_CHANNEL_STATE_SET().
+ */
+static int lan743x_dmac_rx_get_state(struct lan743x_adapter *adapter,
+                                    int rx_channel)
+{
+       u32 cmd_reg = lan743x_csr_read(adapter, DMAC_CMD);
+
+       return DMAC_CHANNEL_STATE_SET((cmd_reg &
+                                     DMAC_CMD_START_R_(rx_channel)),
+                                     (cmd_reg &
+                                     DMAC_CMD_STOP_R_(rx_channel)));
+}
+
+/* Wait for RX DMA channel @rx_channel to leave the stop-pending state.
+ *
+ * The state is sampled up to 100 times with a 1000-20000 us sleep after
+ * each pending sample.  Returns the first state that is not
+ * DMAC_CHANNEL_STATE_STOP_PENDING, or -ENODEV when every sample was
+ * pending.
+ */
+static int lan743x_dmac_rx_wait_till_stopped(struct lan743x_adapter *adapter,
+                                            int rx_channel)
+{
+       int tries_left = 100;
+       int state;
+
+       do {
+               state = lan743x_dmac_rx_get_state(adapter, rx_channel);
+               if (state != DMAC_CHANNEL_STATE_STOP_PENDING)
+                       return state;
+               usleep_range(1000, 20000);
+       } while (--tries_left);
+
+       return -ENODEV;
+}
+
+/* Release whatever the TX ring slot at @descriptor_index holds, then zero
+ * both its buffer_info entry and its descriptor.
+ *
+ * Only an active slot holding a data descriptor has resources to give
+ * back: its DMA mapping (unmapped as a page or as a single buffer,
+ * depending on the fragment flag) and its skb, when present.  @cleanup is
+ * accepted but not consulted here.
+ */
+static void lan743x_tx_release_desc(struct lan743x_tx *tx,
+                                   int descriptor_index, bool cleanup)
+{
+       struct lan743x_tx_buffer_info *info;
+       struct lan743x_tx_descriptor *desc;
+       bool holds_data = false;
+       u32 dtype;
+
+       desc = &tx->ring_cpu_ptr[descriptor_index];
+       info = &tx->buffer_info[descriptor_index];
+
+       /* the descriptor type is only looked at for active slots */
+       if (info->flags & TX_BUFFER_INFO_FLAG_ACTIVE) {
+               dtype = (desc->data0) & TX_DESC_DATA0_DTYPE_MASK_;
+               holds_data = (dtype == TX_DESC_DATA0_DTYPE_DATA_);
+       }
+
+       if (holds_data && info->dma_ptr) {
+               if (info->flags & TX_BUFFER_INFO_FLAG_SKB_FRAGMENT)
+                       dma_unmap_page(&tx->adapter->pdev->dev,
+                                      info->dma_ptr,
+                                      info->buffer_length,
+                                      DMA_TO_DEVICE);
+               else
+                       dma_unmap_single(&tx->adapter->pdev->dev,
+                                        info->dma_ptr,
+                                        info->buffer_length,
+                                        DMA_TO_DEVICE);
+       }
+
+       if (holds_data && info->skb)
+               dev_kfree_skb(info->skb);
+
+       /* zeroing the entry also clears the ACTIVE flag, dma_ptr,
+        * buffer_length and skb
+        */
+       memset(info, 0, sizeof(*info));
+       memset(desc, 0, sizeof(*desc));
+}
+
+/* Return the TX ring index that follows @index, wrapping at ring_size. */
+static int lan743x_tx_next_index(struct lan743x_tx *tx, int index)
+{
+       int next = index + 1;
+
+       return next % tx->ring_size;
+}
+
+/* Release every TX slot from tx->last_head up to, but not including, the
+ * head index currently stored at *tx->head_cpu_ptr. The head value is
+ * re-read on every iteration.
+ */
+static void lan743x_tx_release_completed_descriptors(struct lan743x_tx *tx)
+{
+       for (;;) {
+               if ((*tx->head_cpu_ptr) == (tx->last_head))
+                       break;
+               lan743x_tx_release_desc(tx, tx->last_head, false);
+               tx->last_head = lan743x_tx_next_index(tx, tx->last_head);
+       }
+}
+
+/* Release every slot of the TX ring, walking one full lap that starts and
+ * ends at tx->last_head, then zero the whole descriptor ring and the whole
+ * buffer_info array.
+ */
+static void lan743x_tx_release_all_descriptors(struct lan743x_tx *tx)
+{
+       const u32 lap_start = tx->last_head;
+
+       do {
+               lan743x_tx_release_desc(tx, tx->last_head, true);
+               tx->last_head = lan743x_tx_next_index(tx, tx->last_head);
+       } while (tx->last_head != lap_start);
+
+       memset(tx->ring_cpu_ptr, 0,
+              sizeof(*tx->ring_cpu_ptr) * (tx->ring_size));
+       memset(tx->buffer_info, 0,
+              sizeof(*tx->buffer_info) * (tx->ring_size));
+}
+
+/* Number of TX descriptors needed for @skb: one for the linear (head)
+ * buffer, one per page fragment, plus one extension descriptor when the
+ * skb is GSO.
+ */
+static int lan743x_tx_get_desc_cnt(struct lan743x_tx *tx,
+                                  struct sk_buff *skb)
+{
+       int count = 1 + skb_shinfo(skb)->nr_frags;
+
+       if (skb_is_gso(skb))
+               count++;
+       return count;
+}
+
+/* Number of free TX descriptors between last_tail and last_head.
+ * One slot is always left unused, so an empty ring (tail == head)
+ * reports ring_size - 1.
+ */
+static int lan743x_tx_get_avail_desc(struct lan743x_tx *tx)
+{
+       int head = tx->last_head;
+       int tail = tx->last_tail;
+
+       if (tail < head)
+               return head - tail - 1;
+
+       /* tail is at or past head: free space wraps around the ring end */
+       return tx->ring_size - tail + head - 1;
+}
+
+/* Begin assembling a TX frame in the slot at tx->last_tail.
+ *
+ * Called only from within lan743x_tx_xmit_frame, assuming tx->ring_lock
+ * has already been acquired.
+ *
+ * Maps @first_buffer for DMA and fills data1..data3 of the first
+ * descriptor. data0 is only staged in tx->frame_data0; it is written to
+ * the descriptor by whichever frame assembler function runs next.
+ *
+ * Returns 0 on success or -ENOMEM if the DMA mapping fails (in which case
+ * TX_FRAME_FLAG_IN_PROGRESS has already been set).
+ */
+static int lan743x_tx_frame_start(struct lan743x_tx *tx,
+                                 unsigned char *first_buffer,
+                                 unsigned int first_buffer_length,
+                                 unsigned int frame_length,
+                                 bool check_sum)
+{
+       struct device *dev = &tx->adapter->pdev->dev;
+       struct lan743x_tx_buffer_info *info;
+       struct lan743x_tx_descriptor *desc;
+       dma_addr_t mapping;
+
+       tx->frame_flags |= TX_FRAME_FLAG_IN_PROGRESS;
+       tx->frame_first = tx->last_tail;
+       tx->frame_tail = tx->frame_first;
+
+       mapping = dma_map_single(dev, first_buffer, first_buffer_length,
+                                DMA_TO_DEVICE);
+       if (dma_mapping_error(dev, mapping))
+               return -ENOMEM;
+
+       desc = &tx->ring_cpu_ptr[tx->frame_tail];
+       desc->data1 = DMA_ADDR_LOW32(mapping);
+       desc->data2 = DMA_ADDR_HIGH32(mapping);
+       desc->data3 = (frame_length << 16) &
+                     TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_;
+
+       info = &tx->buffer_info[tx->frame_tail];
+       info->skb = NULL;
+       info->dma_ptr = mapping;
+       info->buffer_length = first_buffer_length;
+       info->flags |= TX_BUFFER_INFO_FLAG_ACTIVE;
+
+       /* stage data0 for the first descriptor */
+       tx->frame_data0 = (first_buffer_length &
+                          TX_DESC_DATA0_BUF_LENGTH_MASK_) |
+                         TX_DESC_DATA0_DTYPE_DATA_ |
+                         TX_DESC_DATA0_FS_ |
+                         TX_DESC_DATA0_FCS_;
+       if (check_sum)
+               tx->frame_data0 |= TX_DESC_DATA0_ICE_ |
+                                  TX_DESC_DATA0_IPE_ |
+                                  TX_DESC_DATA0_TPE_;
+
+       return 0;
+}
+
+/* Append an LSO extension descriptor to the frame being assembled.
+ *
+ * Called only from within lan743x_tx_xmit_frame, assuming tx->ring_lock
+ * has already been acquired.
+ *
+ * The staged data0 of the previous descriptor is committed with the EXT
+ * bit set, then the next slot is set up as an extension descriptor with no
+ * buffer attached. Its own data0 is staged in tx->frame_data0 and written
+ * by the next frame assembler function.
+ */
+static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx,
+                                    unsigned int frame_length)
+{
+       struct lan743x_tx_buffer_info *info;
+       struct lan743x_tx_descriptor *desc;
+
+       /* commit the previous descriptor, flagged as extended */
+       tx->frame_data0 |= TX_DESC_DATA0_EXT_;
+       tx->ring_cpu_ptr[tx->frame_tail].data0 = tx->frame_data0;
+
+       /* step to the slot that will hold the extension */
+       tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail);
+       desc = &tx->ring_cpu_ptr[tx->frame_tail];
+       info = &tx->buffer_info[tx->frame_tail];
+
+       desc->data1 = 0;
+       desc->data2 = 0;
+       desc->data3 = 0;
+
+       info->skb = NULL;
+       info->dma_ptr = 0;
+       info->buffer_length = 0;
+       info->flags |= TX_BUFFER_INFO_FLAG_ACTIVE;
+
+       tx->frame_data0 = (frame_length & TX_DESC_DATA0_EXT_PAY_LENGTH_MASK_) |
+                         TX_DESC_DATA0_DTYPE_EXT_ |
+                         TX_DESC_DATA0_EXT_LSO_;
+}
+
+/* Append one skb page fragment to the frame being assembled.
+ *
+ * Called only from within lan743x_tx_xmit_frame, assuming tx->ring_lock
+ * has already been acquired.
+ *
+ * A zero-length fragment is skipped and reported as success. Otherwise the
+ * staged data0 of the previous descriptor is committed, the next slot is
+ * claimed and the fragment is DMA-mapped into it. If the mapping fails,
+ * every descriptor already set up for this frame is released and the frame
+ * assembly state is reset.
+ *
+ * Returns 0 on success, -ENOMEM on DMA mapping failure.
+ */
+static int lan743x_tx_frame_add_fragment(struct lan743x_tx *tx,
+                                        const struct skb_frag_struct *fragment,
+                                        unsigned int frame_length)
+{
+       struct device *dev = &tx->adapter->pdev->dev;
+       unsigned int frag_len = skb_frag_size(fragment);
+       struct lan743x_tx_buffer_info *info;
+       struct lan743x_tx_descriptor *desc;
+       dma_addr_t mapping;
+       int i;
+
+       if (!frag_len)
+               return 0;
+
+       /* commit the previous descriptor */
+       tx->ring_cpu_ptr[tx->frame_tail].data0 = tx->frame_data0;
+
+       /* claim the next slot */
+       tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail);
+
+       mapping = skb_frag_dma_map(dev, fragment, 0, frag_len, DMA_TO_DEVICE);
+       if (dma_mapping_error(dev, mapping)) {
+               /* undo everything set up for this frame so far; the slot
+                * at frame_tail itself was never filled in
+                */
+               for (i = tx->frame_first; i != tx->frame_tail;
+                    i = lan743x_tx_next_index(tx, i))
+                       lan743x_tx_release_desc(tx, i, true);
+               dma_wmb();
+               tx->frame_flags &= ~TX_FRAME_FLAG_IN_PROGRESS;
+               tx->frame_first = 0;
+               tx->frame_data0 = 0;
+               tx->frame_tail = 0;
+               return -ENOMEM;
+       }
+
+       desc = &tx->ring_cpu_ptr[tx->frame_tail];
+       desc->data1 = DMA_ADDR_LOW32(mapping);
+       desc->data2 = DMA_ADDR_HIGH32(mapping);
+       desc->data3 = (frame_length << 16) &
+                     TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_;
+
+       info = &tx->buffer_info[tx->frame_tail];
+       info->skb = NULL;
+       info->dma_ptr = mapping;
+       info->buffer_length = frag_len;
+       info->flags |= TX_BUFFER_INFO_FLAG_ACTIVE;
+       info->flags |= TX_BUFFER_INFO_FLAG_SKB_FRAGMENT;
+
+       /* stage data0; it is written by the next frame assembler function */
+       tx->frame_data0 = (frag_len & TX_DESC_DATA0_BUF_LENGTH_MASK_) |
+                         TX_DESC_DATA0_DTYPE_DATA_ |
+                         TX_DESC_DATA0_FCS_;
+
+       return 0;
+}
+
+/* Finish the frame being assembled and hand it to the hardware.
+ *
+ * Called only from within lan743x_tx_xmit_frame, assuming tx->ring_lock
+ * has already been acquired.
+ *
+ * The final descriptor gets the LS and IOC bits and takes ownership of
+ * @skb through its buffer_info entry. After a dma_wmb() the new tail index
+ * (together with any auto-set interrupt enable flags) is written to the
+ * channel's TX_TAIL register.
+ */
+static void lan743x_tx_frame_end(struct lan743x_tx *tx,
+                                struct sk_buff *skb,
+                                bool ignore_sync)
+{
+       struct lan743x_tx_buffer_info *info = &tx->buffer_info[tx->frame_tail];
+       struct lan743x_adapter *adapter = tx->adapter;
+       u32 tail_flags = 0;
+
+       /* the skb is remembered on the last descriptor of the frame */
+       info->skb = skb;
+       if (ignore_sync)
+               info->flags |= TX_BUFFER_INFO_FLAG_IGNORE_SYNC;
+
+       /* commit the last descriptor */
+       tx->frame_data0 |= TX_DESC_DATA0_LS_ | TX_DESC_DATA0_IOC_;
+       tx->ring_cpu_ptr[tx->frame_tail].data0 = tx->frame_data0;
+
+       tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail);
+       tx->last_tail = tx->frame_tail;
+
+       /* descriptor writes must be ordered before the tail write below */
+       dma_wmb();
+
+       if (tx->vector_flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET)
+               tail_flags |= TX_TAIL_SET_TOP_INT_VEC_EN_;
+       if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET)
+               tail_flags |= TX_TAIL_SET_DMAC_INT_EN_ |
+                             TX_TAIL_SET_TOP_INT_EN_;
+
+       lan743x_csr_write(adapter, TX_TAIL(tx->channel_number),
+                         tail_flags | tx->frame_tail);
+       tx->frame_flags &= ~TX_FRAME_FLAG_IN_PROGRESS;
+}
+
+/* Queue @skb on the TX ring.
+ *
+ * When the ring lacks room, an skb that could never fit (it needs more
+ * than ring_size - 1 descriptors) is dropped; otherwise it is parked in
+ * tx->overflow_skb and the netdev queue is stopped. An skb is also
+ * dropped when a DMA mapping fails while the frame is being assembled.
+ *
+ * Always returns NETDEV_TX_OK; the skb is consumed in every case.
+ */
+static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx,
+                                        struct sk_buff *skb)
+{
+       int required_number_of_descriptors = 0;
+       unsigned int start_frame_length = 0;
+       unsigned int frame_length = 0;
+       unsigned int head_length = 0;
+       unsigned long irq_flags = 0;
+       bool ignore_sync = false;
+       int nr_frags = 0;
+       bool gso = false;
+       int j;
+
+       required_number_of_descriptors = lan743x_tx_get_desc_cnt(tx, skb);
+
+       /* Everything below runs with interrupts disabled, so skbs are
+        * dropped with dev_kfree_skb_any() rather than dev_kfree_skb(),
+        * which must not be called in that context.
+        */
+       spin_lock_irqsave(&tx->ring_lock, irq_flags);
+       if (required_number_of_descriptors >
+               lan743x_tx_get_avail_desc(tx)) {
+               if (required_number_of_descriptors > (tx->ring_size - 1)) {
+                       /* can never fit, even in an empty ring */
+                       dev_kfree_skb_any(skb);
+               } else {
+                       /* save to overflow buffer */
+                       tx->overflow_skb = skb;
+                       netif_stop_queue(tx->adapter->netdev);
+               }
+               goto unlock;
+       }
+
+       /* space available, transmit skb  */
+       head_length = skb_headlen(skb);
+       frame_length = skb_pagelen(skb);
+       nr_frags = skb_shinfo(skb)->nr_frags;
+       start_frame_length = frame_length;
+       gso = skb_is_gso(skb);
+       if (gso) {
+               /* for GSO the first descriptor carries the segment size,
+                * clamped to a minimum of 8
+                */
+               start_frame_length = max(skb_shinfo(skb)->gso_size,
+                                        (unsigned short)8);
+       }
+
+       if (lan743x_tx_frame_start(tx,
+                                  skb->data, head_length,
+                                  start_frame_length,
+                                  skb->ip_summed == CHECKSUM_PARTIAL)) {
+               dev_kfree_skb_any(skb);
+               goto unlock;
+       }
+
+       if (gso)
+               lan743x_tx_frame_add_lso(tx, frame_length);
+
+       for (j = 0; j < nr_frags; j++) {
+               const struct skb_frag_struct *frag;
+
+               frag = &(skb_shinfo(skb)->frags[j]);
+               if (lan743x_tx_frame_add_fragment(tx, frag, frame_length)) {
+                       /* upon error no need to call
+                        *      lan743x_tx_frame_end
+                        * frame assembler clean up was performed inside
+                        *      lan743x_tx_frame_add_fragment
+                        */
+                       dev_kfree_skb_any(skb);
+                       goto unlock;
+               }
+       }
+
+       lan743x_tx_frame_end(tx, skb, ignore_sync);
+
+unlock:
+       spin_unlock_irqrestore(&tx->ring_lock, irq_flags);
+       return NETDEV_TX_OK;
+}
+
+/* NAPI poll handler for a TX channel.
+ *
+ * Releases descriptors the hardware has completed, retransmits a parked
+ * overflow skb once enough descriptors are free (or simply wakes the
+ * queue when nothing is parked), then completes the poll and re-enables
+ * the channel's TX interrupt.
+ *
+ * TX completion work is not counted against @weight. The poll is always
+ * completed here, so 0 is returned: a poll function that has completed
+ * must report less than its budget.
+ */
+static int lan743x_tx_napi_poll(struct napi_struct *napi, int weight)
+{
+       struct lan743x_tx *tx = container_of(napi, struct lan743x_tx, napi);
+       struct lan743x_adapter *adapter = tx->adapter;
+       bool start_transmitter = false;
+       unsigned long irq_flags = 0;
+       u32 ioc_bit = 0;
+
+       ioc_bit = DMAC_INT_BIT_TX_IOC_(tx->channel_number);
+       /* The value read is not used. NOTE(review): the read is kept
+        * because the status register may be read-to-clear in some vector
+        * modes - confirm before removing.
+        */
+       lan743x_csr_read(adapter, DMAC_INT_STS);
+       if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C)
+               lan743x_csr_write(adapter, DMAC_INT_STS, ioc_bit);
+       spin_lock_irqsave(&tx->ring_lock, irq_flags);
+
+       /* clean up tx ring */
+       lan743x_tx_release_completed_descriptors(tx);
+       if (netif_queue_stopped(adapter->netdev)) {
+               if (tx->overflow_skb) {
+                       if (lan743x_tx_get_desc_cnt(tx, tx->overflow_skb) <=
+                               lan743x_tx_get_avail_desc(tx))
+                               start_transmitter = true;
+               } else {
+                       netif_wake_queue(adapter->netdev);
+               }
+       }
+       spin_unlock_irqrestore(&tx->ring_lock, irq_flags);
+
+       if (start_transmitter) {
+               /* space is now available, transmit overflow skb */
+               lan743x_tx_xmit_frame(tx, tx->overflow_skb);
+               tx->overflow_skb = NULL;
+               netif_wake_queue(adapter->netdev);
+       }
+
+       if (napi_complete_done(napi, 0)) {
+               /* enable isr */
+               lan743x_csr_write(adapter, INT_EN_SET,
+                                 INT_BIT_DMA_TX_(tx->channel_number));
+               lan743x_csr_read(adapter, INT_STS);
+       }
+
+       return 0;
+}
+
+/* Free the memory backing a TX ring: the head write-back word, the
+ * buffer_info array and the descriptor ring. Each pointer is reset after
+ * being freed and ring_size is set to 0, so calling this on a partially
+ * initialised ring is fine.
+ */
+static void lan743x_tx_ring_cleanup(struct lan743x_tx *tx)
+{
+       /* head write-back location */
+       if (tx->head_cpu_ptr) {
+               pci_free_consistent(tx->adapter->pdev,
+                                   sizeof(*tx->head_cpu_ptr),
+                                   (void *)(tx->head_cpu_ptr),
+                                   tx->head_dma_ptr);
+               tx->head_dma_ptr = 0;
+               tx->head_cpu_ptr = NULL;
+       }
+
+       /* per-descriptor bookkeeping; kfree() accepts NULL */
+       kfree(tx->buffer_info);
+       tx->buffer_info = NULL;
+
+       /* descriptor ring */
+       if (tx->ring_cpu_ptr) {
+               pci_free_consistent(tx->adapter->pdev,
+                                   tx->ring_allocation_size,
+                                   tx->ring_cpu_ptr,
+                                   tx->ring_dma_ptr);
+               tx->ring_dma_ptr = 0;
+               tx->ring_cpu_ptr = NULL;
+               tx->ring_allocation_size = 0;
+       }
+
+       tx->ring_size = 0;
+}
+
+/* Allocate the memory backing a TX ring: a page-aligned, zeroed
+ * descriptor ring of LAN743X_TX_RING_SIZE entries, a zeroed buffer_info
+ * array of the same length, and a DMA-coherent word that receives the
+ * head write-back.
+ *
+ * Returns 0 on success. On failure everything allocated so far is freed
+ * through lan743x_tx_ring_cleanup() and an error is returned: -EINVAL if
+ * the ring size does not fit TX_CFG_B_TX_RING_LEN_MASK_, -ENOMEM if an
+ * allocation fails or the head write-back address is not 4-byte aligned.
+ */
+static int lan743x_tx_ring_init(struct lan743x_tx *tx)
+{
+       size_t ring_bytes;
+       dma_addr_t bus_addr;
+       void *mem;
+       int err;
+
+       tx->ring_size = LAN743X_TX_RING_SIZE;
+       if (tx->ring_size & ~TX_CFG_B_TX_RING_LEN_MASK_) {
+               err = -EINVAL;
+               goto cleanup;
+       }
+
+       /* every remaining failure is reported as out of memory */
+       err = -ENOMEM;
+
+       /* descriptor ring */
+       ring_bytes = ALIGN(tx->ring_size *
+                          sizeof(struct lan743x_tx_descriptor),
+                          PAGE_SIZE);
+       bus_addr = 0;
+       mem = pci_zalloc_consistent(tx->adapter->pdev, ring_bytes, &bus_addr);
+       if (!mem)
+               goto cleanup;
+       tx->ring_allocation_size = ring_bytes;
+       tx->ring_cpu_ptr = (struct lan743x_tx_descriptor *)mem;
+       tx->ring_dma_ptr = bus_addr;
+
+       /* per-descriptor bookkeeping */
+       mem = kcalloc(tx->ring_size, sizeof(*tx->buffer_info), GFP_KERNEL);
+       if (!mem)
+               goto cleanup;
+       tx->buffer_info = (struct lan743x_tx_buffer_info *)mem;
+
+       /* head write-back location */
+       bus_addr = 0;
+       mem = pci_zalloc_consistent(tx->adapter->pdev,
+                                   sizeof(*tx->head_cpu_ptr), &bus_addr);
+       if (!mem)
+               goto cleanup;
+       tx->head_cpu_ptr = mem;
+       tx->head_dma_ptr = bus_addr;
+       if (tx->head_dma_ptr & 0x3)
+               goto cleanup;
+
+       return 0;
+
+cleanup:
+       lan743x_tx_ring_cleanup(tx);
+       return err;
+}
+
+/* Shut down a TX channel and free its resources.
+ *
+ * Order of operations: stop the DMA channel and wait for it to stop, mask
+ * the channel's interrupts, disable and delete the NAPI context, disable
+ * the TX FIFO and wait for its enable bit to clear, release every ring
+ * slot, drop a parked overflow skb if there is one, and finally free the
+ * ring memory.
+ */
+static void lan743x_tx_close(struct lan743x_tx *tx)
+{
+       struct lan743x_adapter *adapter = tx->adapter;
+
+       /* stop the DMA channel; the wait's result is not checked */
+       lan743x_csr_write(adapter,
+                         DMAC_CMD,
+                         DMAC_CMD_STOP_T_(tx->channel_number));
+       lan743x_dmac_tx_wait_till_stopped(adapter, tx->channel_number);
+
+       /* mask the channel's interrupt sources before tearing down NAPI */
+       lan743x_csr_write(adapter,
+                         DMAC_INT_EN_CLR,
+                         DMAC_INT_BIT_TX_IOC_(tx->channel_number));
+       lan743x_csr_write(adapter, INT_EN_CLR,
+                         INT_BIT_DMA_TX_(tx->channel_number));
+       napi_disable(&tx->napi);
+       netif_napi_del(&tx->napi);
+
+       /* disable the TX FIFO and wait for the enable bit to clear */
+       lan743x_csr_write(adapter, FCT_TX_CTL,
+                         FCT_TX_CTL_DIS_(tx->channel_number));
+       lan743x_csr_wait_for_bit(adapter, FCT_TX_CTL,
+                                FCT_TX_CTL_EN_(tx->channel_number),
+                                0, 1000, 20000, 100);
+
+       lan743x_tx_release_all_descriptors(tx);
+
+       /* an skb parked by lan743x_tx_xmit_frame was never put on the ring */
+       if (tx->overflow_skb) {
+               dev_kfree_skb(tx->overflow_skb);
+               tx->overflow_skb = NULL;
+       }
+
+       lan743x_tx_ring_cleanup(tx);
+}
+
+/* Bring up a TX channel.
+ *
+ * Allocates the ring memory, resets and enables the TX FIFO, resets the
+ * DMA channel, programs the ring base address, ring length, configuration
+ * registers and head write-back address, initialises the software
+ * head/tail indices, registers and enables the NAPI context, configures
+ * and enables the channel's interrupts, and finally starts the DMA
+ * channel.
+ *
+ * Returns 0 on success or the error from lan743x_tx_ring_init(). The
+ * results of the lan743x_csr_wait_for_bit() calls are not checked.
+ */
+static int lan743x_tx_open(struct lan743x_tx *tx)
+{
+       struct lan743x_adapter *adapter = NULL;
+       u32 data = 0;
+       int ret;
+
+       adapter = tx->adapter;
+       ret = lan743x_tx_ring_init(tx);
+       if (ret)
+               return ret;
+
+       /* initialize fifo */
+       lan743x_csr_write(adapter, FCT_TX_CTL,
+                         FCT_TX_CTL_RESET_(tx->channel_number));
+       lan743x_csr_wait_for_bit(adapter, FCT_TX_CTL,
+                                FCT_TX_CTL_RESET_(tx->channel_number),
+                                0, 1000, 20000, 100);
+
+       /* enable fifo */
+       lan743x_csr_write(adapter, FCT_TX_CTL,
+                         FCT_TX_CTL_EN_(tx->channel_number));
+
+       /* reset tx channel */
+       lan743x_csr_write(adapter, DMAC_CMD,
+                         DMAC_CMD_TX_SWR_(tx->channel_number));
+       lan743x_csr_wait_for_bit(adapter, DMAC_CMD,
+                                DMAC_CMD_TX_SWR_(tx->channel_number),
+                                0, 1000, 20000, 100);
+
+       /* Write TX_BASE_ADDR */
+       lan743x_csr_write(adapter,
+                         TX_BASE_ADDRH(tx->channel_number),
+                         DMA_ADDR_HIGH32(tx->ring_dma_ptr));
+       lan743x_csr_write(adapter,
+                         TX_BASE_ADDRL(tx->channel_number),
+                         DMA_ADDR_LOW32(tx->ring_dma_ptr));
+
+       /* Write TX_CFG_B: ring length, plus TDMABL_512 on non-A0 parts */
+       data = lan743x_csr_read(adapter, TX_CFG_B(tx->channel_number));
+       data &= ~TX_CFG_B_TX_RING_LEN_MASK_;
+       data |= ((tx->ring_size) & TX_CFG_B_TX_RING_LEN_MASK_);
+       if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0))
+               data |= TX_CFG_B_TDMABL_512_;
+       lan743x_csr_write(adapter, TX_CFG_B(tx->channel_number), data);
+
+       /* Write TX_CFG_A: head write-back enabled; the threshold fields
+        * are only set on non-A0 parts
+        */
+       data = TX_CFG_A_TX_TMR_HPWB_SEL_IOC_ | TX_CFG_A_TX_HP_WB_EN_;
+       if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
+               data |= TX_CFG_A_TX_HP_WB_ON_INT_TMR_;
+               data |= TX_CFG_A_TX_PF_THRES_SET_(0x10);
+               data |= TX_CFG_A_TX_PF_PRI_THRES_SET_(0x04);
+               data |= TX_CFG_A_TX_HP_WB_THRES_SET_(0x07);
+       }
+       lan743x_csr_write(adapter, TX_CFG_A(tx->channel_number), data);
+
+       /* Write TX_HEAD_WRITEBACK_ADDR */
+       lan743x_csr_write(adapter,
+                         TX_HEAD_WRITEBACK_ADDRH(tx->channel_number),
+                         DMA_ADDR_HIGH32(tx->head_dma_ptr));
+       lan743x_csr_write(adapter,
+                         TX_HEAD_WRITEBACK_ADDRL(tx->channel_number),
+                         DMA_ADDR_LOW32(tx->head_dma_ptr));
+
+       /* set last head */
+       tx->last_head = lan743x_csr_read(adapter, TX_HEAD(tx->channel_number));
+
+       /* write TX_TAIL */
+       tx->last_tail = 0;
+       lan743x_csr_write(adapter, TX_TAIL(tx->channel_number),
+                         (u32)(tx->last_tail));
+       tx->vector_flags = lan743x_intr_get_vector_flags(adapter,
+                                                        INT_BIT_DMA_TX_
+                                                        (tx->channel_number));
+       /* the NAPI weight is set to ring_size - 1 */
+       netif_napi_add(adapter->netdev,
+                      &tx->napi, lan743x_tx_napi_poll,
+                      tx->ring_size - 1);
+       napi_enable(&tx->napi);
+
+       /* Write TX_CFG_C: translate the vector flags into register bits */
+       data = 0;
+       if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR)
+               data |= TX_CFG_C_TX_TOP_INT_EN_AUTO_CLR_;
+       if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR)
+               data |= TX_CFG_C_TX_DMA_INT_STS_AUTO_CLR_;
+       if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C)
+               data |= TX_CFG_C_TX_INT_STS_R2C_MODE_MASK_;
+       if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C)
+               data |= TX_CFG_C_TX_INT_EN_R2C_;
+       lan743x_csr_write(adapter, TX_CFG_C(tx->channel_number), data);
+
+       /* without auto-set, the top-level TX interrupt is enabled here;
+        * with it, lan743x_tx_frame_end passes enable flags in TX_TAIL
+        */
+       if (!(tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET))
+               lan743x_csr_write(adapter, INT_EN_SET,
+                                 INT_BIT_DMA_TX_(tx->channel_number));
+       lan743x_csr_write(adapter, DMAC_INT_EN_SET,
+                         DMAC_INT_BIT_TX_IOC_(tx->channel_number));
+
+       /*  start dmac channel */
+       lan743x_csr_write(adapter, DMAC_CMD,
+                         DMAC_CMD_START_T_(tx->channel_number));
+       return 0;
+}
+
+/* Return the RX ring index that follows @index, wrapping at ring_size. */
+static int lan743x_rx_next_index(struct lan743x_rx *rx, int index)
+{
+       int next = index + 1;
+
+       return next % rx->ring_size;
+}
+
+/* Attach a freshly allocated, DMA-mapped receive skb to RX ring slot
+ * @index and give the descriptor to the hardware by setting OWN.
+ *
+ * Returns 0 on success, -ENOMEM if the skb allocation or the DMA mapping
+ * fails. After a mapping failure the skb stays attached to the slot with
+ * dma_ptr cleared, and the descriptor is left untouched.
+ */
+static int lan743x_rx_allocate_ring_element(struct lan743x_rx *rx, int index)
+{
+       struct lan743x_rx_buffer_info *info = &rx->buffer_info[index];
+       struct lan743x_rx_descriptor *desc = &rx->ring_cpu_ptr[index];
+       int length = LAN743X_MAX_FRAME_SIZE + ETH_HLEN + 4 + RX_HEAD_PADDING;
+
+       info->skb = __netdev_alloc_skb(rx->adapter->netdev,
+                                      length,
+                                      GFP_ATOMIC | GFP_DMA);
+       if (!info->skb)
+               return -ENOMEM;
+
+       info->dma_ptr = dma_map_single(&rx->adapter->pdev->dev,
+                                      info->skb->data,
+                                      length,
+                                      DMA_FROM_DEVICE);
+       if (dma_mapping_error(&rx->adapter->pdev->dev, info->dma_ptr)) {
+               info->dma_ptr = 0;
+               return -ENOMEM;
+       }
+       info->buffer_length = length;
+
+       desc->data1 = DMA_ADDR_LOW32(info->dma_ptr);
+       desc->data2 = DMA_ADDR_HIGH32(info->dma_ptr);
+       desc->data3 = 0;
+       desc->data0 = RX_DESC_DATA0_OWN_ |
+                     (length & RX_DESC_DATA0_BUF_LENGTH_MASK_);
+
+       /* the mapping starts at the unreserved data pointer; the head
+        * padding is skipped only on the skb side
+        */
+       skb_reserve(info->skb, RX_HEAD_PADDING);
+
+       return 0;
+}
+
+/* Hand RX ring slot @index back to the hardware as-is: the descriptor is
+ * rewritten from the DMA address and length already recorded in the
+ * slot's buffer_info, with OWN set. No buffer is allocated or mapped.
+ */
+static void lan743x_rx_reuse_ring_element(struct lan743x_rx *rx, int index)
+{
+       struct lan743x_rx_buffer_info *info = &rx->buffer_info[index];
+       struct lan743x_rx_descriptor *desc = &rx->ring_cpu_ptr[index];
+
+       desc->data1 = DMA_ADDR_LOW32(info->dma_ptr);
+       desc->data2 = DMA_ADDR_HIGH32(info->dma_ptr);
+       desc->data3 = 0;
+       desc->data0 = RX_DESC_DATA0_OWN_ |
+                     (info->buffer_length & RX_DESC_DATA0_BUF_LENGTH_MASK_);
+}
+
+/* Tear down RX ring slot @index: zero the descriptor, undo the DMA
+ * mapping if there is one, free the skb if there is one, and zero the
+ * slot's buffer_info.
+ */
+static void lan743x_rx_release_ring_element(struct lan743x_rx *rx, int index)
+{
+       struct lan743x_rx_buffer_info *info = &rx->buffer_info[index];
+       struct lan743x_rx_descriptor *desc = &rx->ring_cpu_ptr[index];
+
+       /* the descriptor is cleared before its buffer is taken away */
+       memset(desc, 0, sizeof(*desc));
+
+       if (info->dma_ptr)
+               dma_unmap_single(&rx->adapter->pdev->dev,
+                                info->dma_ptr,
+                                info->buffer_length,
+                                DMA_FROM_DEVICE);
+
+       if (info->skb)
+               dev_kfree_skb(info->skb);
+
+       /* zeroing also resets dma_ptr and skb */
+       memset(info, 0, sizeof(*info));
+}
+
+/* Try to consume one received packet starting at rx->last_head.
+ *
+ * The descriptors between rx->last_head and the head index read from
+ * *rx->head_cpu_ptr are scanned for a run that starts with a descriptor
+ * having FS set and ends with one having LS set. If the last descriptor
+ * has EXT set, an extension descriptor is expected to follow; its data1
+ * and data2 are used as the timestamp seconds and nanoseconds.
+ *
+ * A packet held in a single buffer is passed to napi_gro_receive() and
+ * its slot is refilled. A packet spread over several buffers is not
+ * supported: its slots are released and refilled and it is dropped.
+ *
+ * Returns RX_PROCESS_RESULT_PACKET_RECEIVED, RX_PROCESS_RESULT_PACKET_DROPPED
+ * or, when no complete packet is available yet (or an index is out of
+ * range), RX_PROCESS_RESULT_NOTHING_TO_DO.
+ */
+static int lan743x_rx_process_packet(struct lan743x_rx *rx)
+{
+       struct skb_shared_hwtstamps *hwtstamps = NULL;
+       int result = RX_PROCESS_RESULT_NOTHING_TO_DO;
+       struct lan743x_rx_buffer_info *buffer_info;
+       struct lan743x_rx_descriptor *descriptor;
+       int current_head_index = -1;
+       int extension_index = -1;
+       int first_index = -1;
+       int last_index = -1;
+
+       /* sanity-check both indices before using them on the ring */
+       current_head_index = *rx->head_cpu_ptr;
+       if (current_head_index < 0 || current_head_index >= rx->ring_size)
+               goto done;
+
+       if (rx->last_head < 0 || rx->last_head >= rx->ring_size)
+               goto done;
+
+       if (rx->last_head != current_head_index) {
+               descriptor = &rx->ring_cpu_ptr[rx->last_head];
+               /* OWN still set: the slot has not been handed back yet */
+               if (descriptor->data0 & RX_DESC_DATA0_OWN_)
+                       goto done;
+
+               /* a packet must start with a descriptor that has FS set */
+               if (!(descriptor->data0 & RX_DESC_DATA0_FS_))
+                       goto done;
+
+               first_index = rx->last_head;
+               if (descriptor->data0 & RX_DESC_DATA0_LS_) {
+                       last_index = rx->last_head;
+               } else {
+                       int index;
+
+                       /* look for the LS descriptor, stopping at the head;
+                        * last_index stays -1 if none is found
+                        */
+                       index = lan743x_rx_next_index(rx, first_index);
+                       while (index != current_head_index) {
+                               descriptor = &rx->ring_cpu_ptr[index];
+                               if (descriptor->data0 & RX_DESC_DATA0_OWN_)
+                                       goto done;
+
+                               if (descriptor->data0 & RX_DESC_DATA0_LS_) {
+                                       last_index = index;
+                                       break;
+                               }
+                               index = lan743x_rx_next_index(rx, index);
+                       }
+               }
+               if (last_index >= 0) {
+                       descriptor = &rx->ring_cpu_ptr[last_index];
+                       if (descriptor->data0 & RX_DESC_DATA0_EXT_) {
+                               /* extension is expected to follow */
+                               int index = lan743x_rx_next_index(rx,
+                                                                 last_index);
+                               if (index != current_head_index) {
+                                       descriptor = &rx->ring_cpu_ptr[index];
+                                       if (descriptor->data0 &
+                                           RX_DESC_DATA0_OWN_) {
+                                               goto done;
+                                       }
+                                       if (descriptor->data0 &
+                                           RX_DESC_DATA0_EXT_) {
+                                               extension_index = index;
+                                       } else {
+                                               goto done;
+                                       }
+                               } else {
+                                       /* extension is not yet available */
+                                       /* prevent processing of this packet */
+                                       first_index = -1;
+                                       last_index = -1;
+                               }
+                       }
+               }
+       }
+       if (first_index >= 0 && last_index >= 0) {
+               int real_last_index = last_index;
+               struct sk_buff *skb = NULL;
+               u32 ts_sec = 0;
+               u32 ts_nsec = 0;
+
+               /* packet is available */
+               if (first_index == last_index) {
+                       /* single buffer packet */
+                       int packet_length;
+
+                       buffer_info = &rx->buffer_info[first_index];
+                       skb = buffer_info->skb;
+                       descriptor = &rx->ring_cpu_ptr[first_index];
+
+                       /* unmap from dma */
+                       if (buffer_info->dma_ptr) {
+                               dma_unmap_single(&rx->adapter->pdev->dev,
+                                                buffer_info->dma_ptr,
+                                                buffer_info->buffer_length,
+                                                DMA_FROM_DEVICE);
+                               buffer_info->dma_ptr = 0;
+                               buffer_info->buffer_length = 0;
+                       }
+                       /* the skb now belongs to this function, not the ring */
+                       buffer_info->skb = NULL;
+                       packet_length = RX_DESC_DATA0_FRAME_LENGTH_GET_
+                                       (descriptor->data0);
+                       /* NOTE(review): the 4 bytes left off are presumably
+                        * the trailing FCS - confirm
+                        */
+                       skb_put(skb, packet_length - 4);
+                       skb->protocol = eth_type_trans(skb,
+                                                      rx->adapter->netdev);
+                       /* NOTE(review): the refill result is not checked; on
+                        * failure the slot is not handed back to the hardware
+                        */
+                       lan743x_rx_allocate_ring_element(rx, first_index);
+               } else {
+                       int index = first_index;
+
+                       /* multi buffer packet not supported */
+                       /* this should not happen since
+                        * buffers are allocated to be at least jumbo size
+                        */
+
+                       /* clean up buffers */
+                       /* NOTE(review): both loops below rely on the wrapped
+                        * index leaving the [first, last] window; a run that
+                        * covers the entire ring would never leave it
+                        */
+                       if (first_index <= last_index) {
+                               while ((index >= first_index) &&
+                                      (index <= last_index)) {
+                                       lan743x_rx_release_ring_element(rx,
+                                                                       index);
+                                       lan743x_rx_allocate_ring_element(rx,
+                                                                        index);
+                                       index = lan743x_rx_next_index(rx,
+                                                                     index);
+                               }
+                       } else {
+                               /* the run wraps around the end of the ring */
+                               while ((index >= first_index) ||
+                                      (index <= last_index)) {
+                                       lan743x_rx_release_ring_element(rx,
+                                                                       index);
+                                       lan743x_rx_allocate_ring_element(rx,
+                                                                        index);
+                                       index = lan743x_rx_next_index(rx,
+                                                                     index);
+                               }
+                       }
+               }
+
+               if (extension_index >= 0) {
+                       descriptor = &rx->ring_cpu_ptr[extension_index];
+                       buffer_info = &rx->buffer_info[extension_index];
+
+                       /* capture the timestamp before the slot is recycled */
+                       ts_sec = descriptor->data1;
+                       ts_nsec = (descriptor->data2 &
+                                 RX_DESC_DATA2_TS_NS_MASK_);
+                       lan743x_rx_reuse_ring_element(rx, extension_index);
+                       real_last_index = extension_index;
+               }
+
+               /* skb is only set on the single buffer path above */
+               if (!skb) {
+                       result = RX_PROCESS_RESULT_PACKET_DROPPED;
+                       goto move_forward;
+               }
+
+               if (extension_index < 0)
+                       goto pass_packet_to_os;
+               hwtstamps = skb_hwtstamps(skb);
+               if (hwtstamps)
+                       hwtstamps->hwtstamp = ktime_set(ts_sec, ts_nsec);
+
+pass_packet_to_os:
+               /* pass packet to OS */
+               napi_gro_receive(&rx->napi, skb);
+               result = RX_PROCESS_RESULT_PACKET_RECEIVED;
+
+move_forward:
+               /* push tail and head forward */
+               rx->last_tail = real_last_index;
+               rx->last_head = lan743x_rx_next_index(rx, real_last_index);
+       }
+done:
+       return result;
+}
+
+/* NAPI poll callback for one RX channel.
+ *
+ * Processes packets until @weight packets have been received or the ring
+ * has nothing left to do, then, if polling is finished, re-enables the
+ * channel interrupt and writes the new tail index to RX_TAIL.
+ *
+ * Returns the number of packets received in this call.
+ */
+static int lan743x_rx_napi_poll(struct napi_struct *napi, int weight)
+{
+       struct lan743x_rx *rx = container_of(napi, struct lan743x_rx, napi);
+       struct lan743x_adapter *adapter = rx->adapter;
+       u32 tail_value = 0;
+       int received = 0;
+
+       /* clear int status bit before reading packet */
+       if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C)
+               lan743x_csr_write(adapter, DMAC_INT_STS,
+                                 DMAC_INT_BIT_RXFRM_(rx->channel_number));
+
+       /* only received packets count against the budget; a dropped
+        * packet (or any other result) simply moves on to the next one
+        */
+       while (received < weight) {
+               int outcome = lan743x_rx_process_packet(rx);
+
+               if (outcome == RX_PROCESS_RESULT_PACKET_RECEIVED)
+                       received++;
+               else if (outcome == RX_PROCESS_RESULT_NOTHING_TO_DO)
+                       break;
+       }
+       rx->frame_count += received;
+
+       /* skip re-arming when the budget was used up or when
+        * napi_complete_done() returns false
+        */
+       if (received == weight || !napi_complete_done(napi, received))
+               return received;
+
+       if (rx->vector_flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET)
+               tail_value |= RX_TAIL_SET_TOP_INT_VEC_EN_;
+       if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET)
+               tail_value |= RX_TAIL_SET_TOP_INT_EN_;
+       else
+               lan743x_csr_write(adapter, INT_EN_SET,
+                                 INT_BIT_DMA_RX_(rx->channel_number));
+
+       /* update RX_TAIL */
+       tail_value |= rx->last_tail;
+       lan743x_csr_write(adapter, RX_TAIL(rx->channel_number), tail_value);
+       return received;
+}
+
+/* Release everything lan743x_rx_ring_init() set up for @rx.
+ *
+ * Safe to call on a partially initialised ring: each resource is only
+ * freed when its pointer is set, and every pointer is cleared afterwards.
+ */
+static void lan743x_rx_ring_cleanup(struct lan743x_rx *rx)
+{
+       int i;
+
+       /* ring elements can only be released while both the descriptor
+        * ring and the buffer bookkeeping array still exist
+        */
+       if (rx->buffer_info && rx->ring_cpu_ptr) {
+               for (i = 0; i < rx->ring_size; i++)
+                       lan743x_rx_release_ring_element(rx, i);
+       }
+
+       /* head write-back area */
+       if (rx->head_cpu_ptr) {
+               pci_free_consistent(rx->adapter->pdev,
+                                   sizeof(*rx->head_cpu_ptr),
+                                   rx->head_cpu_ptr, rx->head_dma_ptr);
+               rx->head_dma_ptr = 0;
+               rx->head_cpu_ptr = NULL;
+       }
+
+       /* buffer bookkeeping array */
+       kfree(rx->buffer_info);
+       rx->buffer_info = NULL;
+
+       /* descriptor ring */
+       if (rx->ring_cpu_ptr) {
+               pci_free_consistent(rx->adapter->pdev,
+                                   rx->ring_allocation_size,
+                                   rx->ring_cpu_ptr, rx->ring_dma_ptr);
+               rx->ring_dma_ptr = 0;
+               rx->ring_cpu_ptr = NULL;
+               rx->ring_allocation_size = 0;
+       }
+
+       rx->ring_size = 0;
+       rx->last_head = 0;
+}
+
+/* Allocate the RX descriptor ring, the per-descriptor buffer_info array,
+ * the head write-back word and one receive buffer per ring element.
+ *
+ * Returns 0 on success. On any failure everything allocated so far is
+ * released through lan743x_rx_ring_cleanup() and a negative errno is
+ * returned (-EINVAL for an unusable ring size, -ENOMEM for allocation or
+ * alignment problems, or the error from
+ * lan743x_rx_allocate_ring_element()).
+ */
+static int lan743x_rx_ring_init(struct lan743x_rx *rx)
+{
+       dma_addr_t dma_handle;
+       size_t ring_bytes;
+       void *mem;
+       int err;
+       int i;
+
+       rx->ring_size = LAN743X_RX_RING_SIZE;
+       /* the size must exceed 1 and fit the ring length mask */
+       if (rx->ring_size <= 1 ||
+           (rx->ring_size & ~RX_CFG_B_RX_RING_LEN_MASK_)) {
+               err = -EINVAL;
+               goto cleanup;
+       }
+
+       /* every failure up to the buffer allocation loop is -ENOMEM */
+       err = -ENOMEM;
+
+       /* descriptor ring, rounded up to whole pages */
+       ring_bytes = ALIGN(rx->ring_size *
+                          sizeof(struct lan743x_rx_descriptor),
+                          PAGE_SIZE);
+       dma_handle = 0;
+       mem = pci_zalloc_consistent(rx->adapter->pdev,
+                                   ring_bytes, &dma_handle);
+       if (!mem)
+               goto cleanup;
+       rx->ring_allocation_size = ring_bytes;
+       rx->ring_cpu_ptr = mem;
+       rx->ring_dma_ptr = dma_handle;
+
+       /* one buffer_info entry per descriptor */
+       mem = kcalloc(rx->ring_size, sizeof(*rx->buffer_info),
+                     GFP_KERNEL);
+       if (!mem)
+               goto cleanup;
+       rx->buffer_info = mem;
+
+       /* head write-back word */
+       dma_handle = 0;
+       mem = pci_zalloc_consistent(rx->adapter->pdev,
+                                   sizeof(*rx->head_cpu_ptr), &dma_handle);
+       if (!mem)
+               goto cleanup;
+       rx->head_cpu_ptr = mem;
+       rx->head_dma_ptr = dma_handle;
+       /* the write-back address must be 4-byte aligned */
+       if (rx->head_dma_ptr & 0x3)
+               goto cleanup;
+
+       rx->last_head = 0;
+       for (i = 0; i < rx->ring_size; i++) {
+               err = lan743x_rx_allocate_ring_element(rx, i);
+               if (err)
+                       goto cleanup;
+       }
+       return 0;
+
+cleanup:
+       lan743x_rx_ring_cleanup(rx);
+       return err;
+}
+
+/* Shut down one RX channel: disable its FIFO, stop its DMA channel, mask
+ * its interrupts, tear down the NAPI context and free the descriptor ring.
+ */
+static void lan743x_rx_close(struct lan743x_rx *rx)
+{
+       struct lan743x_adapter *adapter = rx->adapter;
+
+       /* request FIFO disable, then poll the FIFO enable bit
+        * (presumably until it reads back as 0 - confirm against
+        * lan743x_csr_wait_for_bit)
+        */
+       lan743x_csr_write(adapter, FCT_RX_CTL,
+                         FCT_RX_CTL_DIS_(rx->channel_number));
+       lan743x_csr_wait_for_bit(adapter, FCT_RX_CTL,
+                                FCT_RX_CTL_EN_(rx->channel_number),
+                                0, 1000, 20000, 100);
+
+       /* stop the RX DMA channel and wait for it to report stopped */
+       lan743x_csr_write(adapter, DMAC_CMD,
+                         DMAC_CMD_STOP_R_(rx->channel_number));
+       lan743x_dmac_rx_wait_till_stopped(adapter, rx->channel_number);
+
+       /* mask the frame interrupt at the DMAC and at the top level
+        * before disabling NAPI
+        */
+       lan743x_csr_write(adapter, DMAC_INT_EN_CLR,
+                         DMAC_INT_BIT_RXFRM_(rx->channel_number));
+       lan743x_csr_write(adapter, INT_EN_CLR,
+                         INT_BIT_DMA_RX_(rx->channel_number));
+       napi_disable(&rx->napi);
+
+       netif_napi_del(&rx->napi);
+
+       lan743x_rx_ring_cleanup(rx);
+}
+
+/* Bring up one RX channel.
+ *
+ * Allocates the descriptor ring, registers the NAPI context, programs the
+ * channel's DMA registers (ring base, head write-back address, RX_CFG_A/B/C,
+ * tail), enables interrupts, starts the DMA channel and finally resets and
+ * enables the RX FIFO.
+ *
+ * Returns 0 on success or a negative errno. If ring allocation fails the
+ * error from lan743x_rx_ring_init() is returned; if RX_HEAD does not read
+ * back as 0 after programming, the NAPI context and the ring are released
+ * again and -EIO is returned.
+ */
+static int lan743x_rx_open(struct lan743x_rx *rx)
+{
+       struct lan743x_adapter *adapter = rx->adapter;
+       u32 data = 0;
+       int ret;
+
+       rx->frame_count = 0;
+       ret = lan743x_rx_ring_init(rx);
+       if (ret)
+               goto return_error;
+
+       /* the NAPI weight is one less than the ring size */
+       netif_napi_add(adapter->netdev,
+                      &rx->napi, lan743x_rx_napi_poll,
+                      rx->ring_size - 1);
+
+       /* issue the RX_SWR command for this channel and wait on that bit
+        * (presumably a self-clearing software reset - confirm)
+        */
+       lan743x_csr_write(adapter, DMAC_CMD,
+                         DMAC_CMD_RX_SWR_(rx->channel_number));
+       lan743x_csr_wait_for_bit(adapter, DMAC_CMD,
+                                DMAC_CMD_RX_SWR_(rx->channel_number),
+                                0, 1000, 20000, 100);
+
+       /* set ring base address */
+       lan743x_csr_write(adapter,
+                         RX_BASE_ADDRH(rx->channel_number),
+                         DMA_ADDR_HIGH32(rx->ring_dma_ptr));
+       lan743x_csr_write(adapter,
+                         RX_BASE_ADDRL(rx->channel_number),
+                         DMA_ADDR_LOW32(rx->ring_dma_ptr));
+
+       /* set rx write back address */
+       lan743x_csr_write(adapter,
+                         RX_HEAD_WRITEBACK_ADDRH(rx->channel_number),
+                         DMA_ADDR_HIGH32(rx->head_dma_ptr));
+       lan743x_csr_write(adapter,
+                         RX_HEAD_WRITEBACK_ADDRL(rx->channel_number),
+                         DMA_ADDR_LOW32(rx->head_dma_ptr));
+       data = RX_CFG_A_RX_HP_WB_EN_;
+       /* the extra write-back/prefetch settings are skipped on A0 parts */
+       if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
+               data |= (RX_CFG_A_RX_WB_ON_INT_TMR_ |
+                       RX_CFG_A_RX_WB_THRES_SET_(0x7) |
+                       RX_CFG_A_RX_PF_THRES_SET_(16) |
+                       RX_CFG_A_RX_PF_PRI_THRES_SET_(4));
+       }
+
+       /* set RX_CFG_A */
+       lan743x_csr_write(adapter,
+                         RX_CFG_A(rx->channel_number), data);
+
+       /* set RX_CFG_B */
+       /* read-modify-write: only padding, ring length, TS_ALL_RX and
+        * (on non-A0 parts) RDMABL are changed
+        */
+       data = lan743x_csr_read(adapter, RX_CFG_B(rx->channel_number));
+       data &= ~RX_CFG_B_RX_PAD_MASK_;
+       if (!RX_HEAD_PADDING)
+               data |= RX_CFG_B_RX_PAD_0_;
+       else
+               data |= RX_CFG_B_RX_PAD_2_;
+       data &= ~RX_CFG_B_RX_RING_LEN_MASK_;
+       data |= ((rx->ring_size) & RX_CFG_B_RX_RING_LEN_MASK_);
+       data |= RX_CFG_B_TS_ALL_RX_;
+       if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0))
+               data |= RX_CFG_B_RDMABL_512_;
+
+       lan743x_csr_write(adapter, RX_CFG_B(rx->channel_number), data);
+       /* cache the vector flags; they select the RX_CFG_C bits below and
+        * are consulted again in lan743x_rx_napi_poll()
+        */
+       rx->vector_flags = lan743x_intr_get_vector_flags(adapter,
+                                                        INT_BIT_DMA_RX_
+                                                        (rx->channel_number));
+
+       /* set RX_CFG_C */
+       data = 0;
+       if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR)
+               data |= RX_CFG_C_RX_TOP_INT_EN_AUTO_CLR_;
+       if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR)
+               data |= RX_CFG_C_RX_DMA_INT_STS_AUTO_CLR_;
+       if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C)
+               data |= RX_CFG_C_RX_INT_STS_R2C_MODE_MASK_;
+       if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C)
+               data |= RX_CFG_C_RX_INT_EN_R2C_;
+       lan743x_csr_write(adapter, RX_CFG_C(rx->channel_number), data);
+
+       /* tail starts at the last ring index; head must read back as 0,
+        * anything else is treated as an I/O error
+        */
+       rx->last_tail = ((u32)(rx->ring_size - 1));
+       lan743x_csr_write(adapter, RX_TAIL(rx->channel_number),
+                         rx->last_tail);
+       rx->last_head = lan743x_csr_read(adapter, RX_HEAD(rx->channel_number));
+       if (rx->last_head) {
+               ret = -EIO;
+               goto napi_delete;
+       }
+
+       napi_enable(&rx->napi);
+
+       /* enable the top-level interrupt, write the frame status bit
+        * (presumably clearing stale status - confirm), enable the DMAC
+        * frame interrupt, then start the DMA channel
+        */
+       lan743x_csr_write(adapter, INT_EN_SET,
+                         INT_BIT_DMA_RX_(rx->channel_number));
+       lan743x_csr_write(adapter, DMAC_INT_STS,
+                         DMAC_INT_BIT_RXFRM_(rx->channel_number));
+       lan743x_csr_write(adapter, DMAC_INT_EN_SET,
+                         DMAC_INT_BIT_RXFRM_(rx->channel_number));
+       lan743x_csr_write(adapter, DMAC_CMD,
+                         DMAC_CMD_START_R_(rx->channel_number));
+
+       /* initialize fifo */
+       lan743x_csr_write(adapter, FCT_RX_CTL,
+                         FCT_RX_CTL_RESET_(rx->channel_number));
+       lan743x_csr_wait_for_bit(adapter, FCT_RX_CTL,
+                                FCT_RX_CTL_RESET_(rx->channel_number),
+                                0, 1000, 20000, 100);
+       lan743x_csr_write(adapter, FCT_FLOW(rx->channel_number),
+                         FCT_FLOW_CTL_REQ_EN_ |
+                         FCT_FLOW_CTL_ON_THRESHOLD_SET_(0x2A) |
+                         FCT_FLOW_CTL_OFF_THRESHOLD_SET_(0xA));
+
+       /* enable fifo */
+       lan743x_csr_write(adapter, FCT_RX_CTL,
+                         FCT_RX_CTL_EN_(rx->channel_number));
+       return 0;
+
+napi_delete:
+       netif_napi_del(&rx->napi);
+       lan743x_rx_ring_cleanup(rx);
+
+return_error:
+       return ret;
+}
+
+/* ndo_stop handler: close TX channel 0, every used RX channel, then the
+ * PHY, the MAC and the interrupt layer. Always returns 0.
+ */
+static int lan743x_netdev_close(struct net_device *netdev)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+       int channel = 0;
+
+       lan743x_tx_close(&adapter->tx[0]);
+
+       while (channel < LAN743X_USED_RX_CHANNELS) {
+               lan743x_rx_close(&adapter->rx[channel]);
+               channel++;
+       }
+
+       lan743x_phy_close(adapter);
+       lan743x_mac_close(adapter);
+       lan743x_intr_close(adapter);
+
+       return 0;
+}
+
+/* ndo_open handler: opens the interrupt layer, MAC, PHY, every used RX
+ * channel and TX channel 0, in that order.
+ *
+ * Returns 0 on success. On failure the stages that were already opened are
+ * closed again in reverse order, a warning is logged and the negative
+ * error code of the failing stage is returned.
+ */
+static int lan743x_netdev_open(struct net_device *netdev)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+       int index;
+       int ret;
+
+       ret = lan743x_intr_open(adapter);
+       if (ret)
+               goto return_error;
+
+       ret = lan743x_mac_open(adapter);
+       if (ret)
+               goto close_intr;
+
+       ret = lan743x_phy_open(adapter);
+       if (ret)
+               goto close_mac;
+
+       for (index = 0; index < LAN743X_USED_RX_CHANNELS; index++) {
+               ret = lan743x_rx_open(&adapter->rx[index]);
+               if (ret)
+                       goto close_rx;
+       }
+
+       ret = lan743x_tx_open(&adapter->tx[0]);
+       if (ret)
+               goto close_rx;
+
+       return 0;
+
+close_rx:
+       /* only channels that still own a descriptor ring were opened;
+        * lan743x_rx_ring_cleanup() clears ring_cpu_ptr for the others
+        */
+       for (index = 0; index < LAN743X_USED_RX_CHANNELS; index++) {
+               if (adapter->rx[index].ring_cpu_ptr)
+                       lan743x_rx_close(&adapter->rx[index]);
+       }
+       lan743x_phy_close(adapter);
+
+close_mac:
+       lan743x_mac_close(adapter);
+
+close_intr:
+       lan743x_intr_close(adapter);
+
+return_error:
+       netif_warn(adapter, ifup, adapter->netdev,
+                  "Error opening LAN743x\n");
+       return ret;
+}
+
+/* ndo_start_xmit handler: hands @skb to TX channel 0. */
+static netdev_tx_t lan743x_netdev_xmit_frame(struct sk_buff *skb,
+                                            struct net_device *netdev)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+       struct lan743x_tx *tx = &adapter->tx[0];
+
+       return lan743x_tx_xmit_frame(tx, skb);
+}
+
+/* ndo_do_ioctl handler: forwards the request to phy_mii_ioctl() while the
+ * interface is running, and returns -EINVAL otherwise.
+ */
+static int lan743x_netdev_ioctl(struct net_device *netdev,
+                               struct ifreq *ifr, int cmd)
+{
+       if (netif_running(netdev))
+               return phy_mii_ioctl(netdev->phydev, ifr, cmd);
+
+       return -EINVAL;
+}
+
+/* ndo_set_rx_mode handler: delegates to lan743x_rfe_set_multicast(). */
+static void lan743x_netdev_set_multicast(struct net_device *netdev)
+{
+       lan743x_rfe_set_multicast(netdev_priv(netdev));
+}
+
+/* ndo_change_mtu handler: programs the MAC first and only records
+ * @new_mtu in the netdev when that succeeded.
+ *
+ * Returns 0 on success or the error from lan743x_mac_set_mtu().
+ */
+static int lan743x_netdev_change_mtu(struct net_device *netdev, int new_mtu)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+       int err = lan743x_mac_set_mtu(adapter, new_mtu);
+
+       if (err)
+               return err;
+
+       netdev->mtu = new_mtu;
+       return 0;
+}
+
+/* ndo_get_stats64 handler: fills @stats directly from the device's STAT_*
+ * registers; every field is one register read or a sum of several.
+ *
+ * NOTE(review): each sum is formed from lan743x_csr_read() return values
+ * before being stored into the stats field - confirm that the resulting
+ * width and wrap-around behaviour are acceptable.
+ */
+static void lan743x_netdev_get_stats64(struct net_device *netdev,
+                                      struct rtnl_link_stats64 *stats)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+       stats->rx_packets = lan743x_csr_read(adapter, STAT_RX_TOTAL_FRAMES);
+       stats->tx_packets = lan743x_csr_read(adapter, STAT_TX_TOTAL_FRAMES);
+       /* byte counts are kept per cast type, so add all three up */
+       stats->rx_bytes = lan743x_csr_read(adapter,
+                                          STAT_RX_UNICAST_BYTE_COUNT) +
+                         lan743x_csr_read(adapter,
+                                          STAT_RX_BROADCAST_BYTE_COUNT) +
+                         lan743x_csr_read(adapter,
+                                          STAT_RX_MULTICAST_BYTE_COUNT);
+       stats->tx_bytes = lan743x_csr_read(adapter,
+                                          STAT_TX_UNICAST_BYTE_COUNT) +
+                         lan743x_csr_read(adapter,
+                                          STAT_TX_BROADCAST_BYTE_COUNT) +
+                         lan743x_csr_read(adapter,
+                                          STAT_TX_MULTICAST_BYTE_COUNT);
+       /* rx_errors: FCS + alignment + jabber + undersize + oversize */
+       stats->rx_errors = lan743x_csr_read(adapter, STAT_RX_FCS_ERRORS) +
+                          lan743x_csr_read(adapter,
+                                           STAT_RX_ALIGNMENT_ERRORS) +
+                          lan743x_csr_read(adapter, STAT_RX_JABBER_ERRORS) +
+                          lan743x_csr_read(adapter,
+                                           STAT_RX_UNDERSIZE_FRAME_ERRORS) +
+                          lan743x_csr_read(adapter,
+                                           STAT_RX_OVERSIZE_FRAME_ERRORS);
+       /* tx_errors: FCS + excess deferral + carrier */
+       stats->tx_errors = lan743x_csr_read(adapter, STAT_TX_FCS_ERRORS) +
+                          lan743x_csr_read(adapter,
+                                           STAT_TX_EXCESS_DEFERRAL_ERRORS) +
+                          lan743x_csr_read(adapter, STAT_TX_CARRIER_ERRORS);
+       stats->rx_dropped = lan743x_csr_read(adapter,
+                                            STAT_RX_DROPPED_FRAMES);
+       /* tx_dropped is taken from the excessive-collision counter */
+       stats->tx_dropped = lan743x_csr_read(adapter,
+                                            STAT_TX_EXCESSIVE_COLLISION);
+       /* multicast adds the RX and the TX multicast frame counters */
+       stats->multicast = lan743x_csr_read(adapter,
+                                           STAT_RX_MULTICAST_FRAMES) +
+                          lan743x_csr_read(adapter,
+                                           STAT_TX_MULTICAST_FRAMES);
+       stats->collisions = lan743x_csr_read(adapter,
+                                            STAT_TX_SINGLE_COLLISIONS) +
+                           lan743x_csr_read(adapter,
+                                            STAT_TX_MULTIPLE_COLLISIONS) +
+                           lan743x_csr_read(adapter,
+                                            STAT_TX_LATE_COLLISIONS);
+}
+
+/* ndo_set_mac_address handler.
+ *
+ * Once eth_prepare_mac_addr_change() accepts the request, the new address
+ * is copied into the netdev, written to the MAC and pushed to the RFE.
+ *
+ * Returns 0 on success or the error from eth_prepare_mac_addr_change().
+ */
+static int lan743x_netdev_set_mac_address(struct net_device *netdev,
+                                         void *addr)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+       struct sockaddr *sa = addr;
+       int err = eth_prepare_mac_addr_change(netdev, sa);
+
+       if (err)
+               return err;
+
+       ether_addr_copy(netdev->dev_addr, sa->sa_data);
+       lan743x_mac_set_address(adapter, sa->sa_data);
+       lan743x_rfe_update_mac_address(adapter);
+       return 0;
+}
+
+/* net_device callbacks; installed on the netdev by lan743x_pcidev_probe() */
+static const struct net_device_ops lan743x_netdev_ops = {
+       .ndo_open               = lan743x_netdev_open,
+       .ndo_stop               = lan743x_netdev_close,
+       .ndo_start_xmit         = lan743x_netdev_xmit_frame,
+       .ndo_do_ioctl           = lan743x_netdev_ioctl,
+       .ndo_set_rx_mode        = lan743x_netdev_set_multicast,
+       .ndo_change_mtu         = lan743x_netdev_change_mtu,
+       .ndo_get_stats64        = lan743x_netdev_get_stats64,
+       .ndo_set_mac_address    = lan743x_netdev_set_mac_address,
+};
+
+/* Write all-ones to INT_EN_CLR (presumably masking every interrupt
+ * source - confirm against the register description).
+ */
+static void lan743x_hardware_cleanup(struct lan743x_adapter *adapter)
+{
+       lan743x_csr_write(adapter, INT_EN_CLR, 0xFFFFFFFF);
+}
+
+/* Unregister the MDIO bus registered by lan743x_mdiobus_init(). The bus
+ * structure itself comes from devm_mdiobus_alloc() and is not freed here.
+ */
+static void lan743x_mdiobus_cleanup(struct lan743x_adapter *adapter)
+{
+       mdiobus_unregister(adapter->mdiobus);
+}
+
+/* Undo a successful lan743x_pcidev_probe(): unregister the netdev, then
+ * clean up the MDIO bus, the hardware and the PCI resources, i.e. the
+ * reverse of the order in which probe set them up.
+ */
+static void lan743x_full_cleanup(struct lan743x_adapter *adapter)
+{
+       unregister_netdev(adapter->netdev);
+
+       lan743x_mdiobus_cleanup(adapter);
+       lan743x_hardware_cleanup(adapter);
+       lan743x_pci_cleanup(adapter);
+}
+
+/* One-time hardware setup done from probe.
+ *
+ * Records the IRQ, writes all-ones to INT_EN_CLR, initialises the dp_lock
+ * mutex, runs the MAC, PHY and DMAC init helpers and fills in the
+ * back-pointer and channel number of every used RX channel and of TX
+ * channel 0.
+ *
+ * @pdev is not used here; the IRQ is taken from adapter->pdev.
+ *
+ * Returns 0 on success or the error of the first init helper that fails.
+ */
+static int lan743x_hardware_init(struct lan743x_adapter *adapter,
+                                struct pci_dev *pdev)
+{
+       struct lan743x_tx *tx = &adapter->tx[0];
+       int channel;
+       int err;
+
+       adapter->intr.irq = adapter->pdev->irq;
+       lan743x_csr_write(adapter, INT_EN_CLR, 0xFFFFFFFF);
+       mutex_init(&adapter->dp_lock);
+
+       err = lan743x_mac_init(adapter);
+       if (err)
+               return err;
+
+       err = lan743x_phy_init(adapter);
+       if (err)
+               return err;
+
+       lan743x_rfe_update_mac_address(adapter);
+
+       err = lan743x_dmac_init(adapter);
+       if (err)
+               return err;
+
+       for (channel = 0; channel < LAN743X_USED_RX_CHANNELS; channel++) {
+               struct lan743x_rx *rx = &adapter->rx[channel];
+
+               rx->adapter = adapter;
+               rx->channel_number = channel;
+       }
+
+       tx->adapter = adapter;
+       tx->channel_number = 0;
+       spin_lock_init(&tx->ring_lock);
+       return 0;
+}
+
+/* Allocate (device-managed), configure and register the MDIO bus.
+ *
+ * Returns 0 on success, -ENOMEM when the bus cannot be allocated, or the
+ * negative error from mdiobus_register().
+ */
+static int lan743x_mdiobus_init(struct lan743x_adapter *adapter)
+{
+       int err;
+
+       adapter->mdiobus = devm_mdiobus_alloc(&adapter->pdev->dev);
+       if (!(adapter->mdiobus))
+               return -ENOMEM;
+
+       adapter->mdiobus->priv = (void *)adapter;
+       adapter->mdiobus->read = lan743x_mdiobus_read;
+       adapter->mdiobus->write = lan743x_mdiobus_write;
+       adapter->mdiobus->name = "lan743x-mdiobus";
+       snprintf(adapter->mdiobus->id, MII_BUS_ID_SIZE,
+                "pci-%s", pci_name(adapter->pdev));
+
+       /* set to internal PHY id */
+       adapter->mdiobus->phy_mask = ~(u32)BIT(1);
+
+       /* register mdiobus */
+       err = mdiobus_register(adapter->mdiobus);
+       if (err < 0)
+               return err;
+
+       return 0;
+}
+
+/**
+ * lan743x_pcidev_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @id: entry in lan743x_pcidev_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ *
+ * The netdev is allocated with devm_alloc_etherdev(), so it is not freed
+ * explicitly on the error paths; the remaining stages are unwound in
+ * reverse order of their setup.
+ **/
+static int lan743x_pcidev_probe(struct pci_dev *pdev,
+                               const struct pci_device_id *id)
+{
+       struct lan743x_adapter *adapter = NULL;
+       struct net_device *netdev = NULL;
+       int ret = -ENODEV;
+
+       netdev = devm_alloc_etherdev(&pdev->dev,
+                                    sizeof(struct lan743x_adapter));
+       if (!netdev) {
+               /* a NULL netdev is an allocation failure, not a
+                * missing device
+                */
+               ret = -ENOMEM;
+               goto return_error;
+       }
+
+       SET_NETDEV_DEV(netdev, &pdev->dev);
+       pci_set_drvdata(pdev, netdev);
+       adapter = netdev_priv(netdev);
+       adapter->netdev = netdev;
+       adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE |
+                             NETIF_MSG_LINK | NETIF_MSG_IFUP |
+                             NETIF_MSG_IFDOWN | NETIF_MSG_TX_QUEUED;
+       netdev->max_mtu = LAN743X_MAX_FRAME_SIZE;
+
+       ret = lan743x_pci_init(adapter, pdev);
+       if (ret)
+               goto return_error;
+
+       ret = lan743x_csr_init(adapter);
+       if (ret)
+               goto cleanup_pci;
+
+       ret = lan743x_hardware_init(adapter, pdev);
+       if (ret)
+               goto cleanup_pci;
+
+       ret = lan743x_mdiobus_init(adapter);
+       if (ret)
+               goto cleanup_hardware;
+
+       adapter->netdev->netdev_ops = &lan743x_netdev_ops;
+       adapter->netdev->features = NETIF_F_SG | NETIF_F_TSO | NETIF_F_HW_CSUM;
+       adapter->netdev->hw_features = adapter->netdev->features;
+
+       /* carrier off reporting is important to ethtool even BEFORE open */
+       netif_carrier_off(netdev);
+
+       ret = register_netdev(adapter->netdev);
+       if (ret < 0)
+               goto cleanup_mdiobus;
+       return 0;
+
+cleanup_mdiobus:
+       lan743x_mdiobus_cleanup(adapter);
+
+cleanup_hardware:
+       lan743x_hardware_cleanup(adapter);
+
+cleanup_pci:
+       lan743x_pci_cleanup(adapter);
+
+return_error:
+       pr_warn("Initialization failed\n");
+       return ret;
+}
+
+/**
+ * lan743x_pcidev_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * Called by the PCI subsystem when the driver should release a PCI
+ * device, either because of a Hot-Plug event or because the driver is
+ * being removed from memory. Looks up the adapter stored as driver data
+ * and runs the full cleanup on it.
+ **/
+static void lan743x_pcidev_remove(struct pci_dev *pdev)
+{
+       struct net_device *netdev = pci_get_drvdata(pdev);
+
+       lan743x_full_cleanup(netdev_priv(netdev));
+}
+
+/* PCI shutdown callback: detaches the netdev, closes it if it is still
+ * running (both under the rtnl lock) and then runs the hardware cleanup.
+ */
+static void lan743x_pcidev_shutdown(struct pci_dev *pdev)
+{
+       struct net_device *netdev = pci_get_drvdata(pdev);
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+       rtnl_lock();
+       netif_device_detach(netdev);
+
+       /* close netdev when netdev is at running state.
+        * For instance, it is true when system goes to sleep by pm-suspend
+        * However, it is false when system goes to sleep by suspend GUI menu
+        */
+       if (netif_running(netdev))
+               lan743x_netdev_close(netdev);
+       rtnl_unlock();
+
+       /* clean up lan743x portion */
+       lan743x_hardware_cleanup(adapter);
+}
+
+/* PCI IDs handled by this driver; used as the id_table of
+ * lan743x_pcidev_driver. The all-zero entry terminates the list.
+ */
+static const struct pci_device_id lan743x_pcidev_tbl[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_LAN7430) },
+       { 0, }
+};
+
+/* PCI driver descriptor; registered through module_pci_driver() */
+static struct pci_driver lan743x_pcidev_driver = {
+       .name     = DRIVER_NAME,
+       .id_table = lan743x_pcidev_tbl,
+       .probe    = lan743x_pcidev_probe,
+       .remove   = lan743x_pcidev_remove,
+       .shutdown = lan743x_pcidev_shutdown,
+};
+
+module_pci_driver(lan743x_pcidev_driver);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
new file mode 100644 (file)
index 0000000..73b463a
--- /dev/null
@@ -0,0 +1,597 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (C) 2018 Microchip Technology Inc. */
+
+#ifndef _LAN743X_H
+#define _LAN743X_H
+
+#define DRIVER_AUTHOR   "Bryan Whitehead <Bryan.Whitehead@microchip.com>"
+#define DRIVER_DESC "LAN743x PCIe Gigabit Ethernet Driver"
+#define DRIVER_NAME "lan743x"
+
+/* Register Definitions */
+#define ID_REV                         (0x00)
+#define ID_REV_IS_VALID_CHIP_ID_(id_rev)       \
+       (((id_rev) & 0xFFF00000) == 0x74300000)
+#define ID_REV_CHIP_REV_MASK_          (0x0000FFFF)
+#define ID_REV_CHIP_REV_A0_            (0x00000000)
+#define ID_REV_CHIP_REV_B0_            (0x00000010)
+
+#define FPGA_REV                       (0x04)
+#define FPGA_REV_GET_MINOR_(fpga_rev)  (((fpga_rev) >> 8) & 0x000000FF)
+#define FPGA_REV_GET_MAJOR_(fpga_rev)  ((fpga_rev) & 0x000000FF)
+
+#define HW_CFG                                 (0x010)
+#define HW_CFG_LRST_                           BIT(1)
+
+#define PMT_CTL                                        (0x014)
+#define PMT_CTL_READY_                         BIT(7)
+#define PMT_CTL_ETH_PHY_RST_                   BIT(4)
+
+#define DP_SEL                         (0x024)
+#define DP_SEL_DPRDY_                  BIT(31)
+#define DP_SEL_MASK_                   (0x0000001F)
+#define DP_SEL_RFE_RAM                 (0x00000001)
+
+#define DP_SEL_VHF_HASH_LEN            (16)
+#define DP_SEL_VHF_VLAN_LEN            (128)
+
+#define DP_CMD                         (0x028)
+#define DP_CMD_WRITE_                  (0x00000001)
+
+#define DP_ADDR                                (0x02C)
+
+#define DP_DATA_0                      (0x030)
+
+#define FCT_RX_CTL                     (0xAC)
+#define FCT_RX_CTL_EN_(channel)                BIT(28 + (channel))
+#define FCT_RX_CTL_DIS_(channel)       BIT(24 + (channel))
+#define FCT_RX_CTL_RESET_(channel)     BIT(20 + (channel))
+
+#define FCT_TX_CTL                     (0xC4)
+#define FCT_TX_CTL_EN_(channel)                BIT(28 + (channel))
+#define FCT_TX_CTL_DIS_(channel)       BIT(24 + (channel))
+#define FCT_TX_CTL_RESET_(channel)     BIT(20 + (channel))
+
+#define FCT_FLOW(rx_channel)                   (0xE0 + ((rx_channel) << 2))
+#define FCT_FLOW_CTL_OFF_THRESHOLD_            (0x00007F00) /* bits 14:8 */
+#define FCT_FLOW_CTL_OFF_THRESHOLD_SET_(value) \
+       (((value) << 8) & FCT_FLOW_CTL_OFF_THRESHOLD_)
+#define FCT_FLOW_CTL_REQ_EN_                   BIT(7)
+#define FCT_FLOW_CTL_ON_THRESHOLD_             (0x0000007F) /* bits 6:0 */
+#define FCT_FLOW_CTL_ON_THRESHOLD_SET_(value)  \
+       (((value) << 0) & FCT_FLOW_CTL_ON_THRESHOLD_)
+
+#define MAC_CR                         (0x100)
+#define MAC_CR_ADD_                    BIT(12)
+#define MAC_CR_ASD_                    BIT(11)
+#define MAC_CR_CNTR_RST_               BIT(5)
+#define MAC_CR_RST_                    BIT(0)
+
+#define MAC_RX                         (0x104)
+#define MAC_RX_MAX_SIZE_SHIFT_         (16)
+#define MAC_RX_MAX_SIZE_MASK_          (0x3FFF0000)
+#define MAC_RX_RXD_                    BIT(1)
+#define MAC_RX_RXEN_                   BIT(0)
+
+#define MAC_TX                         (0x108)
+#define MAC_TX_TXD_                    BIT(1)
+#define MAC_TX_TXEN_                   BIT(0)
+
+#define MAC_FLOW                       (0x10C)
+#define MAC_FLOW_CR_TX_FCEN_           BIT(30)
+#define MAC_FLOW_CR_RX_FCEN_           BIT(29)
+#define MAC_FLOW_CR_FCPT_MASK_         (0x0000FFFF)
+
+#define MAC_RX_ADDRH                   (0x118)
+
+#define MAC_RX_ADDRL                   (0x11C)
+
+#define MAC_MII_ACC                    (0x120)
+#define MAC_MII_ACC_PHY_ADDR_SHIFT_    (11)
+#define MAC_MII_ACC_PHY_ADDR_MASK_     (0x0000F800)
+#define MAC_MII_ACC_MIIRINDA_SHIFT_    (6)
+#define MAC_MII_ACC_MIIRINDA_MASK_     (0x000007C0)
+#define MAC_MII_ACC_MII_READ_          (0x00000000)
+#define MAC_MII_ACC_MII_WRITE_         (0x00000002)
+#define MAC_MII_ACC_MII_BUSY_          BIT(0)
+
+#define MAC_MII_DATA                   (0x124)
+
+/* offset 0x400 - 0x500, x may range from 0 to 32, for a total of 33 entries */
+#define RFE_ADDR_FILT_HI(x)            (0x400 + (8 * (x)))
+#define RFE_ADDR_FILT_HI_VALID_                BIT(31)
+
+/* offset 0x404 - 0x504, x may range from 0 to 32, for a total of 33 entries */
+#define RFE_ADDR_FILT_LO(x)            (0x404 + (8 * (x)))
+
+#define RFE_CTL                                (0x508)
+#define RFE_CTL_AB_                    BIT(10)
+#define RFE_CTL_AM_                    BIT(9)
+#define RFE_CTL_AU_                    BIT(8)
+#define RFE_CTL_MCAST_HASH_            BIT(3)
+#define RFE_CTL_DA_PERFECT_            BIT(1)
+
+#define INT_STS                                (0x780)
+#define INT_BIT_DMA_RX_(channel)       BIT(24 + (channel))
+#define INT_BIT_ALL_RX_                        (0x0F000000)
+#define INT_BIT_DMA_TX_(channel)       BIT(16 + (channel))
+#define INT_BIT_ALL_TX_                        (0x000F0000)
+#define INT_BIT_SW_GP_                 BIT(9)
+#define INT_BIT_ALL_OTHER_             (0x00000280)
+#define INT_BIT_MAS_                   BIT(0)
+
+#define INT_SET                                (0x784)
+
+#define INT_EN_SET                     (0x788)
+
+#define INT_EN_CLR                     (0x78C)
+
+#define INT_STS_R2C                    (0x790)
+
+#define INT_VEC_EN_SET                 (0x794)
+#define INT_VEC_EN_CLR                 (0x798)
+#define INT_VEC_EN_AUTO_CLR            (0x79C)
+#define INT_VEC_EN_(vector_index)      BIT(0 + (vector_index))
+
+#define INT_VEC_MAP0                   (0x7A0)
+#define INT_VEC_MAP0_RX_VEC_(channel, vector)  \
+       (((u32)(vector)) << ((channel) << 2))
+
+#define INT_VEC_MAP1                   (0x7A4)
+#define INT_VEC_MAP1_TX_VEC_(channel, vector)  \
+       (((u32)(vector)) << ((channel) << 2))
+
+#define INT_VEC_MAP2                   (0x7A8)
+
+#define INT_MOD_MAP0                   (0x7B0)
+
+#define INT_MOD_MAP1                   (0x7B4)
+
+#define INT_MOD_MAP2                   (0x7B8)
+
+#define INT_MOD_CFG0                   (0x7C0)
+#define INT_MOD_CFG1                   (0x7C4)
+#define INT_MOD_CFG2                   (0x7C8)
+#define INT_MOD_CFG3                   (0x7CC)
+#define INT_MOD_CFG4                   (0x7D0)
+#define INT_MOD_CFG5                   (0x7D4)
+#define INT_MOD_CFG6                   (0x7D8)
+#define INT_MOD_CFG7                   (0x7DC)
+
+#define DMAC_CFG                               (0xC00)
+#define DMAC_CFG_COAL_EN_                      BIT(16)
+#define DMAC_CFG_CH_ARB_SEL_RX_HIGH_           (0x00000000)
+#define DMAC_CFG_MAX_READ_REQ_MASK_            (0x00000070)
+#define DMAC_CFG_MAX_READ_REQ_SET_(val)        \
+       ((((u32)(val)) << 4) & DMAC_CFG_MAX_READ_REQ_MASK_)
+#define DMAC_CFG_MAX_DSPACE_16_                        (0x00000000)
+#define DMAC_CFG_MAX_DSPACE_32_                        (0x00000001)
+#define DMAC_CFG_MAX_DSPACE_64_                        BIT(1)
+#define DMAC_CFG_MAX_DSPACE_128_               (0x00000003)
+
+#define DMAC_COAL_CFG                          (0xC04)
+#define DMAC_COAL_CFG_TIMER_LIMIT_MASK_                (0xFFF00000)
+#define DMAC_COAL_CFG_TIMER_LIMIT_SET_(val)    \
+       ((((u32)(val)) << 20) & DMAC_COAL_CFG_TIMER_LIMIT_MASK_)
+#define DMAC_COAL_CFG_TIMER_TX_START_          BIT(19)
+#define DMAC_COAL_CFG_FLUSH_INTS_              BIT(18)
+#define DMAC_COAL_CFG_INT_EXIT_COAL_           BIT(17)
+#define DMAC_COAL_CFG_CSR_EXIT_COAL_           BIT(16)
+#define DMAC_COAL_CFG_TX_THRES_MASK_           (0x0000FF00)
+#define DMAC_COAL_CFG_TX_THRES_SET_(val)       \
+       ((((u32)(val)) << 8) & DMAC_COAL_CFG_TX_THRES_MASK_)
+#define DMAC_COAL_CFG_RX_THRES_MASK_           (0x000000FF)
+#define DMAC_COAL_CFG_RX_THRES_SET_(val)       \
+       (((u32)(val)) & DMAC_COAL_CFG_RX_THRES_MASK_)
+
+#define DMAC_OBFF_CFG                          (0xC08)
+#define DMAC_OBFF_TX_THRES_MASK_               (0x0000FF00)
+#define DMAC_OBFF_TX_THRES_SET_(val)   \
+       ((((u32)(val)) << 8) & DMAC_OBFF_TX_THRES_MASK_)
+#define DMAC_OBFF_RX_THRES_MASK_               (0x000000FF)
+#define DMAC_OBFF_RX_THRES_SET_(val)   \
+       (((u32)(val)) & DMAC_OBFF_RX_THRES_MASK_)
+
+#define DMAC_CMD                               (0xC0C)
+#define DMAC_CMD_SWR_                          BIT(31)
+#define DMAC_CMD_TX_SWR_(channel)              BIT(24 + (channel))
+#define DMAC_CMD_START_T_(channel)             BIT(20 + (channel))
+#define DMAC_CMD_STOP_T_(channel)              BIT(16 + (channel))
+#define DMAC_CMD_RX_SWR_(channel)              BIT(8 + (channel))
+#define DMAC_CMD_START_R_(channel)             BIT(4 + (channel))
+#define DMAC_CMD_STOP_R_(channel)              BIT(0 + (channel))
+
+#define DMAC_INT_STS                           (0xC10)
+#define DMAC_INT_EN_SET                                (0xC14)
+#define DMAC_INT_EN_CLR                                (0xC18)
+#define DMAC_INT_BIT_RXFRM_(channel)           BIT(16 + (channel))
+#define DMAC_INT_BIT_TX_IOC_(channel)          BIT(0 + (channel))
+
+#define RX_CFG_A(channel)                      (0xC40 + ((channel) << 6))
+#define RX_CFG_A_RX_WB_ON_INT_TMR_             BIT(30)
+#define RX_CFG_A_RX_WB_THRES_MASK_             (0x1F000000)
+#define RX_CFG_A_RX_WB_THRES_SET_(val) \
+       ((((u32)(val)) << 24) & RX_CFG_A_RX_WB_THRES_MASK_)
+#define RX_CFG_A_RX_PF_THRES_MASK_             (0x001F0000)
+#define RX_CFG_A_RX_PF_THRES_SET_(val) \
+       ((((u32)(val)) << 16) & RX_CFG_A_RX_PF_THRES_MASK_)
+#define RX_CFG_A_RX_PF_PRI_THRES_MASK_         (0x00001F00)
+#define RX_CFG_A_RX_PF_PRI_THRES_SET_(val)     \
+       ((((u32)(val)) << 8) & RX_CFG_A_RX_PF_PRI_THRES_MASK_)
+#define RX_CFG_A_RX_HP_WB_EN_                  BIT(5)
+
+#define RX_CFG_B(channel)                      (0xC44 + ((channel) << 6))
+#define RX_CFG_B_TS_ALL_RX_                    BIT(29)
+#define RX_CFG_B_RX_PAD_MASK_                  (0x03000000)
+#define RX_CFG_B_RX_PAD_0_                     (0x00000000)
+#define RX_CFG_B_RX_PAD_2_                     (0x02000000)
+#define RX_CFG_B_RDMABL_512_                   (0x00040000)
+#define RX_CFG_B_RX_RING_LEN_MASK_             (0x0000FFFF)
+
+#define RX_BASE_ADDRH(channel)                 (0xC48 + ((channel) << 6))
+
+#define RX_BASE_ADDRL(channel)                 (0xC4C + ((channel) << 6))
+
+#define RX_HEAD_WRITEBACK_ADDRH(channel)       (0xC50 + ((channel) << 6))
+
+#define RX_HEAD_WRITEBACK_ADDRL(channel)       (0xC54 + ((channel) << 6))
+
+#define RX_HEAD(channel)                       (0xC58 + ((channel) << 6))
+
+#define RX_TAIL(channel)                       (0xC5C + ((channel) << 6))
+#define RX_TAIL_SET_TOP_INT_EN_                        BIT(30)
+#define RX_TAIL_SET_TOP_INT_VEC_EN_            BIT(29)
+
+#define RX_CFG_C(channel)                      (0xC64 + ((channel) << 6))
+#define RX_CFG_C_RX_TOP_INT_EN_AUTO_CLR_       BIT(6)
+#define RX_CFG_C_RX_INT_EN_R2C_                        BIT(4)
+#define RX_CFG_C_RX_DMA_INT_STS_AUTO_CLR_      BIT(3)
+#define RX_CFG_C_RX_INT_STS_R2C_MODE_MASK_     (0x00000007)
+
+#define TX_CFG_A(channel)                      (0xD40 + ((channel) << 6))
+#define TX_CFG_A_TX_HP_WB_ON_INT_TMR_          BIT(30)
+#define TX_CFG_A_TX_TMR_HPWB_SEL_IOC_          (0x10000000)
+#define TX_CFG_A_TX_PF_THRES_MASK_             (0x001F0000)
+#define TX_CFG_A_TX_PF_THRES_SET_(value)       \
+       ((((u32)(value)) << 16) & TX_CFG_A_TX_PF_THRES_MASK_)
+#define TX_CFG_A_TX_PF_PRI_THRES_MASK_         (0x00001F00)
+#define TX_CFG_A_TX_PF_PRI_THRES_SET_(value)   \
+       ((((u32)(value)) << 8) & TX_CFG_A_TX_PF_PRI_THRES_MASK_)
+#define TX_CFG_A_TX_HP_WB_EN_                  BIT(5)
+#define TX_CFG_A_TX_HP_WB_THRES_MASK_          (0x0000000F)
+#define TX_CFG_A_TX_HP_WB_THRES_SET_(value)    \
+       (((u32)(value)) & TX_CFG_A_TX_HP_WB_THRES_MASK_)
+
+#define TX_CFG_B(channel)                      (0xD44 + ((channel) << 6))
+#define TX_CFG_B_TDMABL_512_                   (0x00040000)
+#define TX_CFG_B_TX_RING_LEN_MASK_             (0x0000FFFF)
+
+#define TX_BASE_ADDRH(channel)                 (0xD48 + ((channel) << 6))
+
+#define TX_BASE_ADDRL(channel)                 (0xD4C + ((channel) << 6))
+
+#define TX_HEAD_WRITEBACK_ADDRH(channel)       (0xD50 + ((channel) << 6))
+
+#define TX_HEAD_WRITEBACK_ADDRL(channel)       (0xD54 + ((channel) << 6))
+
+#define TX_HEAD(channel)                       (0xD58 + ((channel) << 6))
+
+#define TX_TAIL(channel)                       (0xD5C + ((channel) << 6))
+#define TX_TAIL_SET_DMAC_INT_EN_               BIT(31)
+#define TX_TAIL_SET_TOP_INT_EN_                        BIT(30)
+#define TX_TAIL_SET_TOP_INT_VEC_EN_            BIT(29)
+
+#define TX_CFG_C(channel)                      (0xD64 + ((channel) << 6))
+#define TX_CFG_C_TX_TOP_INT_EN_AUTO_CLR_       BIT(6)
+#define TX_CFG_C_TX_DMA_INT_EN_AUTO_CLR_       BIT(5)
+#define TX_CFG_C_TX_INT_EN_R2C_                        BIT(4)
+#define TX_CFG_C_TX_DMA_INT_STS_AUTO_CLR_      BIT(3)
+#define TX_CFG_C_TX_INT_STS_R2C_MODE_MASK_     (0x00000007)
+
+/* MAC statistics registers */
+#define STAT_RX_FCS_ERRORS                     (0x1200)
+#define STAT_RX_ALIGNMENT_ERRORS               (0x1204)
+#define STAT_RX_JABBER_ERRORS                  (0x120C)
+#define STAT_RX_UNDERSIZE_FRAME_ERRORS         (0x1210)
+#define STAT_RX_OVERSIZE_FRAME_ERRORS          (0x1214)
+#define STAT_RX_DROPPED_FRAMES                 (0x1218)
+#define STAT_RX_UNICAST_BYTE_COUNT             (0x121C)
+#define STAT_RX_BROADCAST_BYTE_COUNT           (0x1220)
+#define STAT_RX_MULTICAST_BYTE_COUNT           (0x1224)
+#define STAT_RX_MULTICAST_FRAMES               (0x1230)
+#define STAT_RX_TOTAL_FRAMES                   (0x1254)
+
+#define STAT_TX_FCS_ERRORS                     (0x1280)
+#define STAT_TX_EXCESS_DEFERRAL_ERRORS         (0x1284)
+#define STAT_TX_CARRIER_ERRORS                 (0x1288)
+#define STAT_TX_SINGLE_COLLISIONS              (0x1290)
+#define STAT_TX_MULTIPLE_COLLISIONS            (0x1294)
+#define STAT_TX_EXCESSIVE_COLLISION            (0x1298)
+#define STAT_TX_LATE_COLLISIONS                        (0x129C)
+#define STAT_TX_UNICAST_BYTE_COUNT             (0x12A0)
+#define STAT_TX_BROADCAST_BYTE_COUNT           (0x12A4)
+#define STAT_TX_MULTICAST_BYTE_COUNT           (0x12A8)
+#define STAT_TX_MULTICAST_FRAMES               (0x12B4)
+#define STAT_TX_TOTAL_FRAMES                   (0x12D8)
+
+/* End of Register definitions */
+
+#define LAN743X_MAX_RX_CHANNELS                (4)
+#define LAN743X_MAX_TX_CHANNELS                (1)
+struct lan743x_adapter;
+
+#define LAN743X_USED_RX_CHANNELS       (4)
+#define LAN743X_USED_TX_CHANNELS       (1)
+#define LAN743X_INT_MOD        (400)
+
+#if (LAN743X_USED_RX_CHANNELS > LAN743X_MAX_RX_CHANNELS)
+#error Invalid LAN743X_USED_RX_CHANNELS
+#endif
+#if (LAN743X_USED_TX_CHANNELS > LAN743X_MAX_TX_CHANNELS)
+#error Invalid LAN743X_USED_TX_CHANNELS
+#endif
+
+/* PCI */
+/* SMSC acquired EFAR late 1990's, MCHP acquired SMSC 2012 */
+#define PCI_VENDOR_ID_SMSC             PCI_VENDOR_ID_EFAR
+#define PCI_DEVICE_ID_SMSC_LAN7430     (0x7430)
+
+#define PCI_CONFIG_LENGTH              (0x1000)
+
+/* CSR */
+#define CSR_LENGTH                                     (0x2000)
+
+#define LAN743X_CSR_FLAG_IS_A0                         BIT(0)
+#define LAN743X_CSR_FLAG_IS_B0                         BIT(1)
+#define LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR    BIT(8)
+
+struct lan743x_csr {
+       u32 flags;
+       u8 __iomem *csr_address;
+       u32 id_rev;
+       u32 fpga_rev;
+};
+
+/* INTERRUPTS */
+typedef void(*lan743x_vector_handler)(void *context, u32 int_sts, u32 flags);
+
+#define LAN743X_VECTOR_FLAG_IRQ_SHARED                 BIT(0)
+#define LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ         BIT(1)
+#define LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C          BIT(2)
+#define LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C          BIT(3)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK                BIT(4)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR                BIT(5)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C          BIT(6)
+#define LAN743X_VECTOR_FLAG_MASTER_ENABLE_CLEAR                BIT(7)
+#define LAN743X_VECTOR_FLAG_MASTER_ENABLE_SET          BIT(8)
+#define LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR    BIT(9)
+#define LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET      BIT(10)
+#define LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR   BIT(11)
+#define LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET     BIT(12)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR   BIT(13)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET     BIT(14)
+#define LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR   BIT(15)
+
+struct lan743x_vector {
+       int                     irq;
+       u32                     flags;
+       struct lan743x_adapter  *adapter;
+       int                     vector_index;
+       u32                     int_mask;
+       lan743x_vector_handler  handler;
+       void                    *context;
+};
+
+#define LAN743X_MAX_VECTOR_COUNT       (8)
+
+struct lan743x_intr {
+       int                     flags;
+
+       unsigned int            irq;
+
+       struct lan743x_vector   vector_list[LAN743X_MAX_VECTOR_COUNT];
+       int                     number_of_vectors;
+       bool                    using_vectors;
+
+       int                     software_isr_flag;
+};
+
+#define LAN743X_MAX_FRAME_SIZE                 (9 * 1024)
+
+/* PHY */
+struct lan743x_phy {
+       bool    fc_autoneg;
+       u8      fc_request_control;
+};
+
+/* TX */
+struct lan743x_tx_descriptor;
+struct lan743x_tx_buffer_info;
+
+#define GPIO_QUEUE_STARTED             (0)
+#define GPIO_TX_FUNCTION               (1)
+#define GPIO_TX_COMPLETION             (2)
+#define GPIO_TX_FRAGMENT               (3)
+
+#define TX_FRAME_FLAG_IN_PROGRESS      BIT(0)
+
+struct lan743x_tx {
+       struct lan743x_adapter *adapter;
+       u32     vector_flags;
+       int     channel_number;
+
+       int     ring_size;
+       size_t  ring_allocation_size;
+       struct lan743x_tx_descriptor *ring_cpu_ptr;
+       dma_addr_t ring_dma_ptr;
+       /* ring_lock: used to prevent concurrent access to tx ring */
+       spinlock_t ring_lock;
+       u32             frame_flags;
+       u32             frame_first;
+       u32             frame_data0;
+       u32             frame_tail;
+
+       struct lan743x_tx_buffer_info *buffer_info;
+
+       u32             *head_cpu_ptr;
+       dma_addr_t      head_dma_ptr;
+       int             last_head;
+       int             last_tail;
+
+       struct napi_struct napi;
+
+       struct sk_buff *overflow_skb;
+};
+
+/* RX */
+struct lan743x_rx_descriptor;
+struct lan743x_rx_buffer_info;
+
+struct lan743x_rx {
+       struct lan743x_adapter *adapter;
+       u32     vector_flags;
+       int     channel_number;
+
+       int     ring_size;
+       size_t  ring_allocation_size;
+       struct lan743x_rx_descriptor *ring_cpu_ptr;
+       dma_addr_t ring_dma_ptr;
+
+       struct lan743x_rx_buffer_info *buffer_info;
+
+       u32             *head_cpu_ptr;
+       dma_addr_t      head_dma_ptr;
+       u32             last_head;
+       u32             last_tail;
+
+       struct napi_struct napi;
+
+       u32             frame_count;
+};
+
+struct lan743x_adapter {
+       struct net_device       *netdev;
+       struct mii_bus          *mdiobus;
+       int                     msg_enable;
+       struct pci_dev          *pdev;
+       struct lan743x_csr      csr;
+       struct lan743x_intr     intr;
+
+       /* dp_lock: used to prevent concurrent access to data port */
+       struct mutex            dp_lock;
+
+       u8                      mac_address[ETH_ALEN];
+
+       struct lan743x_phy      phy;
+       struct lan743x_tx       tx[LAN743X_MAX_TX_CHANNELS]; /* one per channel */
+       struct lan743x_rx       rx[LAN743X_MAX_RX_CHANNELS]; /* one per channel */
+};
+
+#define LAN743X_COMPONENT_FLAG_RX(channel)  BIT(20 + (channel))
+
+#define INTR_FLAG_IRQ_REQUESTED(vector_index)  BIT(0 + (vector_index))
+#define INTR_FLAG_MSI_ENABLED                  BIT(8)
+#define INTR_FLAG_MSIX_ENABLED                 BIT(9)
+
+#define MAC_MII_READ            1
+#define MAC_MII_WRITE           0
+
+#define PHY_FLAG_OPENED     BIT(0)
+#define PHY_FLAG_ATTACHED   BIT(1)
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+#define DMA_ADDR_HIGH32(dma_addr)   ((u32)(((dma_addr) >> 32) & 0xFFFFFFFF))
+#else
+#define DMA_ADDR_HIGH32(dma_addr)   ((u32)(0))
+#endif
+#define DMA_ADDR_LOW32(dma_addr) ((u32)((dma_addr) & 0xFFFFFFFF))
+#define DMA_DESCRIPTOR_SPACING_16       (16)
+#define DMA_DESCRIPTOR_SPACING_32       (32)
+#define DMA_DESCRIPTOR_SPACING_64       (64)
+#define DMA_DESCRIPTOR_SPACING_128      (128)
+#define DEFAULT_DMA_DESCRIPTOR_SPACING  (L1_CACHE_BYTES)
+
+#define DMAC_CHANNEL_STATE_SET(start_bit, stop_bit) \
+       (((start_bit) ? 2 : 0) | ((stop_bit) ? 1 : 0)) /* bit1=start, bit0=stop */
+#define DMAC_CHANNEL_STATE_INITIAL      DMAC_CHANNEL_STATE_SET(0, 0) /* 0 */
+#define DMAC_CHANNEL_STATE_STARTED      DMAC_CHANNEL_STATE_SET(1, 0) /* 2 */
+#define DMAC_CHANNEL_STATE_STOP_PENDING DMAC_CHANNEL_STATE_SET(1, 1) /* 3 */
+#define DMAC_CHANNEL_STATE_STOPPED      DMAC_CHANNEL_STATE_SET(0, 1) /* 1 */
+
+/* TX Descriptor bits */
+#define TX_DESC_DATA0_DTYPE_MASK_              (0xC0000000)
+#define TX_DESC_DATA0_DTYPE_DATA_              (0x00000000)
+#define TX_DESC_DATA0_DTYPE_EXT_               (0x40000000)
+#define TX_DESC_DATA0_FS_                      (0x20000000)
+#define TX_DESC_DATA0_LS_                      (0x10000000)
+#define TX_DESC_DATA0_EXT_                     (0x08000000)
+#define TX_DESC_DATA0_IOC_                     (0x04000000)
+#define TX_DESC_DATA0_ICE_                     (0x00400000)
+#define TX_DESC_DATA0_IPE_                     (0x00200000)
+#define TX_DESC_DATA0_TPE_                     (0x00100000)
+#define TX_DESC_DATA0_FCS_                     (0x00020000)
+#define TX_DESC_DATA0_BUF_LENGTH_MASK_         (0x0000FFFF)
+#define TX_DESC_DATA0_EXT_LSO_                 (0x00200000)
+#define TX_DESC_DATA0_EXT_PAY_LENGTH_MASK_     (0x000FFFFF)
+#define TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_   (0x3FFF0000)
+
+struct lan743x_tx_descriptor { /* four 32-bit words per TX descriptor */
+       u32     data0;
+       u32     data1;
+       u32     data2;
+       u32     data3;
+} __aligned(DEFAULT_DMA_DESCRIPTOR_SPACING); /* i.e. L1_CACHE_BYTES */
+
+#define TX_BUFFER_INFO_FLAG_ACTIVE             BIT(0)
+#define TX_BUFFER_INFO_FLAG_IGNORE_SYNC                BIT(2)
+#define TX_BUFFER_INFO_FLAG_SKB_FRAGMENT       BIT(3)
+struct lan743x_tx_buffer_info {
+       int flags;
+       struct sk_buff *skb;
+       dma_addr_t      dma_ptr;
+       unsigned int    buffer_length;
+};
+
+#define LAN743X_TX_RING_SIZE    (50)
+
+/* OWN bit is set, i.e. descriptors are owned by the RX DMAC */
+#define RX_DESC_DATA0_OWN_                (0x00008000)
+/* OWN bit is clear, i.e. descriptors are owned by the host */
+#define RX_DESC_DATA0_FS_                 (0x80000000)
+#define RX_DESC_DATA0_LS_                 (0x40000000)
+#define RX_DESC_DATA0_FRAME_LENGTH_MASK_  (0x3FFF0000)
+#define RX_DESC_DATA0_FRAME_LENGTH_GET_(data0) \
+       (((data0) & RX_DESC_DATA0_FRAME_LENGTH_MASK_) >> 16)
+#define RX_DESC_DATA0_EXT_                (0x00004000)
+#define RX_DESC_DATA0_BUF_LENGTH_MASK_    (0x00003FFF)
+#define RX_DESC_DATA2_TS_NS_MASK_         (0x3FFFFFFF)
+
+#if ((NET_IP_ALIGN != 0) && (NET_IP_ALIGN != 2))
+#error NET_IP_ALIGN must be 0 or 2
+#endif
+
+#define RX_HEAD_PADDING                NET_IP_ALIGN
+
+struct lan743x_rx_descriptor { /* four 32-bit words per RX descriptor */
+       u32     data0;
+       u32     data1;
+       u32     data2;
+       u32     data3;
+} __aligned(DEFAULT_DMA_DESCRIPTOR_SPACING); /* i.e. L1_CACHE_BYTES */
+
+#define RX_BUFFER_INFO_FLAG_ACTIVE      BIT(0)
+struct lan743x_rx_buffer_info {
+       int flags;
+       struct sk_buff *skb;
+
+       dma_addr_t      dma_ptr;
+       unsigned int    buffer_length;
+};
+
+#define LAN743X_RX_RING_SIZE        (65)
+
+#define RX_PROCESS_RESULT_NOTHING_TO_DO     (0)
+#define RX_PROCESS_RESULT_PACKET_RECEIVED   (1)
+#define RX_PROCESS_RESULT_PACKET_DROPPED    (2)
+
+#endif /* _LAN743X_H */
index a10ef50e4f12c3c949f0f2120c71d3562d8a0559..017fb23225897983b0440e1bf67db2ab0ae31fc1 100644 (file)
@@ -1,16 +1,16 @@
 #
-# National Semi-conductor device configuration
+# National Semiconductor device configuration
 #
 
 config NET_VENDOR_NATSEMI
-       bool "National Semi-conductor devices"
+       bool "National Semiconductor devices"
        default y
        ---help---
          If you have a network (Ethernet) card belonging to this class, say Y.
 
          Note that the answer to this question doesn't directly affect the
          kernel: saying N will just cause the configurator to skip all
-         the questions about National Semi-conductor devices. If you say Y,
+         the questions about National Semiconductor devices. If you say Y,
          you will be asked for your specific card in the following questions.
 
 if NET_VENDOR_NATSEMI
index cc664977596e2426de92453285d028bebd868ddf..a759aa09ef5960b979b951e00c4497fbb4bf2276 100644 (file)
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 #
-# Makefile for the National Semi-conductor Sonic devices.
+# Makefile for the National Semiconductor Sonic devices.
 #
 
 obj-$(CONFIG_MACSONIC) += macsonic.o
index d5b28884e21eb10f742935df7d10ce3d4793dcab..51fa82b429a3cf348be4cc1d67903fd9a594a2c0 100644 (file)
@@ -60,14 +60,6 @@ do {                                                                 \
        *((volatile unsigned int *)dev->base_addr+(reg)) = (val);               \
 } while (0)
 
-
-/* use 0 for production, 1 for verification, >1 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
 /*
  * We cannot use station (ethernet) address prefixes to detect the
  * sonic controller since these are board manufacturer depended.
@@ -117,7 +109,6 @@ static const struct net_device_ops sonic_netdev_ops = {
 
 static int sonic_probe1(struct net_device *dev)
 {
-       static unsigned version_printed;
        unsigned int silicon_revision;
        unsigned int val;
        struct sonic_local *lp = netdev_priv(dev);
@@ -133,26 +124,17 @@ static int sonic_probe1(struct net_device *dev)
         * the expected location.
         */
        silicon_revision = SONIC_READ(SONIC_SR);
-       if (sonic_debug > 1)
-               printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision);
-
        i = 0;
        while (known_revisions[i] != 0xffff &&
               known_revisions[i] != silicon_revision)
                i++;
 
        if (known_revisions[i] == 0xffff) {
-               printk("SONIC ethernet controller not found (0x%4x)\n",
-                      silicon_revision);
+               pr_info("SONIC ethernet controller not found (0x%4x)\n",
+                       silicon_revision);
                goto out;
        }
 
-       if (sonic_debug  &&  version_printed++ == 0)
-               printk(version);
-
-       printk(KERN_INFO "%s: Sonic ethernet found at 0x%08lx, ",
-              dev_name(lp->device), dev->base_addr);
-
        /*
         * Put the sonic into software reset, then
         * retrieve and print the ethernet address.
@@ -245,12 +227,16 @@ static int jazz_sonic_probe(struct platform_device *pdev)
        err = sonic_probe1(dev);
        if (err)
                goto out;
+
+       pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+               dev->base_addr, dev->dev_addr, dev->irq);
+
+       sonic_msg_init(dev);
+
        err = register_netdev(dev);
        if (err)
                goto out1;
 
-       printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq);
-
        return 0;
 
 out1:
@@ -262,8 +248,6 @@ static int jazz_sonic_probe(struct platform_device *pdev)
 }
 
 MODULE_DESCRIPTION("Jazz SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "jazzsonic debug level (1-4)");
 MODULE_ALIAS("platform:jazzsonic");
 
 #include "sonic.c"
index b922ab5cedea2500a4aadde3c324155c0caf6657..0937fc2a928ed50d168ffcc7870dda0aa0b5770f 100644 (file)
@@ -60,8 +60,6 @@
 #include <asm/macints.h>
 #include <asm/mac_via.h>
 
-static char mac_sonic_string[] = "macsonic";
-
 #include "sonic.h"
 
 /* These should basically be bus-size and endian independent (since
@@ -72,15 +70,6 @@ static char mac_sonic_string[] = "macsonic";
 #define SONIC_WRITE(reg,val) (nubus_writew(val, dev->base_addr + (reg * 4) \
              + lp->reg_offset))
 
-/* use 0 for production, 1 for verification, >1 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
-static int sonic_version_printed;
-
 /* For onboard SONIC */
 #define ONBOARD_SONIC_REGISTERS        0x50F0A000
 #define ONBOARD_SONIC_PROM_BASE        0x50f08000
@@ -313,11 +302,6 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
        int sr;
        bool commslot = macintosh_config->expansion_type == MAC_EXP_PDS_COMM;
 
-       if (!MACH_IS_MAC)
-               return -ENODEV;
-
-       printk(KERN_INFO "Checking for internal Macintosh ethernet (SONIC).. ");
-
        /* Bogus probing, on the models which may or may not have
           Ethernet (BTW, the Ethernet *is* always at the same
           address, and nothing else lives there, at least if Apple's
@@ -327,13 +311,11 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
 
                card_present = hwreg_present((void*)ONBOARD_SONIC_REGISTERS);
                if (!card_present) {
-                       printk("none.\n");
+                       pr_info("Onboard/comm-slot SONIC not found\n");
                        return -ENODEV;
                }
        }
 
-       printk("yes\n");
-
        /* Danger!  My arms are flailing wildly!  You *must* set lp->reg_offset
         * and dev->base_addr before using SONIC_READ() or SONIC_WRITE() */
        dev->base_addr = ONBOARD_SONIC_REGISTERS;
@@ -342,18 +324,10 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
        else
                dev->irq = IRQ_NUBUS_9;
 
-       if (!sonic_version_printed) {
-               printk(KERN_INFO "%s", version);
-               sonic_version_printed = 1;
-       }
-       printk(KERN_INFO "%s: onboard / comm-slot SONIC at 0x%08lx\n",
-              dev_name(lp->device), dev->base_addr);
-
        /* The PowerBook's SONIC is 16 bit always. */
        if (macintosh_config->ident == MAC_MODEL_PB520) {
                lp->reg_offset = 0;
                lp->dma_bitmode = SONIC_BITMODE16;
-               sr = SONIC_READ(SONIC_SR);
        } else if (commslot) {
                /* Some of the comm-slot cards are 16 bit.  But some
                   of them are not.  The 32-bit cards use offset 2 and
@@ -370,22 +344,21 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
                else {
                        lp->dma_bitmode = SONIC_BITMODE16;
                        lp->reg_offset = 0;
-                       sr = SONIC_READ(SONIC_SR);
                }
        } else {
                /* All onboard cards are at offset 2 with 32 bit DMA. */
                lp->reg_offset = 2;
                lp->dma_bitmode = SONIC_BITMODE32;
-               sr = SONIC_READ(SONIC_SR);
        }
-       printk(KERN_INFO
-              "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
-              dev_name(lp->device), sr, lp->dma_bitmode?32:16, lp->reg_offset);
 
-#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
-       printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device),
-              SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
-#endif
+       pr_info("Onboard/comm-slot SONIC, revision 0x%04x, %d bit DMA, register offset %d\n",
+               SONIC_READ(SONIC_SR), lp->dma_bitmode ? 32 : 16,
+               lp->reg_offset);
+
+       /* This is sometimes useful to find out how MacOS configured the card */
+       pr_debug("%s: DCR=0x%04x, DCR2=0x%04x\n", __func__,
+                SONIC_READ(SONIC_DCR) & 0xffff,
+                SONIC_READ(SONIC_DCR2) & 0xffff);
 
        /* Software reset, then initialize control registers. */
        SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
@@ -406,11 +379,14 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
        /* Now look for the MAC address. */
        mac_onboard_sonic_ethernet_addr(dev);
 
+       pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+               dev->base_addr, dev->dev_addr, dev->irq);
+
        /* Shared init code */
        return macsonic_init(dev);
 }
 
-static int mac_nubus_sonic_ethernet_addr(struct net_device *dev,
+static int mac_sonic_nubus_ethernet_addr(struct net_device *dev,
                                         unsigned long prom_addr, int id)
 {
        int i;
@@ -449,70 +425,49 @@ static int macsonic_ident(struct nubus_rsrc *fres)
        return -1;
 }
 
-static int mac_nubus_sonic_probe(struct net_device *dev)
+static int mac_sonic_nubus_probe_board(struct nubus_board *board, int id,
+                                      struct net_device *dev)
 {
-       static int slots;
-       struct nubus_rsrc *ndev = NULL;
        struct sonic_local* lp = netdev_priv(dev);
        unsigned long base_addr, prom_addr;
        u16 sonic_dcr;
-       int id = -1;
        int reg_offset, dma_bitmode;
 
-       /* Find the first SONIC that hasn't been initialized already */
-       for_each_func_rsrc(ndev) {
-               if (ndev->category != NUBUS_CAT_NETWORK ||
-                   ndev->type != NUBUS_TYPE_ETHERNET)
-                       continue;
-
-               /* Have we seen it already? */
-               if (slots & (1<<ndev->board->slot))
-                       continue;
-               slots |= 1<<ndev->board->slot;
-
-               /* Is it one of ours? */
-               if ((id = macsonic_ident(ndev)) != -1)
-                       break;
-       }
-
-       if (ndev == NULL)
-               return -ENODEV;
-
        switch (id) {
        case MACSONIC_DUODOCK:
-               base_addr = ndev->board->slot_addr + DUODOCK_SONIC_REGISTERS;
-               prom_addr = ndev->board->slot_addr + DUODOCK_SONIC_PROM_BASE;
+               base_addr = board->slot_addr + DUODOCK_SONIC_REGISTERS;
+               prom_addr = board->slot_addr + DUODOCK_SONIC_PROM_BASE;
                sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT0 | SONIC_DCR_RFT1 |
                            SONIC_DCR_TFT0;
                reg_offset = 2;
                dma_bitmode = SONIC_BITMODE32;
                break;
        case MACSONIC_APPLE:
-               base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
-               prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
+               base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+               prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE;
                sonic_dcr = SONIC_DCR_BMS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0;
                reg_offset = 0;
                dma_bitmode = SONIC_BITMODE32;
                break;
        case MACSONIC_APPLE16:
-               base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
-               prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
+               base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+               prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE;
                sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
                            SONIC_DCR_PO1 | SONIC_DCR_BMS;
                reg_offset = 0;
                dma_bitmode = SONIC_BITMODE16;
                break;
        case MACSONIC_DAYNALINK:
-               base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
-               prom_addr = ndev->board->slot_addr + DAYNALINK_PROM_BASE;
+               base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+               prom_addr = board->slot_addr + DAYNALINK_PROM_BASE;
                sonic_dcr = SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
                            SONIC_DCR_PO1 | SONIC_DCR_BMS;
                reg_offset = 0;
                dma_bitmode = SONIC_BITMODE16;
                break;
        case MACSONIC_DAYNA:
-               base_addr = ndev->board->slot_addr + DAYNA_SONIC_REGISTERS;
-               prom_addr = ndev->board->slot_addr + DAYNA_SONIC_MAC_ADDR;
+               base_addr = board->slot_addr + DAYNA_SONIC_REGISTERS;
+               prom_addr = board->slot_addr + DAYNA_SONIC_MAC_ADDR;
                sonic_dcr = SONIC_DCR_BMS |
                            SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | SONIC_DCR_PO1;
                reg_offset = 0;
@@ -528,21 +483,16 @@ static int mac_nubus_sonic_probe(struct net_device *dev)
        dev->base_addr = base_addr;
        lp->reg_offset = reg_offset;
        lp->dma_bitmode = dma_bitmode;
-       dev->irq = SLOT2IRQ(ndev->board->slot);
+       dev->irq = SLOT2IRQ(board->slot);
 
-       if (!sonic_version_printed) {
-               printk(KERN_INFO "%s", version);
-               sonic_version_printed = 1;
-       }
-       printk(KERN_INFO "%s: %s in slot %X\n",
-              dev_name(lp->device), ndev->board->name, ndev->board->slot);
-       printk(KERN_INFO "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
-              dev_name(lp->device), SONIC_READ(SONIC_SR), dma_bitmode?32:16, reg_offset);
+       dev_info(&board->dev, "%s, revision 0x%04x, %d bit DMA, register offset %d\n",
+                board->name, SONIC_READ(SONIC_SR),
+                lp->dma_bitmode ? 32 : 16, lp->reg_offset);
 
-#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
-       printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device),
-              SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
-#endif
+       /* This is sometimes useful to find out how MacOS configured the card */
+       dev_dbg(&board->dev, "%s: DCR=0x%04x, DCR2=0x%04x\n", __func__,
+               SONIC_READ(SONIC_DCR) & 0xffff,
+               SONIC_READ(SONIC_DCR2) & 0xffff);
 
        /* Software reset, then initialize control registers. */
        SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
@@ -557,14 +507,17 @@ static int mac_nubus_sonic_probe(struct net_device *dev)
        SONIC_WRITE(SONIC_ISR, 0x7fff);
 
        /* Now look for the MAC address. */
-       if (mac_nubus_sonic_ethernet_addr(dev, prom_addr, id) != 0)
+       if (mac_sonic_nubus_ethernet_addr(dev, prom_addr, id) != 0)
                return -ENODEV;
 
+       dev_info(&board->dev, "SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+                dev->base_addr, dev->dev_addr, dev->irq);
+
        /* Shared init code */
        return macsonic_init(dev);
 }
 
-static int mac_sonic_probe(struct platform_device *pdev)
+static int mac_sonic_platform_probe(struct platform_device *pdev)
 {
        struct net_device *dev;
        struct sonic_local *lp;
@@ -579,22 +532,16 @@ static int mac_sonic_probe(struct platform_device *pdev)
        SET_NETDEV_DEV(dev, &pdev->dev);
        platform_set_drvdata(pdev, dev);
 
-       /* This will catch fatal stuff like -ENOMEM as well as success */
        err = mac_onboard_sonic_probe(dev);
-       if (err == 0)
-               goto found;
-       if (err != -ENODEV)
-               goto out;
-       err = mac_nubus_sonic_probe(dev);
        if (err)
                goto out;
-found:
+
+       sonic_msg_init(dev);
+
        err = register_netdev(dev);
        if (err)
                goto out;
 
-       printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq);
-
        return 0;
 
 out:
@@ -604,13 +551,11 @@ static int mac_sonic_probe(struct platform_device *pdev)
 }
 
 MODULE_DESCRIPTION("Macintosh SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "macsonic debug level (1-4)");
 MODULE_ALIAS("platform:macsonic");
 
 #include "sonic.c"
 
-static int mac_sonic_device_remove(struct platform_device *pdev)
+static int mac_sonic_platform_remove(struct platform_device *pdev)
 {
        struct net_device *dev = platform_get_drvdata(pdev);
        struct sonic_local* lp = netdev_priv(dev);
@@ -623,12 +568,105 @@ static int mac_sonic_device_remove(struct platform_device *pdev)
        return 0;
 }
 
-static struct platform_driver mac_sonic_driver = {
-       .probe  = mac_sonic_probe,
-       .remove = mac_sonic_device_remove,
-       .driver = {
-               .name   = mac_sonic_string,
+/* Platform driver named "macsonic"; probing and removal are handled by
+ * mac_sonic_platform_probe() and mac_sonic_platform_remove().
+ */
+static struct platform_driver mac_sonic_platform_driver = {
+       .probe  = mac_sonic_platform_probe,
+       .remove = mac_sonic_platform_remove,
+       .driver = {
+               .name = "macsonic",
+       },
+};
+
+/* Probe one NuBus board for a supported SONIC ethernet function.
+ *
+ * Returns 0 once the net_device has been registered and stored as the
+ * board's driver data, -ENODEV if the board is not handled here, -ENOMEM
+ * if the net_device cannot be allocated, or the error returned by
+ * mac_sonic_nubus_probe_board() or register_netdev().
+ */
+static int mac_sonic_nubus_probe(struct nubus_board *board)
+{
+       struct net_device *ndev;
+       struct sonic_local *lp;
+       struct nubus_rsrc *fres;
+       int id = -1;
+       int err;
+
+       /* The platform driver will handle a PDS or Comm Slot card (even if
+        * it has a pseudoslot declaration ROM).
+        */
+       if (macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
+               return -ENODEV;
+
+       /* Stop at the first ethernet function resource that macsonic_ident()
+        * recognises. The check after the loop relies on fres being NULL
+        * when no resource matched.
+        */
+       for_each_board_func_rsrc(board, fres) {
+               if (fres->category != NUBUS_CAT_NETWORK ||
+                   fres->type != NUBUS_TYPE_ETHERNET)
+                       continue;
+
+               id = macsonic_ident(fres);
+               if (id != -1)
+                       break;
+       }
+       if (!fres)
+               return -ENODEV;
+
+       ndev = alloc_etherdev(sizeof(struct sonic_local));
+       if (!ndev)
+               return -ENOMEM;
+
+       lp = netdev_priv(ndev);
+       lp->device = &board->dev;
+       SET_NETDEV_DEV(ndev, &board->dev);
+
+       err = mac_sonic_nubus_probe_board(board, id, ndev);
+       if (err)
+               goto out;
+
+       sonic_msg_init(ndev);
+
+       err = register_netdev(ndev);
+       if (err)
+               goto undo_probe;
+
+       nubus_set_drvdata(board, ndev);
+
+       return 0;
+
+undo_probe:
+       /* Release the descriptor area the same way mac_sonic_nubus_remove()
+        * does, so it is not leaked when registration fails.
+        * NOTE(review): assumes a successful mac_sonic_nubus_probe_board()
+        * has allocated lp->descriptors (via macsonic_init(), which is not
+        * visible here) - confirm.
+        */
+       dma_free_coherent(lp->device,
+                         SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
+                         lp->descriptors, lp->descriptors_laddr);
+out:
+       free_netdev(ndev);
+       return err;
+}
+
+/* Tear down the SONIC interface bound to a NuBus board: unregister the
+ * net_device, free the descriptor area, then free the net_device itself.
+ * Always returns 0.
+ */
+static int mac_sonic_nubus_remove(struct nubus_board *board)
+{
+       struct net_device *ndev = nubus_get_drvdata(board);
+       struct sonic_local *lp = netdev_priv(ndev);
+       size_t desc_size;
+
+       unregister_netdev(ndev);
+
+       /* Descriptor area size depends on the DMA bit mode of this board */
+       desc_size = SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode);
+       dma_free_coherent(lp->device, desc_size, lp->descriptors,
+                         lp->descriptors_laddr);
+
+       free_netdev(ndev);
+
+       return 0;
+}
+
+/* NuBus driver named "macsonic-nubus"; probing and removal are handled by
+ * mac_sonic_nubus_probe() and mac_sonic_nubus_remove().
+ */
+static struct nubus_driver mac_sonic_nubus_driver = {
+       .probe  = mac_sonic_nubus_probe,
+       .remove = mac_sonic_nubus_remove,
+       .driver = {
+               .name = "macsonic-nubus",
+               .owner = THIS_MODULE,
        },
 };
 
-module_platform_driver(mac_sonic_driver);
+/* Result of each bus driver registration, kept so that module exit only
+ * unregisters a driver that was registered successfully.
+ */
+static int platform_err, nubus_err;
+
+/* Register the platform and NuBus drivers. Always returns 0; the
+ * individual registration results are only recorded.
+ */
+static int __init mac_sonic_init(void)
+{
+       platform_err = platform_driver_register(&mac_sonic_platform_driver);
+       nubus_err = nubus_driver_register(&mac_sonic_nubus_driver);
+
+       return 0;
+}
+module_init(mac_sonic_init);
+
+/* Unregister whichever drivers mac_sonic_init() managed to register. */
+static void __exit mac_sonic_exit(void)
+{
+       if (platform_err == 0)
+               platform_driver_unregister(&mac_sonic_platform_driver);
+       if (nubus_err == 0)
+               nubus_driver_unregister(&mac_sonic_nubus_driver);
+}
+module_exit(mac_sonic_exit);
index 612c7a44b26c6c58a16dbdd82017bfb3246bbb69..7ed08486ae23aadf1ce6710d343a892a41d1fa30 100644 (file)
  * the NetBSD file "sys/arch/mac68k/dev/if_sn.c".
  */
 
+static unsigned int version_printed;
 
+static int sonic_debug = -1;
+module_param(sonic_debug, int, 0);
+MODULE_PARM_DESC(sonic_debug, "debug message level");
+
+/* Initialise the device's message mask from the sonic_debug module
+ * parameter and pass the driver version string to netif_dbg() on the
+ * first call only.
+ */
+static void sonic_msg_init(struct net_device *dev)
+{
+       struct sonic_local *lp = netdev_priv(dev);
+       bool first_call = (version_printed == 0);
+
+       version_printed++;
+       lp->msg_enable = netif_msg_init(sonic_debug, 0);
+
+       if (first_call)
+               netif_dbg(lp, drv, dev, "%s", version);
+}
 
 /*
  * Open/initialize the SONIC controller.
@@ -47,8 +61,7 @@ static int sonic_open(struct net_device *dev)
        struct sonic_local *lp = netdev_priv(dev);
        int i;
 
-       if (sonic_debug > 2)
-               printk("sonic_open: initializing sonic driver.\n");
+       netif_dbg(lp, ifup, dev, "%s: initializing sonic driver\n", __func__);
 
        for (i = 0; i < SONIC_NUM_RRS; i++) {
                struct sk_buff *skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2);
@@ -95,8 +108,7 @@ static int sonic_open(struct net_device *dev)
 
        netif_start_queue(dev);
 
-       if (sonic_debug > 2)
-               printk("sonic_open: Initialization done.\n");
+       netif_dbg(lp, ifup, dev, "%s: Initialization done\n", __func__);
 
        return 0;
 }
@@ -110,8 +122,7 @@ static int sonic_close(struct net_device *dev)
        struct sonic_local *lp = netdev_priv(dev);
        int i;
 
-       if (sonic_debug > 2)
-               printk("sonic_close\n");
+       netif_dbg(lp, ifdown, dev, "%s\n", __func__);
 
        netif_stop_queue(dev);
 
@@ -205,8 +216,7 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
        int length;
        int entry = lp->next_tx;
 
-       if (sonic_debug > 2)
-               printk("sonic_send_packet: skb=%p, dev=%p\n", skb, dev);
+       netif_dbg(lp, tx_queued, dev, "%s: skb=%p\n", __func__, skb);
 
        length = skb->len;
        if (length < ETH_ZLEN) {
@@ -252,14 +262,12 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
        lp->next_tx = (entry + 1) & SONIC_TDS_MASK;
        if (lp->tx_skb[lp->next_tx] != NULL) {
                /* The ring is full, the ISR has yet to process the next TD. */
-               if (sonic_debug > 3)
-                       printk("%s: stopping queue\n", dev->name);
+               netif_dbg(lp, tx_queued, dev, "%s: stopping queue\n", __func__);
                netif_stop_queue(dev);
                /* after this packet, wait for ISR to free up some TDAs */
        } else netif_start_queue(dev);
 
-       if (sonic_debug > 2)
-               printk("sonic_send_packet: issuing Tx command\n");
+       netif_dbg(lp, tx_queued, dev, "%s: issuing Tx command\n", __func__);
 
        SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP);
 
@@ -281,8 +289,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
 
        do {
                if (status & SONIC_INT_PKTRX) {
-                       if (sonic_debug > 2)
-                               printk("%s: packet rx\n", dev->name);
+                       netif_dbg(lp, intr, dev, "%s: packet rx\n", __func__);
                        sonic_rx(dev);  /* got packet(s) */
                        SONIC_WRITE(SONIC_ISR, SONIC_INT_PKTRX); /* clear the interrupt */
                }
@@ -299,8 +306,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
                         *   still being allocated by sonic_send_packet (status clear & tx_skb[entry] clear)
                         */
 
-                       if (sonic_debug > 2)
-                               printk("%s: tx done\n", dev->name);
+                       netif_dbg(lp, intr, dev, "%s: tx done\n", __func__);
 
                        while (lp->tx_skb[entry] != NULL) {
                                if ((td_status = sonic_tda_get(dev, entry, SONIC_TD_STATUS)) == 0)
@@ -346,20 +352,20 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
                 * check error conditions
                 */
                if (status & SONIC_INT_RFO) {
-                       if (sonic_debug > 1)
-                               printk("%s: rx fifo overrun\n", dev->name);
+                       netif_dbg(lp, rx_err, dev, "%s: rx fifo overrun\n",
+                                 __func__);
                        lp->stats.rx_fifo_errors++;
                        SONIC_WRITE(SONIC_ISR, SONIC_INT_RFO); /* clear the interrupt */
                }
                if (status & SONIC_INT_RDE) {
-                       if (sonic_debug > 1)
-                               printk("%s: rx descriptors exhausted\n", dev->name);
+                       netif_dbg(lp, rx_err, dev, "%s: rx descriptors exhausted\n",
+                                 __func__);
                        lp->stats.rx_dropped++;
                        SONIC_WRITE(SONIC_ISR, SONIC_INT_RDE); /* clear the interrupt */
                }
                if (status & SONIC_INT_RBAE) {
-                       if (sonic_debug > 1)
-                               printk("%s: rx buffer area exceeded\n", dev->name);
+                       netif_dbg(lp, rx_err, dev, "%s: rx buffer area exceeded\n",
+                                 __func__);
                        lp->stats.rx_dropped++;
                        SONIC_WRITE(SONIC_ISR, SONIC_INT_RBAE); /* clear the interrupt */
                }
@@ -380,8 +386,9 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
 
                /* transmit error */
                if (status & SONIC_INT_TXER) {
-                       if ((SONIC_READ(SONIC_TCR) & SONIC_TCR_FU) && (sonic_debug > 2))
-                               printk(KERN_ERR "%s: tx fifo underrun\n", dev->name);
+                       if (SONIC_READ(SONIC_TCR) & SONIC_TCR_FU)
+                               netif_dbg(lp, tx_err, dev, "%s: tx fifo underrun\n",
+                                         __func__);
                        SONIC_WRITE(SONIC_ISR, SONIC_INT_TXER); /* clear the interrupt */
                }
 
@@ -475,8 +482,8 @@ static void sonic_rx(struct net_device *dev)
                        if (lp->cur_rwp >= lp->rra_end) lp->cur_rwp = lp->rra_laddr & 0xffff;
                        SONIC_WRITE(SONIC_RWP, lp->cur_rwp);
                        if (SONIC_READ(SONIC_ISR) & SONIC_INT_RBE) {
-                               if (sonic_debug > 2)
-                                       printk("%s: rx buffer exhausted\n", dev->name);
+                               netif_dbg(lp, rx_err, dev, "%s: rx buffer exhausted\n",
+                                         __func__);
                                SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* clear the flag */
                        }
                } else
@@ -542,9 +549,8 @@ static void sonic_multicast_list(struct net_device *dev)
                    (netdev_mc_count(dev) > 15)) {
                        rcr |= SONIC_RCR_AMC;
                } else {
-                       if (sonic_debug > 2)
-                               printk("sonic_multicast_list: mc_count %d\n",
-                                      netdev_mc_count(dev));
+                       netif_dbg(lp, ifup, dev, "%s: mc_count %d\n", __func__,
+                                 netdev_mc_count(dev));
                        sonic_set_cam_enable(dev, 1);  /* always enable our own address */
                        i = 1;
                        netdev_for_each_mc_addr(ha, dev) {
@@ -562,8 +568,7 @@ static void sonic_multicast_list(struct net_device *dev)
                }
        }
 
-       if (sonic_debug > 2)
-               printk("sonic_multicast_list: setting RCR=%x\n", rcr);
+       netif_dbg(lp, ifup, dev, "%s: setting RCR=%x\n", __func__, rcr);
 
        SONIC_WRITE(SONIC_RCR, rcr);
 }
@@ -596,8 +601,8 @@ static int sonic_init(struct net_device *dev)
        /*
         * initialize the receive resource area
         */
-       if (sonic_debug > 2)
-               printk("sonic_init: initialize receive resource area\n");
+       netif_dbg(lp, ifup, dev, "%s: initialize receive resource area\n",
+                 __func__);
 
        for (i = 0; i < SONIC_NUM_RRS; i++) {
                u16 bufadr_l = (unsigned long)lp->rx_laddr[i] & 0xffff;
@@ -622,8 +627,7 @@ static int sonic_init(struct net_device *dev)
        SONIC_WRITE(SONIC_EOBC, (SONIC_RBSIZE >> 1) - (lp->dma_bitmode ? 2 : 1));
 
        /* load the resource pointers */
-       if (sonic_debug > 3)
-               printk("sonic_init: issuing RRRA command\n");
+       netif_dbg(lp, ifup, dev, "%s: issuing RRRA command\n", __func__);
 
        SONIC_WRITE(SONIC_CMD, SONIC_CR_RRRA);
        i = 0;
@@ -632,16 +636,17 @@ static int sonic_init(struct net_device *dev)
                        break;
        }
 
-       if (sonic_debug > 2)
-               printk("sonic_init: status=%x i=%d\n", SONIC_READ(SONIC_CMD), i);
+       netif_dbg(lp, ifup, dev, "%s: status=%x, i=%d\n", __func__,
+                 SONIC_READ(SONIC_CMD), i);
 
        /*
         * Initialize the receive descriptors so that they
         * become a circular linked list, ie. let the last
         * descriptor point to the first again.
         */
-       if (sonic_debug > 2)
-               printk("sonic_init: initialize receive descriptors\n");
+       netif_dbg(lp, ifup, dev, "%s: initialize receive descriptors\n",
+                 __func__);
+
        for (i=0; i<SONIC_NUM_RDS; i++) {
                sonic_rda_put(dev, i, SONIC_RD_STATUS, 0);
                sonic_rda_put(dev, i, SONIC_RD_PKTLEN, 0);
@@ -664,8 +669,9 @@ static int sonic_init(struct net_device *dev)
        /*
         * initialize transmit descriptors
         */
-       if (sonic_debug > 2)
-               printk("sonic_init: initialize transmit descriptors\n");
+       netif_dbg(lp, ifup, dev, "%s: initialize transmit descriptors\n",
+                 __func__);
+
        for (i = 0; i < SONIC_NUM_TDS; i++) {
                sonic_tda_put(dev, i, SONIC_TD_STATUS, 0);
                sonic_tda_put(dev, i, SONIC_TD_CONFIG, 0);
@@ -712,10 +718,8 @@ static int sonic_init(struct net_device *dev)
                if (SONIC_READ(SONIC_ISR) & SONIC_INT_LCD)
                        break;
        }
-       if (sonic_debug > 2) {
-               printk("sonic_init: CMD=%x, ISR=%x\n, i=%d",
-                      SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i);
-       }
+       netif_dbg(lp, ifup, dev, "%s: CMD=%x, ISR=%x, i=%d\n", __func__,
+                 SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i);
 
        /*
         * enable receiver, disable loopback
@@ -731,9 +735,8 @@ static int sonic_init(struct net_device *dev)
        if ((cmd & SONIC_CR_RXEN) == 0 || (cmd & SONIC_CR_STP) == 0)
                printk(KERN_ERR "sonic_init: failed, status=%x\n", cmd);
 
-       if (sonic_debug > 2)
-               printk("sonic_init: new status=%x\n",
-                      SONIC_READ(SONIC_CMD));
+       netif_dbg(lp, ifup, dev, "%s: new status=%x\n", __func__,
+                 SONIC_READ(SONIC_CMD));
 
        return 0;
 }
index 421b1a283fedae971e0a487f75adfd7881ba4796..2b27f7049acb8a5a5d011c29bfe4c5ba8484f46b 100644 (file)
@@ -319,6 +319,7 @@ struct sonic_local {
        unsigned int eol_rx;
        unsigned int eol_tx;           /* last unacked transmit packet */
        unsigned int next_tx;          /* next free TD */
+       int msg_enable;
        struct device *device;         /* generic device */
        struct net_device_stats stats;
 };
@@ -336,6 +337,7 @@ static struct net_device_stats *sonic_get_stats(struct net_device *dev);
 static void sonic_multicast_list(struct net_device *dev);
 static int sonic_init(struct net_device *dev);
 static void sonic_tx_timeout(struct net_device *dev);
+static void sonic_msg_init(struct net_device *dev);
 
 /* Internal inlines for reading/writing DMA buffers.  Note that bus
    size and endianness matter here, whereas they don't for registers,
index 1817deea98a44716e78b441d81affc37f0e7e3af..e1b886e87a762ce1f006e135ab796872c9075b70 100644 (file)
@@ -73,14 +73,6 @@ extern void xtboard_get_ether_addr(unsigned char *buf);
 #define SONIC_WRITE(reg,val) \
        *((volatile unsigned int *)dev->base_addr+reg) = val
 
-
-/* Use 0 for production, 1 for verification, and >2 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
 /*
  * We cannot use station (ethernet) address prefixes to detect the
  * sonic controller since these are board manufacturer depended.
@@ -130,7 +122,6 @@ static const struct net_device_ops xtsonic_netdev_ops = {
 
 static int __init sonic_probe1(struct net_device *dev)
 {
-       static unsigned version_printed = 0;
        unsigned int silicon_revision;
        struct sonic_local *lp = netdev_priv(dev);
        unsigned int base_addr = dev->base_addr;
@@ -146,23 +137,17 @@ static int __init sonic_probe1(struct net_device *dev)
         * the expected location.
         */
        silicon_revision = SONIC_READ(SONIC_SR);
-       if (sonic_debug > 1)
-               printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision);
-
        i = 0;
        while ((known_revisions[i] != 0xffff) &&
                        (known_revisions[i] != silicon_revision))
                i++;
 
        if (known_revisions[i] == 0xffff) {
-               printk("SONIC ethernet controller not found (0x%4x)\n",
-                               silicon_revision);
+               pr_info("SONIC ethernet controller not found (0x%4x)\n",
+                       silicon_revision);
                return -ENODEV;
        }
 
-       if (sonic_debug  &&  version_printed++ == 0)
-               printk(version);
-
        /*
         * Put the sonic into software reset, then retrieve ethernet address.
         * Note: we are assuming that the boot-loader has initialized the cam.
@@ -273,12 +258,15 @@ int xtsonic_probe(struct platform_device *pdev)
 
        if ((err = sonic_probe1(dev)))
                goto out;
+
+       pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+               dev->base_addr, dev->dev_addr, dev->irq);
+
+       sonic_msg_init(dev);
+
        if ((err = register_netdev(dev)))
                goto out1;
 
-       printk("%s: SONIC ethernet @%08lx, MAC %pM, IRQ %d\n", dev->name,
-              dev->base_addr, dev->dev_addr, dev->irq);
-
        return 0;
 
 out1:
@@ -290,8 +278,6 @@ int xtsonic_probe(struct platform_device *pdev)
 }
 
 MODULE_DESCRIPTION("Xtensa XT2000 SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "xtsonic debug level (1-4)");
 
 #include "sonic.c"
 
index 6f546e869d8d69fd17c7eaeeec579d1134d3364d..00f41c145d4d01674d146fe1eda41a346b3cc5a1 100644 (file)
@@ -2480,7 +2480,10 @@ int qed_cxt_free_proto_ilt(struct qed_hwfn *p_hwfn, enum protocol_type proto)
        if (rc)
                return rc;
 
-       /* Free Task CXT */
+       /* Free Task CXT ( Intentionally RoCE as task-id is shared between
+        * RoCE and iWARP )
+        */
+       proto = PROTOCOLID_ROCE;
        rc = qed_cxt_free_ilt_range(p_hwfn, QED_ELEM_TASK, 0,
                                    qed_cxt_get_proto_tid_count(p_hwfn, proto));
        if (rc)
index 553a6d17260ed3c849140041e266420a60aa4cd0..cdb3eec0f68c3b027de171964101bb3190d5d798 100644 (file)
@@ -298,8 +298,8 @@ static void qed_init_qm_params(struct qed_hwfn *p_hwfn)
        qm_info->start_vport = (u8) RESC_START(p_hwfn, QED_VPORT);
 
        /* rate limiting and weighted fair queueing are always enabled */
-       qm_info->vport_rl_en = 1;
-       qm_info->vport_wfq_en = 1;
+       qm_info->vport_rl_en = true;
+       qm_info->vport_wfq_en = true;
 
        /* TC config is different for AH 4 port */
        four_port = p_hwfn->cdev->num_ports_in_engine == MAX_NUM_PORTS_K2;
@@ -1276,9 +1276,9 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
 
        if (p_hwfn->mcp_info) {
                if (p_hwfn->mcp_info->func_info.bandwidth_max)
-                       qm_info->pf_rl_en = 1;
+                       qm_info->pf_rl_en = true;
                if (p_hwfn->mcp_info->func_info.bandwidth_min)
-                       qm_info->pf_wfq_en = 1;
+                       qm_info->pf_wfq_en = true;
        }
 
        memset(&params, 0, sizeof(params));
@@ -1630,7 +1630,7 @@ static int qed_vf_start(struct qed_hwfn *p_hwfn,
                qed_vf_pf_tunnel_param_update(p_hwfn, p_params->p_tunn);
        }
 
-       p_hwfn->b_int_enabled = 1;
+       p_hwfn->b_int_enabled = true;
 
        return 0;
 }
index ca4a81dc1ace685f4bb1cda85dc0e85746b4583a..69051e98aff9629da5f76d58e0040d8acd7cac1e 100644 (file)
@@ -1703,6 +1703,13 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn,
        iph = (struct iphdr *)((u8 *)(ethh) + eth_hlen);
 
        if (eth_type == ETH_P_IP) {
+               if (iph->protocol != IPPROTO_TCP) {
+                       DP_NOTICE(p_hwfn,
+                                 "Unexpected ip protocol on ll2 %x\n",
+                                 iph->protocol);
+                       return -EINVAL;
+               }
+
                cm_info->local_ip[0] = ntohl(iph->daddr);
                cm_info->remote_ip[0] = ntohl(iph->saddr);
                cm_info->ip_version = TCP_IPV4;
@@ -1711,6 +1718,14 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn,
                *payload_len = ntohs(iph->tot_len) - ip_hlen;
        } else if (eth_type == ETH_P_IPV6) {
                ip6h = (struct ipv6hdr *)iph;
+
+               /* Reject non-TCP; log the IPv6 next header that was tested */
+               if (ip6h->nexthdr != IPPROTO_TCP) {
+                       DP_NOTICE(p_hwfn, "Unexpected ip protocol on ll2 %x\n",
+                                 ip6h->nexthdr);
+                       return -EINVAL;
+               }
+
                for (i = 0; i < 4; i++) {
                        cm_info->local_ip[i] =
                            ntohl(ip6h->daddr.in6_u.u6_addr32[i]);
@@ -1784,7 +1799,7 @@ enum qed_iwarp_mpa_pkt_type {
 /* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */
 #define QED_IWARP_MAX_BDS_PER_FPDU 3
 
-char *pkt_type_str[] = {
+static const char * const pkt_type_str[] = {
        "QED_IWARP_MPA_PKT_PACKED",
        "QED_IWARP_MPA_PKT_PARTIAL",
        "QED_IWARP_MPA_PKT_UNALIGNED"
@@ -1928,8 +1943,8 @@ qed_iwarp_update_fpdu_length(struct qed_hwfn *p_hwfn,
                /* Missing lower byte is now available */
                mpa_len = fpdu->fpdu_length | *mpa_data;
                fpdu->fpdu_length = QED_IWARP_FPDU_LEN_WITH_PAD(mpa_len);
-               fpdu->mpa_frag_len = fpdu->fpdu_length;
                /* one byte of hdr */
+               fpdu->mpa_frag_len = 1;
                fpdu->incomplete_bytes = fpdu->fpdu_length - 1;
                DP_VERBOSE(p_hwfn,
                           QED_MSG_RDMA,
index 5d040b873137d0917637b6df42eddc4e0e031595..a411f9c702a16ae6963aa5c7eda112cc5a72404d 100644 (file)
@@ -379,6 +379,7 @@ static void qed_rdma_free(struct qed_hwfn *p_hwfn)
        DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Freeing RDMA\n");
 
        qed_rdma_free_reserved_lkey(p_hwfn);
+       qed_cxt_free_proto_ilt(p_hwfn, p_hwfn->p_rdma_info->proto);
        qed_rdma_resc_free(p_hwfn);
 }
 
index 2db70eabddfec1edfe38b524233c3baaa9f883f5..a01e7d6e5442f079e9006811b82b4feb02dc23bc 100644 (file)
@@ -288,7 +288,7 @@ int __init qede_init(void)
        }
 
        /* Must register notifier before pci ops, since we might miss
-        * interface rename after pci probe and netdev registeration.
+        * interface rename after pci probe and netdev registration.
         */
        ret = register_netdevice_notifier(&qede_netdev_notifier);
        if (ret) {
@@ -988,7 +988,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
        if (rc)
                goto err3;
 
-       /* Prepare the lock prior to the registeration of the netdev,
+       /* Prepare the lock prior to the registration of the netdev,
         * as once it's registered we might reach flows requiring it
         * [it's even possible to reach a flow needing it directly
         * from there, although it's unlikely].
@@ -2067,8 +2067,6 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
        link_params.link_up = true;
        edev->ops->common->set_link(edev->cdev, &link_params);
 
-       qede_rdma_dev_event_open(edev);
-
        edev->state = QEDE_STATE_OPEN;
 
        DP_INFO(edev, "Ending successfully qede load\n");
@@ -2169,12 +2167,14 @@ static void qede_link_update(void *dev, struct qed_link_output *link)
                        DP_NOTICE(edev, "Link is up\n");
                        netif_tx_start_all_queues(edev->ndev);
                        netif_carrier_on(edev->ndev);
+                       qede_rdma_dev_event_open(edev);
                }
        } else {
                if (netif_carrier_ok(edev->ndev)) {
                        DP_NOTICE(edev, "Link is down\n");
                        netif_tx_disable(edev->ndev);
                        netif_carrier_off(edev->ndev);
+                       qede_rdma_dev_event_close(edev);
                }
        }
 }
index 9b2280badaf77666ceab5cf0409f484ed08719b8..02adb513f4756cb58c423936213bdcb4158d1dfa 100644 (file)
@@ -485,7 +485,7 @@ int qede_ptp_enable(struct qede_dev *edev, bool init_tc)
        ptp->clock = ptp_clock_register(&ptp->clock_info, &edev->pdev->dev);
        if (IS_ERR(ptp->clock)) {
                rc = -EINVAL;
-               DP_ERR(edev, "PTP clock registeration failed\n");
+               DP_ERR(edev, "PTP clock registration failed\n");
                goto err2;
        }
 
index 9cbb27263742bf0506684bd2e76d517037217475..d5a32b7c7dc5a4d97c89ba9d33ca769e51c00daf 100644 (file)
@@ -1194,9 +1194,9 @@ void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q)
        while (tx_q->tpd.consume_idx != hw_consume_idx) {
                tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.consume_idx);
                if (tpbuf->dma_addr) {
-                       dma_unmap_single(adpt->netdev->dev.parent,
-                                        tpbuf->dma_addr, tpbuf->length,
-                                        DMA_TO_DEVICE);
+                       dma_unmap_page(adpt->netdev->dev.parent,
+                                      tpbuf->dma_addr, tpbuf->length,
+                                      DMA_TO_DEVICE);
                        tpbuf->dma_addr = 0;
                }
 
@@ -1353,9 +1353,11 @@ static void emac_tx_fill_tpd(struct emac_adapter *adpt,
 
                tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx);
                tpbuf->length = mapped_len;
-               tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent,
-                                                skb->data, tpbuf->length,
-                                                DMA_TO_DEVICE);
+               tpbuf->dma_addr = dma_map_page(adpt->netdev->dev.parent,
+                                              virt_to_page(skb->data),
+                                              offset_in_page(skb->data),
+                                              tpbuf->length,
+                                              DMA_TO_DEVICE);
                ret = dma_mapping_error(adpt->netdev->dev.parent,
                                        tpbuf->dma_addr);
                if (ret)
@@ -1371,9 +1373,12 @@ static void emac_tx_fill_tpd(struct emac_adapter *adpt,
        if (mapped_len < len) {
                tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx);
                tpbuf->length = len - mapped_len;
-               tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent,
-                                                skb->data + mapped_len,
-                                                tpbuf->length, DMA_TO_DEVICE);
+               tpbuf->dma_addr = dma_map_page(adpt->netdev->dev.parent,
+                                              virt_to_page(skb->data +
+                                                           mapped_len),
+                                              offset_in_page(skb->data +
+                                                             mapped_len),
+                                              tpbuf->length, DMA_TO_DEVICE);
                ret = dma_mapping_error(adpt->netdev->dev.parent,
                                        tpbuf->dma_addr);
                if (ret)
index 9c236298fe2125cf26afef756cfd5f3f7250098b..5803cd6db406c7f9c5426ceb87bf062d4f0434fb 100644 (file)
@@ -705,7 +705,6 @@ qcaspi_netdev_xmit(struct sk_buff *skb, struct net_device *dev)
                tskb = skb_copy_expand(skb, QCAFRM_HEADER_LEN,
                                       QCAFRM_FOOTER_LEN + pad_len, GFP_ATOMIC);
                if (!tskb) {
-                       netdev_dbg(qca->net_dev, "could not allocate tx_buff\n");
                        qca->stats.out_of_mem++;
                        return NETDEV_TX_BUSY;
                }
index c4949183eef3f0654cf8e5855ba6b85bce6b35f3..38d9356ebcc4cffbc8b73b2e9998a1c2a2937e55 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
 
 /* Local Definitions and Declarations */
 
+static const struct nla_policy rmnet_policy[IFLA_RMNET_MAX + 1] = {
+       [IFLA_RMNET_MUX_ID]     = { .type = NLA_U16 },
+       [IFLA_RMNET_FLAGS]      = { .len = sizeof(struct ifla_rmnet_flags) },
+};
+
 static int rmnet_is_real_dev_registered(const struct net_device *real_dev)
 {
        return rcu_access_pointer(real_dev->rx_handler) == rmnet_rx_handler;
@@ -131,7 +136,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
                         struct nlattr *tb[], struct nlattr *data[],
                         struct netlink_ext_ack *extack)
 {
-       u32 data_format = RMNET_INGRESS_FORMAT_DEAGGREGATION;
+       u32 data_format = RMNET_FLAGS_INGRESS_DEAGGREGATION;
        struct net_device *real_dev;
        int mode = RMNET_EPMODE_VND;
        struct rmnet_endpoint *ep;
@@ -143,14 +148,14 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
        if (!real_dev || !dev)
                return -ENODEV;
 
-       if (!data[IFLA_VLAN_ID])
+       if (!data[IFLA_RMNET_MUX_ID])
                return -EINVAL;
 
        ep = kzalloc(sizeof(*ep), GFP_ATOMIC);
        if (!ep)
                return -ENOMEM;
 
-       mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
+       mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]);
 
        err = rmnet_register_real_device(real_dev);
        if (err)
@@ -165,10 +170,10 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
 
        hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]);
 
-       if (data[IFLA_VLAN_FLAGS]) {
-               struct ifla_vlan_flags *flags;
+       if (data[IFLA_RMNET_FLAGS]) {
+               struct ifla_rmnet_flags *flags;
 
-               flags = nla_data(data[IFLA_VLAN_FLAGS]);
+               flags = nla_data(data[IFLA_RMNET_FLAGS]);
                data_format = flags->flags & flags->mask;
        }
 
@@ -276,10 +281,10 @@ static int rmnet_rtnl_validate(struct nlattr *tb[], struct nlattr *data[],
 {
        u16 mux_id;
 
-       if (!data || !data[IFLA_VLAN_ID])
+       if (!data || !data[IFLA_RMNET_MUX_ID])
                return -EINVAL;
 
-       mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
+       mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]);
        if (mux_id > (RMNET_MAX_LOGICAL_EP - 1))
                return -ERANGE;
 
@@ -304,8 +309,8 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
 
        port = rmnet_get_port_rtnl(real_dev);
 
-       if (data[IFLA_VLAN_ID]) {
-               mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
+       if (data[IFLA_RMNET_MUX_ID]) {
+               mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]);
                ep = rmnet_get_endpoint(port, priv->mux_id);
 
                hlist_del_init_rcu(&ep->hlnode);
@@ -315,10 +320,10 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
                priv->mux_id = mux_id;
        }
 
-       if (data[IFLA_VLAN_FLAGS]) {
-               struct ifla_vlan_flags *flags;
+       if (data[IFLA_RMNET_FLAGS]) {
+               struct ifla_rmnet_flags *flags;
 
-               flags = nla_data(data[IFLA_VLAN_FLAGS]);
+               flags = nla_data(data[IFLA_RMNET_FLAGS]);
                port->data_format = flags->flags & flags->mask;
        }
 
@@ -327,13 +332,45 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
 
 static size_t rmnet_get_size(const struct net_device *dev)
 {
-       return nla_total_size(2) /* IFLA_VLAN_ID */ +
-              nla_total_size(sizeof(struct ifla_vlan_flags)); /* IFLA_VLAN_FLAGS */
+       return
+               /* IFLA_RMNET_MUX_ID */
+               nla_total_size(2) +
+               /* IFLA_RMNET_FLAGS */
+               nla_total_size(sizeof(struct ifla_rmnet_flags));
+}
+
+static int rmnet_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+       struct rmnet_priv *priv = netdev_priv(dev);
+       struct net_device *real_dev;
+       struct ifla_rmnet_flags f;
+       struct rmnet_port *port;
+
+       real_dev = priv->real_dev;
+
+       if (!rmnet_is_real_dev_registered(real_dev))
+               return -ENODEV;
+
+       if (nla_put_u16(skb, IFLA_RMNET_MUX_ID, priv->mux_id))
+               goto nla_put_failure;
+
+       port = rmnet_get_port_rtnl(real_dev);
+
+       f.flags = port->data_format;
+       f.mask  = ~0;
+
+       if (nla_put(skb, IFLA_RMNET_FLAGS, sizeof(f), &f))
+               goto nla_put_failure;
+
+       return 0;
+
+nla_put_failure:
+       return -EMSGSIZE;
 }
 
 struct rtnl_link_ops rmnet_link_ops __read_mostly = {
        .kind           = "rmnet",
-       .maxtype        = __IFLA_VLAN_MAX,
+       .maxtype        = __IFLA_RMNET_MAX,
        .priv_size      = sizeof(struct rmnet_priv),
        .setup          = rmnet_vnd_setup,
        .validate       = rmnet_rtnl_validate,
@@ -341,6 +378,8 @@ struct rtnl_link_ops rmnet_link_ops __read_mostly = {
        .dellink        = rmnet_dellink,
        .get_size       = rmnet_get_size,
        .changelink     = rmnet_changelink,
+       .policy         = rmnet_policy,
+       .fill_info      = rmnet_fill_info,
 };
 
 /* Needs either rcu_read_lock() or rtnl lock */
index 00e4634100d35f197b7c8c301b93400ea4981eb8..0b5b5da801988324d687752b7fabb14e5487da0c 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2014, 2016-2017 The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2014, 2016-2018 The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
index 601edec28c5f84fd3934a03eecd2fb6aaeba86bf..6fcd586e980483ef8480dea17eeba7219495a5a7 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -70,7 +70,7 @@ __rmnet_map_ingress_handler(struct sk_buff *skb,
        u8 mux_id;
 
        if (RMNET_MAP_GET_CD_BIT(skb)) {
-               if (port->data_format & RMNET_INGRESS_FORMAT_MAP_COMMANDS)
+               if (port->data_format & RMNET_FLAGS_INGRESS_MAP_COMMANDS)
                        return rmnet_map_command(skb, port);
 
                goto free_skb;
@@ -93,7 +93,7 @@ __rmnet_map_ingress_handler(struct sk_buff *skb,
        skb_pull(skb, sizeof(struct rmnet_map_header));
        rmnet_set_skb_proto(skb);
 
-       if (port->data_format & RMNET_INGRESS_FORMAT_MAP_CKSUMV4) {
+       if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4) {
                if (!rmnet_map_checksum_downlink_packet(skb, len + pad))
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
        }
@@ -121,7 +121,7 @@ rmnet_map_ingress_handler(struct sk_buff *skb,
                skb_push(skb, ETH_HLEN);
        }
 
-       if (port->data_format & RMNET_INGRESS_FORMAT_DEAGGREGATION) {
+       if (port->data_format & RMNET_FLAGS_INGRESS_DEAGGREGATION) {
                while ((skbn = rmnet_map_deaggregate(skb, port)) != NULL)
                        __rmnet_map_ingress_handler(skbn, port);
 
@@ -141,7 +141,7 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
        additional_header_len = 0;
        required_headroom = sizeof(struct rmnet_map_header);
 
-       if (port->data_format & RMNET_EGRESS_FORMAT_MAP_CKSUMV4) {
+       if (port->data_format & RMNET_FLAGS_EGRESS_MAP_CKSUMV4) {
                additional_header_len = sizeof(struct rmnet_map_ul_csum_header);
                required_headroom += additional_header_len;
        }
@@ -151,7 +151,7 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
                        goto fail;
        }
 
-       if (port->data_format & RMNET_EGRESS_FORMAT_MAP_CKSUMV4)
+       if (port->data_format & RMNET_FLAGS_EGRESS_MAP_CKSUMV4)
                rmnet_map_checksum_uplink_packet(skb, orig_dev);
 
        map_header = rmnet_map_add_map_header(skb, additional_header_len, 0);
index 6ce31e29136d4bf79ce6425fdb6cf1574462dd6d..884f1f52dcc25e88713a28978bb9bdaa4bc3a320 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -23,8 +23,8 @@ struct rmnet_map_control_command {
                struct {
                        u16 ip_family:2;
                        u16 reserved:14;
-                       u16 flow_control_seq_num;
-                       u32 qos_id;
+                       __be16 flow_control_seq_num;
+                       __be32 qos_id;
                } flow_control;
                u8 data[0];
        };
@@ -44,7 +44,7 @@ struct rmnet_map_header {
        u8  reserved_bit:1;
        u8  cd_bit:1;
        u8  mux_id;
-       u16 pkt_len;
+       __be16 pkt_len;
 }  __aligned(1);
 
 struct rmnet_map_dl_csum_trailer {
index b0dbca070c008d386699824ce72a6d6f4c0e2d73..78fdad0c6f76b1358906f99f095cd9abb2a27562 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -69,7 +69,7 @@ static void rmnet_map_send_ack(struct sk_buff *skb,
        struct rmnet_map_control_command *cmd;
        int xmit_status;
 
-       if (port->data_format & RMNET_INGRESS_FORMAT_MAP_CKSUMV4) {
+       if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4) {
                if (skb->len < sizeof(struct rmnet_map_header) +
                    RMNET_MAP_GET_LENGTH(skb) +
                    sizeof(struct rmnet_map_dl_csum_trailer)) {
index c74a6c56d315cfa99a4d91f1849526c18f2f2492..a6ea09416f8ddac418da84aea6dc3ca2ce2c4de5 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -309,7 +309,7 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
        maph = (struct rmnet_map_header *)skb->data;
        packet_len = ntohs(maph->pkt_len) + sizeof(struct rmnet_map_header);
 
-       if (port->data_format & RMNET_INGRESS_FORMAT_MAP_CKSUMV4)
+       if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4)
                packet_len += sizeof(struct rmnet_map_dl_csum_trailer);
 
        if (((int)skb->len - (int)packet_len) < 0)
@@ -323,7 +323,6 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
        if (!skbn)
                return NULL;
 
-       skbn->dev = skb->dev;
        skb_reserve(skbn, RMNET_MAP_DEAGGR_HEADROOM);
        skb_put(skbn, packet_len);
        memcpy(skbn->data, skb->data, packet_len);
index de0143eaa05ab7fb6ca7ec93a8b8e8d7700eb88b..b9cc4f85f2299de7661c5ee337c96fa410c31b5e 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2014, 2016-2017 The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2014, 2016-2018 The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
 #define RMNET_NEEDED_HEADROOM      16
 #define RMNET_TX_QUEUE_LEN         1000
 
-/* Constants */
-#define RMNET_INGRESS_FORMAT_DEAGGREGATION      BIT(0)
-#define RMNET_INGRESS_FORMAT_MAP_COMMANDS       BIT(1)
-#define RMNET_INGRESS_FORMAT_MAP_CKSUMV4        BIT(2)
-#define RMNET_EGRESS_FORMAT_MAP_CKSUMV4         BIT(3)
-
 /* Replace skb->dev to a virtual rmnet device and pass up the stack */
 #define RMNET_EPMODE_VND (1)
 /* Pass the frame directly to another device with dev_queue_xmit() */
index 346d310914df17791018108d5ddd67715b04ce92..2ea16a088de8731cc96a2db88c844fa1ee0810a0 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
index 96db3283eecf7344b80bafbf404b55aea13b90f9..630409e0337fbaf7b98bb23e578c85b2aceae30b 100644 (file)
@@ -99,12 +99,12 @@ static const int multicast_filter_limit = 32;
 #define RTL8169_PHY_TIMEOUT    (10*HZ)
 
 /* write/read MMIO register */
-#define RTL_W8(reg, val8)      writeb ((val8), ioaddr + (reg))
-#define RTL_W16(reg, val16)    writew ((val16), ioaddr + (reg))
-#define RTL_W32(reg, val32)    writel ((val32), ioaddr + (reg))
-#define RTL_R8(reg)            readb (ioaddr + (reg))
-#define RTL_R16(reg)           readw (ioaddr + (reg))
-#define RTL_R32(reg)           readl (ioaddr + (reg))
+#define RTL_W8(tp, reg, val8)  writeb((val8), tp->mmio_addr + (reg))
+#define RTL_W16(tp, reg, val16)        writew((val16), tp->mmio_addr + (reg))
+#define RTL_W32(tp, reg, val32)        writel((val32), tp->mmio_addr + (reg))
+#define RTL_R8(tp, reg)                readb(tp->mmio_addr + (reg))
+#define RTL_R16(tp, reg)               readw(tp->mmio_addr + (reg))
+#define RTL_R32(tp, reg)               readl(tp->mmio_addr + (reg))
 
 enum mac_version {
        RTL_GIGA_MAC_VER_01 = 0,
@@ -735,11 +735,6 @@ struct ring_info {
        u8              __pad[sizeof(void *) - sizeof(u32)];
 };
 
-enum features {
-       RTL_FEATURE_MSI         = (1 << 0),
-       RTL_FEATURE_GMII        = (1 << 1),
-};
-
 struct rtl8169_counters {
        __le64  tx_packets;
        __le64  rx_packets;
@@ -828,7 +823,7 @@ struct rtl8169_private {
        void (*phy_reset_enable)(struct rtl8169_private *tp);
        void (*hw_start)(struct net_device *);
        unsigned int (*phy_reset_pending)(struct rtl8169_private *tp);
-       unsigned int (*link_ok)(void __iomem *);
+       unsigned int (*link_ok)(struct rtl8169_private *tp);
        int (*do_ioctl)(struct rtl8169_private *tp, struct mii_ioctl_data *data, int cmd);
        bool (*tso_csum)(struct rtl8169_private *, struct sk_buff *, u32 *);
 
@@ -892,6 +887,11 @@ MODULE_FIRMWARE(FIRMWARE_8168H_2);
 MODULE_FIRMWARE(FIRMWARE_8107E_1);
 MODULE_FIRMWARE(FIRMWARE_8107E_2);
 
+static inline struct device *tp_to_dev(struct rtl8169_private *tp)
+{
+       return &tp->pci_dev->dev;
+}
+
 static void rtl_lock_work(struct rtl8169_private *tp)
 {
        mutex_lock(&tp->wk.mutex);
@@ -902,9 +902,9 @@ static void rtl_unlock_work(struct rtl8169_private *tp)
        mutex_unlock(&tp->wk.mutex);
 }
 
-static void rtl_tx_performance_tweak(struct pci_dev *pdev, u16 force)
+static void rtl_tx_performance_tweak(struct rtl8169_private *tp, u16 force)
 {
-       pcie_capability_clear_and_set_word(pdev, PCI_EXP_DEVCTL,
+       pcie_capability_clear_and_set_word(tp->pci_dev, PCI_EXP_DEVCTL,
                                           PCI_EXP_DEVCTL_READRQ, force);
 }
 
@@ -983,56 +983,46 @@ static bool rtl_ocp_reg_failure(struct rtl8169_private *tp, u32 reg)
 
 DECLARE_RTL_COND(rtl_ocp_gphy_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(GPHY_OCP) & OCPAR_FLAG;
+       return RTL_R32(tp, GPHY_OCP) & OCPAR_FLAG;
 }
 
 static void r8168_phy_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        if (rtl_ocp_reg_failure(tp, reg))
                return;
 
-       RTL_W32(GPHY_OCP, OCPAR_FLAG | (reg << 15) | data);
+       RTL_W32(tp, GPHY_OCP, OCPAR_FLAG | (reg << 15) | data);
 
        rtl_udelay_loop_wait_low(tp, &rtl_ocp_gphy_cond, 25, 10);
 }
 
 static u16 r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        if (rtl_ocp_reg_failure(tp, reg))
                return 0;
 
-       RTL_W32(GPHY_OCP, reg << 15);
+       RTL_W32(tp, GPHY_OCP, reg << 15);
 
        return rtl_udelay_loop_wait_high(tp, &rtl_ocp_gphy_cond, 25, 10) ?
-               (RTL_R32(GPHY_OCP) & 0xffff) : ~0;
+               (RTL_R32(tp, GPHY_OCP) & 0xffff) : ~0;
 }
 
 static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        if (rtl_ocp_reg_failure(tp, reg))
                return;
 
-       RTL_W32(OCPDR, OCPAR_FLAG | (reg << 15) | data);
+       RTL_W32(tp, OCPDR, OCPAR_FLAG | (reg << 15) | data);
 }
 
 static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        if (rtl_ocp_reg_failure(tp, reg))
                return 0;
 
-       RTL_W32(OCPDR, reg << 15);
+       RTL_W32(tp, OCPDR, reg << 15);
 
-       return RTL_R32(OCPDR);
+       return RTL_R32(tp, OCPDR);
 }
 
 #define OCP_STD_PHY_BASE       0xa400
@@ -1075,16 +1065,12 @@ static int mac_mcu_read(struct rtl8169_private *tp, int reg)
 
 DECLARE_RTL_COND(rtl_phyar_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(PHYAR) & 0x80000000;
+       return RTL_R32(tp, PHYAR) & 0x80000000;
 }
 
 static void r8169_mdio_write(struct rtl8169_private *tp, int reg, int value)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(PHYAR, 0x80000000 | (reg & 0x1f) << 16 | (value & 0xffff));
+       RTL_W32(tp, PHYAR, 0x80000000 | (reg & 0x1f) << 16 | (value & 0xffff));
 
        rtl_udelay_loop_wait_low(tp, &rtl_phyar_cond, 25, 20);
        /*
@@ -1096,13 +1082,12 @@ static void r8169_mdio_write(struct rtl8169_private *tp, int reg, int value)
 
 static int r8169_mdio_read(struct rtl8169_private *tp, int reg)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        int value;
 
-       RTL_W32(PHYAR, 0x0 | (reg & 0x1f) << 16);
+       RTL_W32(tp, PHYAR, 0x0 | (reg & 0x1f) << 16);
 
        value = rtl_udelay_loop_wait_high(tp, &rtl_phyar_cond, 25, 20) ?
-               RTL_R32(PHYAR) & 0xffff : ~0;
+               RTL_R32(tp, PHYAR) & 0xffff : ~0;
 
        /*
         * According to hardware specs a 20us delay is required after read
@@ -1115,18 +1100,14 @@ static int r8169_mdio_read(struct rtl8169_private *tp, int reg)
 
 DECLARE_RTL_COND(rtl_ocpar_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(OCPAR) & OCPAR_FLAG;
+       return RTL_R32(tp, OCPAR) & OCPAR_FLAG;
 }
 
 static void r8168dp_1_mdio_access(struct rtl8169_private *tp, int reg, u32 data)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(OCPDR, data | ((reg & OCPDR_REG_MASK) << OCPDR_GPHY_REG_SHIFT));
-       RTL_W32(OCPAR, OCPAR_GPHY_WRITE_CMD);
-       RTL_W32(EPHY_RXER_NUM, 0);
+       RTL_W32(tp, OCPDR, data | ((reg & OCPDR_REG_MASK) << OCPDR_GPHY_REG_SHIFT));
+       RTL_W32(tp, OCPAR, OCPAR_GPHY_WRITE_CMD);
+       RTL_W32(tp, EPHY_RXER_NUM, 0);
 
        rtl_udelay_loop_wait_low(tp, &rtl_ocpar_cond, 1000, 100);
 }
@@ -1139,51 +1120,46 @@ static void r8168dp_1_mdio_write(struct rtl8169_private *tp, int reg, int value)
 
 static int r8168dp_1_mdio_read(struct rtl8169_private *tp, int reg)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        r8168dp_1_mdio_access(tp, reg, OCPDR_READ_CMD);
 
        mdelay(1);
-       RTL_W32(OCPAR, OCPAR_GPHY_READ_CMD);
-       RTL_W32(EPHY_RXER_NUM, 0);
+       RTL_W32(tp, OCPAR, OCPAR_GPHY_READ_CMD);
+       RTL_W32(tp, EPHY_RXER_NUM, 0);
 
        return rtl_udelay_loop_wait_high(tp, &rtl_ocpar_cond, 1000, 100) ?
-               RTL_R32(OCPDR) & OCPDR_DATA_MASK : ~0;
+               RTL_R32(tp, OCPDR) & OCPDR_DATA_MASK : ~0;
 }
 
 #define R8168DP_1_MDIO_ACCESS_BIT      0x00020000
 
-static void r8168dp_2_mdio_start(void __iomem *ioaddr)
+static void r8168dp_2_mdio_start(struct rtl8169_private *tp)
 {
-       RTL_W32(0xd0, RTL_R32(0xd0) & ~R8168DP_1_MDIO_ACCESS_BIT);
+       RTL_W32(tp, 0xd0, RTL_R32(tp, 0xd0) & ~R8168DP_1_MDIO_ACCESS_BIT);
 }
 
-static void r8168dp_2_mdio_stop(void __iomem *ioaddr)
+static void r8168dp_2_mdio_stop(struct rtl8169_private *tp)
 {
-       RTL_W32(0xd0, RTL_R32(0xd0) | R8168DP_1_MDIO_ACCESS_BIT);
+       RTL_W32(tp, 0xd0, RTL_R32(tp, 0xd0) | R8168DP_1_MDIO_ACCESS_BIT);
 }
 
 static void r8168dp_2_mdio_write(struct rtl8169_private *tp, int reg, int value)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       r8168dp_2_mdio_start(ioaddr);
+       r8168dp_2_mdio_start(tp);
 
        r8169_mdio_write(tp, reg, value);
 
-       r8168dp_2_mdio_stop(ioaddr);
+       r8168dp_2_mdio_stop(tp);
 }
 
 static int r8168dp_2_mdio_read(struct rtl8169_private *tp, int reg)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        int value;
 
-       r8168dp_2_mdio_start(ioaddr);
+       r8168dp_2_mdio_start(tp);
 
        value = r8169_mdio_read(tp, reg);
 
-       r8168dp_2_mdio_stop(ioaddr);
+       r8168dp_2_mdio_stop(tp);
 
        return value;
 }
@@ -1228,16 +1204,12 @@ static int rtl_mdio_read(struct net_device *dev, int phy_id, int location)
 
 DECLARE_RTL_COND(rtl_ephyar_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(EPHYAR) & EPHYAR_FLAG;
+       return RTL_R32(tp, EPHYAR) & EPHYAR_FLAG;
 }
 
 static void rtl_ephy_write(struct rtl8169_private *tp, int reg_addr, int value)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(EPHYAR, EPHYAR_WRITE_CMD | (value & EPHYAR_DATA_MASK) |
+       RTL_W32(tp, EPHYAR, EPHYAR_WRITE_CMD | (value & EPHYAR_DATA_MASK) |
                (reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
 
        rtl_udelay_loop_wait_low(tp, &rtl_ephyar_cond, 10, 100);
@@ -1247,41 +1219,33 @@ static void rtl_ephy_write(struct rtl8169_private *tp, int reg_addr, int value)
 
 static u16 rtl_ephy_read(struct rtl8169_private *tp, int reg_addr)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(EPHYAR, (reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
+       RTL_W32(tp, EPHYAR, (reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
 
        return rtl_udelay_loop_wait_high(tp, &rtl_ephyar_cond, 10, 100) ?
-               RTL_R32(EPHYAR) & EPHYAR_DATA_MASK : ~0;
+               RTL_R32(tp, EPHYAR) & EPHYAR_DATA_MASK : ~0;
 }
 
 DECLARE_RTL_COND(rtl_eriar_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(ERIAR) & ERIAR_FLAG;
+       return RTL_R32(tp, ERIAR) & ERIAR_FLAG;
 }
 
 static void rtl_eri_write(struct rtl8169_private *tp, int addr, u32 mask,
                          u32 val, int type)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        BUG_ON((addr & 3) || (mask == 0));
-       RTL_W32(ERIDR, val);
-       RTL_W32(ERIAR, ERIAR_WRITE_CMD | type | mask | addr);
+       RTL_W32(tp, ERIDR, val);
+       RTL_W32(tp, ERIAR, ERIAR_WRITE_CMD | type | mask | addr);
 
        rtl_udelay_loop_wait_low(tp, &rtl_eriar_cond, 100, 100);
 }
 
 static u32 rtl_eri_read(struct rtl8169_private *tp, int addr, int type)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(ERIAR, ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr);
+       RTL_W32(tp, ERIAR, ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr);
 
        return rtl_udelay_loop_wait_high(tp, &rtl_eriar_cond, 100, 100) ?
-               RTL_R32(ERIDR) : ~0;
+               RTL_R32(tp, ERIDR) : ~0;
 }
 
 static void rtl_w0w1_eri(struct rtl8169_private *tp, int addr, u32 mask, u32 p,
@@ -1295,11 +1259,9 @@ static void rtl_w0w1_eri(struct rtl8169_private *tp, int addr, u32 mask, u32 p,
 
 static u32 r8168dp_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(OCPAR, ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
+       RTL_W32(tp, OCPAR, ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
        return rtl_udelay_loop_wait_high(tp, &rtl_ocpar_cond, 100, 20) ?
-               RTL_R32(OCPDR) : ~0;
+               RTL_R32(tp, OCPDR) : ~0;
 }
 
 static u32 r8168ep_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
@@ -1327,10 +1289,8 @@ static u32 ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
 static void r8168dp_ocp_write(struct rtl8169_private *tp, u8 mask, u16 reg,
                              u32 data)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(OCPDR, data);
-       RTL_W32(OCPAR, OCPAR_FLAG | ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
+       RTL_W32(tp, OCPDR, data);
+       RTL_W32(tp, OCPAR, OCPAR_FLAG | ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
        rtl_udelay_loop_wait_low(tp, &rtl_ocpar_cond, 100, 20);
 }
 
@@ -1392,19 +1352,15 @@ DECLARE_RTL_COND(rtl_ep_ocp_read_cond)
 
 DECLARE_RTL_COND(rtl_ocp_tx_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R8(IBISR0) & 0x20;
+       return RTL_R8(tp, IBISR0) & 0x20;
 }
 
 static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01);
+       RTL_W8(tp, IBCR2, RTL_R8(tp, IBCR2) & ~0x01);
        rtl_msleep_loop_wait_high(tp, &rtl_ocp_tx_cond, 50, 2000);
-       RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20);
-       RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01);
+       RTL_W8(tp, IBISR0, RTL_R8(tp, IBISR0) | 0x20);
+       RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
 }
 
 static void rtl8168dp_driver_start(struct rtl8169_private *tp)
@@ -1517,49 +1473,37 @@ static void rtl_write_exgmac_batch(struct rtl8169_private *tp,
 
 DECLARE_RTL_COND(rtl_efusear_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(EFUSEAR) & EFUSEAR_FLAG;
+       return RTL_R32(tp, EFUSEAR) & EFUSEAR_FLAG;
 }
 
 static u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(EFUSEAR, (reg_addr & EFUSEAR_REG_MASK) << EFUSEAR_REG_SHIFT);
+       RTL_W32(tp, EFUSEAR, (reg_addr & EFUSEAR_REG_MASK) << EFUSEAR_REG_SHIFT);
 
        return rtl_udelay_loop_wait_high(tp, &rtl_efusear_cond, 100, 300) ?
-               RTL_R32(EFUSEAR) & EFUSEAR_DATA_MASK : ~0;
+               RTL_R32(tp, EFUSEAR) & EFUSEAR_DATA_MASK : ~0;
 }
 
 static u16 rtl_get_events(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R16(IntrStatus);
+       return RTL_R16(tp, IntrStatus);
 }
 
 static void rtl_ack_events(struct rtl8169_private *tp, u16 bits)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W16(IntrStatus, bits);
+       RTL_W16(tp, IntrStatus, bits);
        mmiowb();
 }
 
 static void rtl_irq_disable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W16(IntrMask, 0);
+       RTL_W16(tp, IntrMask, 0);
        mmiowb();
 }
 
 static void rtl_irq_enable(struct rtl8169_private *tp, u16 bits)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W16(IntrMask, bits);
+       RTL_W16(tp, IntrMask, bits);
 }
 
 #define RTL_EVENT_NAPI_RX      (RxOK | RxErr)
@@ -1573,18 +1517,14 @@ static void rtl_irq_enable_all(struct rtl8169_private *tp)
 
 static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        rtl_irq_disable(tp);
        rtl_ack_events(tp, RTL_EVENT_NAPI | tp->event_slow);
-       RTL_R8(ChipCmd);
+       RTL_R8(tp, ChipCmd);
 }
 
 static unsigned int rtl8169_tbi_reset_pending(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(TBICSR) & TBIReset;
+       return RTL_R32(tp, TBICSR) & TBIReset;
 }
 
 static unsigned int rtl8169_xmii_reset_pending(struct rtl8169_private *tp)
@@ -1592,21 +1532,19 @@ static unsigned int rtl8169_xmii_reset_pending(struct rtl8169_private *tp)
        return rtl_readphy(tp, MII_BMCR) & BMCR_RESET;
 }
 
-static unsigned int rtl8169_tbi_link_ok(void __iomem *ioaddr)
+static unsigned int rtl8169_tbi_link_ok(struct rtl8169_private *tp)
 {
-       return RTL_R32(TBICSR) & TBILinkOk;
+       return RTL_R32(tp, TBICSR) & TBILinkOk;
 }
 
-static unsigned int rtl8169_xmii_link_ok(void __iomem *ioaddr)
+static unsigned int rtl8169_xmii_link_ok(struct rtl8169_private *tp)
 {
-       return RTL_R8(PHYstatus) & LinkStatus;
+       return RTL_R8(tp, PHYstatus) & LinkStatus;
 }
 
 static void rtl8169_tbi_reset_enable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(TBICSR, RTL_R32(TBICSR) | TBIReset);
+       RTL_W32(tp, TBICSR, RTL_R32(tp, TBICSR) | TBIReset);
 }
 
 static void rtl8169_xmii_reset_enable(struct rtl8169_private *tp)
@@ -1619,7 +1557,6 @@ static void rtl8169_xmii_reset_enable(struct rtl8169_private *tp)
 
 static void rtl_link_chg_patch(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        struct net_device *dev = tp->dev;
 
        if (!netif_running(dev))
@@ -1627,12 +1564,12 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_34 ||
            tp->mac_version == RTL_GIGA_MAC_VER_38) {
-               if (RTL_R8(PHYstatus) & _1000bpsF) {
+               if (RTL_R8(tp, PHYstatus) & _1000bpsF) {
                        rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
                                      ERIAR_EXGMAC);
                        rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
                                      ERIAR_EXGMAC);
-               } else if (RTL_R8(PHYstatus) & _100bps) {
+               } else if (RTL_R8(tp, PHYstatus) & _100bps) {
                        rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x0000001f,
                                      ERIAR_EXGMAC);
                        rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
@@ -1650,7 +1587,7 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
                             ERIAR_EXGMAC);
        } else if (tp->mac_version == RTL_GIGA_MAC_VER_35 ||
                   tp->mac_version == RTL_GIGA_MAC_VER_36) {
-               if (RTL_R8(PHYstatus) & _1000bpsF) {
+               if (RTL_R8(tp, PHYstatus) & _1000bpsF) {
                        rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
                                      ERIAR_EXGMAC);
                        rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
@@ -1662,7 +1599,7 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
                                      ERIAR_EXGMAC);
                }
        } else if (tp->mac_version == RTL_GIGA_MAC_VER_37) {
-               if (RTL_R8(PHYstatus) & _10bps) {
+               if (RTL_R8(tp, PHYstatus) & _10bps) {
                        rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x4d02,
                                      ERIAR_EXGMAC);
                        rtl_eri_write(tp, 0x1dc, ERIAR_MASK_0011, 0x0060,
@@ -1675,20 +1612,21 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
 }
 
 static void rtl8169_check_link_status(struct net_device *dev,
-                                     struct rtl8169_private *tp,
-                                     void __iomem *ioaddr)
+                                     struct rtl8169_private *tp)
 {
-       if (tp->link_ok(ioaddr)) {
+       struct device *d = tp_to_dev(tp);
+
+       if (tp->link_ok(tp)) {
                rtl_link_chg_patch(tp);
                /* This is to cancel a scheduled suspend if there's one. */
-               pm_request_resume(&tp->pci_dev->dev);
+               pm_request_resume(d);
                netif_carrier_on(dev);
                if (net_ratelimit())
                        netif_info(tp, ifup, dev, "link up\n");
        } else {
                netif_carrier_off(dev);
                netif_info(tp, ifdown, dev, "link down\n");
-               pm_runtime_idle(&tp->pci_dev->dev);
+               pm_runtime_idle(d);
        }
 }
 
@@ -1696,15 +1634,14 @@ static void rtl8169_check_link_status(struct net_device *dev,
 
 static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        u8 options;
        u32 wolopts = 0;
 
-       options = RTL_R8(Config1);
+       options = RTL_R8(tp, Config1);
        if (!(options & PMEnable))
                return 0;
 
-       options = RTL_R8(Config3);
+       options = RTL_R8(tp, Config3);
        if (options & LinkUp)
                wolopts |= WAKE_PHY;
        switch (tp->mac_version) {
@@ -1734,7 +1671,7 @@ static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
                break;
        }
 
-       options = RTL_R8(Config5);
+       options = RTL_R8(tp, Config5);
        if (options & UWF)
                wolopts |= WAKE_UCAST;
        if (options & BWF)
@@ -1748,7 +1685,7 @@ static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
 static void rtl8169_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       struct device *d = &tp->pci_dev->dev;
+       struct device *d = tp_to_dev(tp);
 
        pm_runtime_get_noresume(d);
 
@@ -1767,7 +1704,6 @@ static void rtl8169_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 
 static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        unsigned int i, tmp;
        static const struct {
                u32 opt;
@@ -1783,7 +1719,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
        };
        u8 options;
 
-       RTL_W8(Cfg9346, Cfg9346_Unlock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_34:
@@ -1825,34 +1761,34 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
        }
 
        for (i = 0; i < tmp; i++) {
-               options = RTL_R8(cfg[i].reg) & ~cfg[i].mask;
+               options = RTL_R8(tp, cfg[i].reg) & ~cfg[i].mask;
                if (wolopts & cfg[i].opt)
                        options |= cfg[i].mask;
-               RTL_W8(cfg[i].reg, options);
+               RTL_W8(tp, cfg[i].reg, options);
        }
 
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_17:
-               options = RTL_R8(Config1) & ~PMEnable;
+               options = RTL_R8(tp, Config1) & ~PMEnable;
                if (wolopts)
                        options |= PMEnable;
-               RTL_W8(Config1, options);
+               RTL_W8(tp, Config1, options);
                break;
        default:
-               options = RTL_R8(Config2) & ~PME_SIGNAL;
+               options = RTL_R8(tp, Config2) & ~PME_SIGNAL;
                if (wolopts)
                        options |= PME_SIGNAL;
-               RTL_W8(Config2, options);
+               RTL_W8(tp, Config2, options);
                break;
        }
 
-       RTL_W8(Cfg9346, Cfg9346_Lock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 }
 
 static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       struct device *d = &tp->pci_dev->dev;
+       struct device *d = tp_to_dev(tp);
 
        pm_runtime_get_noresume(d);
 
@@ -1865,7 +1801,7 @@ static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 
        rtl_unlock_work(tp);
 
-       device_set_wakeup_enable(&tp->pci_dev->dev, wol->wolopts);
+       device_set_wakeup_enable(d, wol->wolopts);
 
        pm_runtime_put_noidle(d);
 
@@ -1901,16 +1837,15 @@ static int rtl8169_set_speed_tbi(struct net_device *dev,
                                 u8 autoneg, u16 speed, u8 duplex, u32 ignored)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        int ret = 0;
        u32 reg;
 
-       reg = RTL_R32(TBICSR);
+       reg = RTL_R32(tp, TBICSR);
        if ((autoneg == AUTONEG_DISABLE) && (speed == SPEED_1000) &&
            (duplex == DUPLEX_FULL)) {
-               RTL_W32(TBICSR, reg & ~(TBINwEnable | TBINwRestart));
+               RTL_W32(tp, TBICSR, reg & ~(TBINwEnable | TBINwRestart));
        } else if (autoneg == AUTONEG_ENABLE)
-               RTL_W32(TBICSR, reg | TBINwEnable | TBINwRestart);
+               RTL_W32(tp, TBICSR, reg | TBINwEnable | TBINwRestart);
        else {
                netif_warn(tp, link, dev,
                           "incorrect speed setting refused in TBI mode\n");
@@ -2035,16 +1970,15 @@ static void __rtl8169_set_features(struct net_device *dev,
                                   netdev_features_t features)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        u32 rx_config;
 
-       rx_config = RTL_R32(RxConfig);
+       rx_config = RTL_R32(tp, RxConfig);
        if (features & NETIF_F_RXALL)
                rx_config |= (AcceptErr | AcceptRunt);
        else
                rx_config &= ~(AcceptErr | AcceptRunt);
 
-       RTL_W32(RxConfig, rx_config);
+       RTL_W32(tp, RxConfig, rx_config);
 
        if (features & NETIF_F_RXCSUM)
                tp->cp_cmd |= RxChkSum;
@@ -2056,10 +1990,10 @@ static void __rtl8169_set_features(struct net_device *dev,
        else
                tp->cp_cmd &= ~RxVlan;
 
-       tp->cp_cmd |= RTL_R16(CPlusCmd) & ~(RxVlan | RxChkSum);
+       tp->cp_cmd |= RTL_R16(tp, CPlusCmd) & ~(RxVlan | RxChkSum);
 
-       RTL_W16(CPlusCmd, tp->cp_cmd);
-       RTL_R16(CPlusCmd);
+       RTL_W16(tp, CPlusCmd, tp->cp_cmd);
+       RTL_R16(tp, CPlusCmd);
 }
 
 static int rtl8169_set_features(struct net_device *dev,
@@ -2096,7 +2030,6 @@ static int rtl8169_get_link_ksettings_tbi(struct net_device *dev,
                                          struct ethtool_link_ksettings *cmd)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        u32 status;
        u32 supported, advertising;
 
@@ -2104,7 +2037,7 @@ static int rtl8169_get_link_ksettings_tbi(struct net_device *dev,
                SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | SUPPORTED_FIBRE;
        cmd->base.port = PORT_FIBRE;
 
-       status = RTL_R32(TBICSR);
+       status = RTL_R32(tp, TBICSR);
        advertising = (status & TBINwEnable) ?  ADVERTISED_Autoneg : 0;
        cmd->base.autoneg = !!(status & TBINwEnable);
 
@@ -2219,23 +2152,20 @@ static int rtl8169_get_sset_count(struct net_device *dev, int sset)
 
 DECLARE_RTL_COND(rtl_counters_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(CounterAddrLow) & (CounterReset | CounterDump);
+       return RTL_R32(tp, CounterAddrLow) & (CounterReset | CounterDump);
 }
 
 static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        dma_addr_t paddr = tp->counters_phys_addr;
        u32 cmd;
 
-       RTL_W32(CounterAddrHigh, (u64)paddr >> 32);
-       RTL_R32(CounterAddrHigh);
+       RTL_W32(tp, CounterAddrHigh, (u64)paddr >> 32);
+       RTL_R32(tp, CounterAddrHigh);
        cmd = (u64)paddr & DMA_BIT_MASK(32);
-       RTL_W32(CounterAddrLow, cmd);
-       RTL_W32(CounterAddrLow, cmd | counter_cmd);
+       RTL_W32(tp, CounterAddrLow, cmd);
+       RTL_W32(tp, CounterAddrLow, cmd | counter_cmd);
 
        return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
 }
@@ -2257,13 +2187,12 @@ static bool rtl8169_reset_counters(struct net_device *dev)
 static bool rtl8169_update_counters(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
 
        /*
         * Some chips are unable to dump tally counters when the receiver
         * is disabled.
         */
-       if ((RTL_R8(ChipCmd) & CmdRxEnb) == 0)
+       if ((RTL_R8(tp, ChipCmd) & CmdRxEnb) == 0)
                return true;
 
        return rtl8169_do_counters(dev, CounterDump);
@@ -2312,7 +2241,7 @@ static void rtl8169_get_ethtool_stats(struct net_device *dev,
                                      struct ethtool_stats *stats, u64 *data)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       struct device *d = &tp->pci_dev->dev;
+       struct device *d = tp_to_dev(tp);
        struct rtl8169_counters *counters = tp->counters;
 
        ASSERT_RTNL();
@@ -2443,7 +2372,6 @@ static const struct rtl_coalesce_info *rtl_coalesce_info(struct net_device *dev)
 static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        const struct rtl_coalesce_info *ci;
        const struct rtl_coalesce_scale *scale;
        struct {
@@ -2463,10 +2391,10 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
        if (IS_ERR(ci))
                return PTR_ERR(ci);
 
-       scale = &ci->scalev[RTL_R16(CPlusCmd) & 3];
+       scale = &ci->scalev[RTL_R16(tp, CPlusCmd) & 3];
 
        /* read IntrMitigate and adjust according to scale */
-       for (w = RTL_R16(IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
+       for (w = RTL_R16(tp, IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
                *p->max_frames = (w & RTL_COALESCE_MASK) << 2;
                w >>= RTL_COALESCE_SHIFT;
                *p->usecs = w & RTL_COALESCE_MASK;
@@ -2513,7 +2441,6 @@ static const struct rtl_coalesce_scale *rtl_coalesce_choose_scale(
 static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        const struct rtl_coalesce_scale *scale;
        struct {
                u32 frames;
@@ -2561,11 +2488,11 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 
        rtl_lock_work(tp);
 
-       RTL_W16(IntrMitigate, swab16(w));
+       RTL_W16(tp, IntrMitigate, swab16(w));
 
        tp->cp_cmd = (tp->cp_cmd & ~3) | cp01;
-       RTL_W16(CPlusCmd, tp->cp_cmd);
-       RTL_R16(CPlusCmd);
+       RTL_W16(tp, CPlusCmd, tp->cp_cmd);
+       RTL_R16(tp, CPlusCmd);
 
        rtl_unlock_work(tp);
 
@@ -2595,17 +2522,16 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
 static void rtl8169_get_mac_version(struct rtl8169_private *tp,
                                    struct net_device *dev, u8 default_version)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        /*
         * The driver currently handles the 8168Bf and the 8168Be identically
         * but they can be identified more specifically through the test below
         * if needed:
         *
-        * (RTL_R32(TxConfig) & 0x700000) == 0x500000 ? 8168Bf : 8168Be
+        * (RTL_R32(tp, TxConfig) & 0x700000) == 0x500000 ? 8168Bf : 8168Be
         *
         * Same thing for the 8101Eb and the 8101Ec:
         *
-        * (RTL_R32(TxConfig) & 0x700000) == 0x200000 ? 8101Eb : 8101Ec
+        * (RTL_R32(tp, TxConfig) & 0x700000) == 0x200000 ? 8101Eb : 8101Ec
         */
        static const struct rtl_mac_info {
                u32 mask;
@@ -2703,7 +2629,7 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
        const struct rtl_mac_info *p = mac_info;
        u32 reg;
 
-       reg = RTL_R32(TxConfig);
+       reg = RTL_R32(tp, TxConfig);
        while ((reg & p->mask) != p->val)
                p++;
        tp->mac_version = p->mac_version;
@@ -4584,7 +4510,6 @@ static void rtl_hw_phy_config(struct net_device *dev)
 static void rtl_phy_work(struct rtl8169_private *tp)
 {
        struct timer_list *timer = &tp->timer;
-       void __iomem *ioaddr = tp->mmio_addr;
        unsigned long timeout = RTL8169_PHY_TIMEOUT;
 
        assert(tp->mac_version > RTL_GIGA_MAC_VER_01);
@@ -4598,7 +4523,7 @@ static void rtl_phy_work(struct rtl8169_private *tp)
                goto out_mod_timer;
        }
 
-       if (tp->link_ok(ioaddr))
+       if (tp->link_ok(tp))
                return;
 
        netif_dbg(tp, link, tp->dev, "PHY reset until link up\n");
@@ -4636,21 +4561,17 @@ static void rtl8169_phy_reset(struct net_device *dev,
 
 static bool rtl_tbi_enabled(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        return (tp->mac_version == RTL_GIGA_MAC_VER_01) &&
-           (RTL_R8(PHYstatus) & TBI_Enable);
+           (RTL_R8(tp, PHYstatus) & TBI_Enable);
 }
 
 static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        rtl_hw_phy_config(dev);
 
        if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
                dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
-               RTL_W8(0x82, 0x01);
+               RTL_W8(tp, 0x82, 0x01);
        }
 
        pci_write_config_byte(tp->pci_dev, PCI_LATENCY_TIMER, 0x40);
@@ -4660,7 +4581,7 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_02) {
                dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
-               RTL_W8(0x82, 0x01);
+               RTL_W8(tp, 0x82, 0x01);
                dprintk("Set PHY Reg 0x0bh = 0x00h\n");
                rtl_writephy(tp, 0x0b, 0x0000); //w 0x0b 15 0 0
        }
@@ -4680,22 +4601,20 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
 
 static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        rtl_lock_work(tp);
 
-       RTL_W8(Cfg9346, Cfg9346_Unlock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 
-       RTL_W32(MAC4, addr[4] | addr[5] << 8);
-       RTL_R32(MAC4);
+       RTL_W32(tp, MAC4, addr[4] | addr[5] << 8);
+       RTL_R32(tp, MAC4);
 
-       RTL_W32(MAC0, addr[0] | addr[1] << 8 | addr[2] << 16 | addr[3] << 24);
-       RTL_R32(MAC0);
+       RTL_W32(tp, MAC0, addr[0] | addr[1] << 8 | addr[2] << 16 | addr[3] << 24);
+       RTL_R32(tp, MAC0);
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_34)
                rtl_rar_exgmac_set(tp, addr);
 
-       RTL_W8(Cfg9346, Cfg9346_Lock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 
        rtl_unlock_work(tp);
 }
@@ -4703,13 +4622,12 @@ static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
 static int rtl_set_mac_address(struct net_device *dev, void *p)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       struct device *d = &tp->pci_dev->dev;
-       struct sockaddr *addr = p;
-
-       if (!is_valid_ether_addr(addr->sa_data))
-               return -EADDRNOTAVAIL;
+       struct device *d = tp_to_dev(tp);
+       int ret;
 
-       memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+       ret = eth_mac_addr(dev, p);
+       if (ret)
+               return ret;
 
        pm_runtime_get_noresume(d);
 
@@ -4815,8 +4733,6 @@ static void rtl_speed_down(struct rtl8169_private *tp)
 
 static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_25:
        case RTL_GIGA_MAC_VER_26:
@@ -4840,7 +4756,7 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_49:
        case RTL_GIGA_MAC_VER_50:
        case RTL_GIGA_MAC_VER_51:
-               RTL_W32(RxConfig, RTL_R32(RxConfig) |
+               RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) |
                        AcceptBroadcast | AcceptMulticast | AcceptMyPhys);
                break;
        default:
@@ -4873,8 +4789,6 @@ static void r810x_phy_power_up(struct rtl8169_private *tp)
 
 static void r810x_pll_power_down(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        if (rtl_wol_pll_power_down(tp))
                return;
 
@@ -4889,15 +4803,13 @@ static void r810x_pll_power_down(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_16:
                break;
        default:
-               RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
                break;
        }
 }
 
 static void r810x_pll_power_up(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        r810x_phy_power_up(tp);
 
        switch (tp->mac_version) {
@@ -4910,10 +4822,10 @@ static void r810x_pll_power_up(struct rtl8169_private *tp)
                break;
        case RTL_GIGA_MAC_VER_47:
        case RTL_GIGA_MAC_VER_48:
-               RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
                break;
        default:
-               RTL_W8(PMCH, RTL_R8(PMCH) | 0x80);
+               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80);
                break;
        }
 }
@@ -4980,14 +4892,12 @@ static void r8168_phy_power_down(struct rtl8169_private *tp)
 
 static void r8168_pll_power_down(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        if (r8168_check_dash(tp))
                return;
 
        if ((tp->mac_version == RTL_GIGA_MAC_VER_23 ||
             tp->mac_version == RTL_GIGA_MAC_VER_24) &&
-           (RTL_R16(CPlusCmd) & ASF)) {
+           (RTL_R16(tp, CPlusCmd) & ASF)) {
                return;
        }
 
@@ -5013,22 +4923,20 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_46:
        case RTL_GIGA_MAC_VER_50:
        case RTL_GIGA_MAC_VER_51:
-               RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
                break;
        case RTL_GIGA_MAC_VER_40:
        case RTL_GIGA_MAC_VER_41:
        case RTL_GIGA_MAC_VER_49:
                rtl_w0w1_eri(tp, 0x1a8, ERIAR_MASK_1111, 0x00000000,
                             0xfc000000, ERIAR_EXGMAC);
-               RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
                break;
        }
 }
 
 static void r8168_pll_power_up(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_25:
        case RTL_GIGA_MAC_VER_26:
@@ -5037,19 +4945,19 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_31:
        case RTL_GIGA_MAC_VER_32:
        case RTL_GIGA_MAC_VER_33:
-               RTL_W8(PMCH, RTL_R8(PMCH) | 0x80);
+               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80);
                break;
        case RTL_GIGA_MAC_VER_44:
        case RTL_GIGA_MAC_VER_45:
        case RTL_GIGA_MAC_VER_46:
        case RTL_GIGA_MAC_VER_50:
        case RTL_GIGA_MAC_VER_51:
-               RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
                break;
        case RTL_GIGA_MAC_VER_40:
        case RTL_GIGA_MAC_VER_41:
        case RTL_GIGA_MAC_VER_49:
-               RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+               RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
                rtl_w0w1_eri(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000,
                             0x00000000, ERIAR_EXGMAC);
                break;
@@ -5139,8 +5047,6 @@ static void rtl_init_pll_power_ops(struct rtl8169_private *tp)
 
 static void rtl_init_rxcfg(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_01:
        case RTL_GIGA_MAC_VER_02:
@@ -5156,7 +5062,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_15:
        case RTL_GIGA_MAC_VER_16:
        case RTL_GIGA_MAC_VER_17:
-               RTL_W32(RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
+               RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
                break;
        case RTL_GIGA_MAC_VER_18:
        case RTL_GIGA_MAC_VER_19:
@@ -5167,7 +5073,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_24:
        case RTL_GIGA_MAC_VER_34:
        case RTL_GIGA_MAC_VER_35:
-               RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
+               RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
                break;
        case RTL_GIGA_MAC_VER_40:
        case RTL_GIGA_MAC_VER_41:
@@ -5181,10 +5087,10 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_49:
        case RTL_GIGA_MAC_VER_50:
        case RTL_GIGA_MAC_VER_51:
-               RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
+               RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
                break;
        default:
-               RTL_W32(RxConfig, RX128_INT_EN | RX_DMA_BURST);
+               RTL_W32(tp, RxConfig, RX128_INT_EN | RX_DMA_BURST);
                break;
        }
 }
@@ -5196,102 +5102,82 @@ static void rtl8169_init_ring_indexes(struct rtl8169_private *tp)
 
 static void rtl_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(Cfg9346, Cfg9346_Unlock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
        rtl_generic_op(tp, tp->jumbo_ops.enable);
-       RTL_W8(Cfg9346, Cfg9346_Lock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 }
 
 static void rtl_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(Cfg9346, Cfg9346_Unlock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
        rtl_generic_op(tp, tp->jumbo_ops.disable);
-       RTL_W8(Cfg9346, Cfg9346_Lock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 }
 
 static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
-       RTL_W8(Config4, RTL_R8(Config4) | Jumbo_En1);
-       rtl_tx_performance_tweak(tp->pci_dev, PCI_EXP_DEVCTL_READRQ_512B);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
+       RTL_W8(tp, Config4, RTL_R8(tp, Config4) | Jumbo_En1);
+       rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_512B);
 }
 
 static void r8168c_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
-       RTL_W8(Config4, RTL_R8(Config4) & ~Jumbo_En1);
-       rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
+       RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~Jumbo_En1);
+       rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 }
 
 static void r8168dp_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
 }
 
 static void r8168dp_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
 }
 
 static void r8168e_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(MaxTxPacketSize, 0x3f);
-       RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
-       RTL_W8(Config4, RTL_R8(Config4) | 0x01);
-       rtl_tx_performance_tweak(tp->pci_dev, PCI_EXP_DEVCTL_READRQ_512B);
+       RTL_W8(tp, MaxTxPacketSize, 0x3f);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
+       RTL_W8(tp, Config4, RTL_R8(tp, Config4) | 0x01);
+       rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_512B);
 }
 
 static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(MaxTxPacketSize, 0x0c);
-       RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
-       RTL_W8(Config4, RTL_R8(Config4) & ~0x01);
-       rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
+       RTL_W8(tp, MaxTxPacketSize, 0x0c);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
+       RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~0x01);
+       rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 }
 
 static void r8168b_0_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-       rtl_tx_performance_tweak(tp->pci_dev,
+       rtl_tx_performance_tweak(tp,
                PCI_EXP_DEVCTL_READRQ_512B | PCI_EXP_DEVCTL_NOSNOOP_EN);
 }
 
 static void r8168b_0_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-       rtl_tx_performance_tweak(tp->pci_dev,
+       rtl_tx_performance_tweak(tp,
                (0x5 << MAX_READ_REQUEST_SHIFT) | PCI_EXP_DEVCTL_NOSNOOP_EN);
 }
 
 static void r8168b_1_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        r8168b_0_hw_jumbo_enable(tp);
 
-       RTL_W8(Config4, RTL_R8(Config4) | (1 << 0));
+       RTL_W8(tp, Config4, RTL_R8(tp, Config4) | (1 << 0));
 }
 
 static void r8168b_1_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        r8168b_0_hw_jumbo_disable(tp);
 
-       RTL_W8(Config4, RTL_R8(Config4) & ~(1 << 0));
+       RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
 }
 
 static void rtl_init_jumbo_ops(struct rtl8169_private *tp)
@@ -5358,16 +5244,12 @@ static void rtl_init_jumbo_ops(struct rtl8169_private *tp)
 
 DECLARE_RTL_COND(rtl_chipcmd_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R8(ChipCmd) & CmdReset;
+       return RTL_R8(tp, ChipCmd) & CmdReset;
 }
 
 static void rtl_hw_reset(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W8(ChipCmd, CmdReset);
+       RTL_W8(tp, ChipCmd, CmdReset);
 
        rtl_udelay_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
 }
@@ -5386,7 +5268,7 @@ static void rtl_request_uncached_firmware(struct rtl8169_private *tp)
        if (!rtl_fw)
                goto err_warn;
 
-       rc = request_firmware(&rtl_fw->fw, name, &tp->pci_dev->dev);
+       rc = request_firmware(&rtl_fw->fw, name, tp_to_dev(tp));
        if (rc < 0)
                goto err_free;
 
@@ -5418,29 +5300,21 @@ static void rtl_request_firmware(struct rtl8169_private *tp)
 
 static void rtl_rx_close(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(RxConfig, RTL_R32(RxConfig) & ~RX_CONFIG_ACCEPT_MASK);
+       RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) & ~RX_CONFIG_ACCEPT_MASK);
 }
 
 DECLARE_RTL_COND(rtl_npq_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R8(TxPoll) & NPQ;
+       return RTL_R8(tp, TxPoll) & NPQ;
 }
 
 DECLARE_RTL_COND(rtl_txcfg_empty_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(TxConfig) & TXCFG_EMPTY;
+       return RTL_R32(tp, TxConfig) & TXCFG_EMPTY;
 }
 
 static void rtl8169_hw_reset(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        /* Disable interrupts */
        rtl8169_irq_mask_and_ack(tp);
 
@@ -5467,10 +5341,10 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
                   tp->mac_version == RTL_GIGA_MAC_VER_49 ||
                   tp->mac_version == RTL_GIGA_MAC_VER_50 ||
                   tp->mac_version == RTL_GIGA_MAC_VER_51) {
-               RTL_W8(ChipCmd, RTL_R8(ChipCmd) | StopReq);
+               RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
                rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666);
        } else {
-               RTL_W8(ChipCmd, RTL_R8(ChipCmd) | StopReq);
+               RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
                udelay(100);
        }
 
@@ -5479,10 +5353,8 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
 
 static void rtl_set_rx_tx_config_registers(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        /* Set DMA burst size and Interframe Gap Time */
-       RTL_W32(TxConfig, (TX_DMA_BURST << TxDMAShift) |
+       RTL_W32(tp, TxConfig, (TX_DMA_BURST << TxDMAShift) |
                (InterFrameGap << TxInterFrameGapShift));
 }
 
@@ -5495,36 +5367,35 @@ static void rtl_hw_start(struct net_device *dev)
        rtl_irq_enable_all(tp);
 }
 
-static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp,
-                                        void __iomem *ioaddr)
+static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
 {
        /*
         * Magic spell: some iop3xx ARM board needs the TxDescAddrHigh
         * register to be written before TxDescAddrLow to work.
         * Switching from MMIO to I/O access fixes the issue as well.
         */
-       RTL_W32(TxDescStartAddrHigh, ((u64) tp->TxPhyAddr) >> 32);
-       RTL_W32(TxDescStartAddrLow, ((u64) tp->TxPhyAddr) & DMA_BIT_MASK(32));
-       RTL_W32(RxDescAddrHigh, ((u64) tp->RxPhyAddr) >> 32);
-       RTL_W32(RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
+       RTL_W32(tp, TxDescStartAddrHigh, ((u64) tp->TxPhyAddr) >> 32);
+       RTL_W32(tp, TxDescStartAddrLow, ((u64) tp->TxPhyAddr) & DMA_BIT_MASK(32));
+       RTL_W32(tp, RxDescAddrHigh, ((u64) tp->RxPhyAddr) >> 32);
+       RTL_W32(tp, RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
 }
 
-static u16 rtl_rw_cpluscmd(void __iomem *ioaddr)
+static u16 rtl_rw_cpluscmd(struct rtl8169_private *tp)
 {
        u16 cmd;
 
-       cmd = RTL_R16(CPlusCmd);
-       RTL_W16(CPlusCmd, cmd);
+       cmd = RTL_R16(tp, CPlusCmd);
+       RTL_W16(tp, CPlusCmd, cmd);
        return cmd;
 }
 
-static void rtl_set_rx_max_size(void __iomem *ioaddr, unsigned int rx_buf_sz)
+static void rtl_set_rx_max_size(struct rtl8169_private *tp, unsigned int rx_buf_sz)
 {
        /* Low hurts. Let's disable the filtering. */
-       RTL_W16(RxMaxSize, rx_buf_sz + 1);
+       RTL_W16(tp, RxMaxSize, rx_buf_sz + 1);
 }
 
-static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
+static void rtl8169_set_magic_reg(struct rtl8169_private *tp, unsigned mac_version)
 {
        static const struct rtl_cfg2_info {
                u32 mac_version;
@@ -5540,10 +5411,10 @@ static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
        unsigned int i;
        u32 clk;
 
-       clk = RTL_R8(Config2) & PCI_Clock_66MHz;
+       clk = RTL_R8(tp, Config2) & PCI_Clock_66MHz;
        for (i = 0; i < ARRAY_SIZE(cfg2_info); i++, p++) {
                if ((p->mac_version == mac_version) && (p->clk == clk)) {
-                       RTL_W32(0x7c, p->val);
+                       RTL_W32(tp, 0x7c, p->val);
                        break;
                }
        }
@@ -5552,7 +5423,6 @@ static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
 static void rtl_set_rx_mode(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        u32 mc_filter[2];       /* Multicast hash filter */
        int rx_mode;
        u32 tmp = 0;
@@ -5584,7 +5454,7 @@ static void rtl_set_rx_mode(struct net_device *dev)
        if (dev->features & NETIF_F_RXALL)
                rx_mode |= (AcceptErr | AcceptRunt);
 
-       tmp = (RTL_R32(RxConfig) & ~RX_CONFIG_ACCEPT_MASK) | rx_mode;
+       tmp = (RTL_R32(tp, RxConfig) & ~RX_CONFIG_ACCEPT_MASK) | rx_mode;
 
        if (tp->mac_version > RTL_GIGA_MAC_VER_06) {
                u32 data = mc_filter[0];
@@ -5596,35 +5466,34 @@ static void rtl_set_rx_mode(struct net_device *dev)
        if (tp->mac_version == RTL_GIGA_MAC_VER_35)
                mc_filter[1] = mc_filter[0] = 0xffffffff;
 
-       RTL_W32(MAR0 + 4, mc_filter[1]);
-       RTL_W32(MAR0 + 0, mc_filter[0]);
+       RTL_W32(tp, MAR0 + 4, mc_filter[1]);
+       RTL_W32(tp, MAR0 + 0, mc_filter[0]);
 
-       RTL_W32(RxConfig, tmp);
+       RTL_W32(tp, RxConfig, tmp);
 }
 
 static void rtl_hw_start_8169(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_05) {
-               RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) | PCIMulRW);
+               RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) | PCIMulRW);
                pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, 0x08);
        }
 
-       RTL_W8(Cfg9346, Cfg9346_Unlock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
        if (tp->mac_version == RTL_GIGA_MAC_VER_01 ||
            tp->mac_version == RTL_GIGA_MAC_VER_02 ||
            tp->mac_version == RTL_GIGA_MAC_VER_03 ||
            tp->mac_version == RTL_GIGA_MAC_VER_04)
-               RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+               RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
 
        rtl_init_rxcfg(tp);
 
-       RTL_W8(EarlyTxThres, NoEarlyTx);
+       RTL_W8(tp, EarlyTxThres, NoEarlyTx);
 
-       rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+       rtl_set_rx_max_size(tp, rx_buf_sz);
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_01 ||
            tp->mac_version == RTL_GIGA_MAC_VER_02 ||
@@ -5632,7 +5501,7 @@ static void rtl_hw_start_8169(struct net_device *dev)
            tp->mac_version == RTL_GIGA_MAC_VER_04)
                rtl_set_rx_tx_config_registers(tp);
 
-       tp->cp_cmd |= rtl_rw_cpluscmd(ioaddr) | PCIMulRW;
+       tp->cp_cmd |= rtl_rw_cpluscmd(tp) | PCIMulRW;
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_02 ||
            tp->mac_version == RTL_GIGA_MAC_VER_03) {
@@ -5641,37 +5510,37 @@ static void rtl_hw_start_8169(struct net_device *dev)
                tp->cp_cmd |= (1 << 14);
        }
 
-       RTL_W16(CPlusCmd, tp->cp_cmd);
+       RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 
-       rtl8169_set_magic_reg(ioaddr, tp->mac_version);
+       rtl8169_set_magic_reg(tp, tp->mac_version);
 
        /*
         * Undocumented corner. Supposedly:
         * (TxTimer << 12) | (TxPackets << 8) | (RxTimer << 4) | RxPackets
         */
-       RTL_W16(IntrMitigate, 0x0000);
+       RTL_W16(tp, IntrMitigate, 0x0000);
 
-       rtl_set_rx_tx_desc_registers(tp, ioaddr);
+       rtl_set_rx_tx_desc_registers(tp);
 
        if (tp->mac_version != RTL_GIGA_MAC_VER_01 &&
            tp->mac_version != RTL_GIGA_MAC_VER_02 &&
            tp->mac_version != RTL_GIGA_MAC_VER_03 &&
            tp->mac_version != RTL_GIGA_MAC_VER_04) {
-               RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+               RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
                rtl_set_rx_tx_config_registers(tp);
        }
 
-       RTL_W8(Cfg9346, Cfg9346_Lock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 
        /* Initially a 10 us delay. Turned it into a PCI commit. - FR */
-       RTL_R8(IntrMask);
+       RTL_R8(tp, IntrMask);
 
-       RTL_W32(RxMissed, 0);
+       RTL_W32(tp, RxMissed, 0);
 
        rtl_set_rx_mode(dev);
 
        /* no early-rx interrupts */
-       RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+       RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
 }
 
 static void rtl_csi_write(struct rtl8169_private *tp, int addr, int value)
@@ -5705,17 +5574,13 @@ static void rtl_csi_access_enable_2(struct rtl8169_private *tp)
 
 DECLARE_RTL_COND(rtl_csiar_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R32(CSIAR) & CSIAR_FLAG;
+       return RTL_R32(tp, CSIAR) & CSIAR_FLAG;
 }
 
 static void r8169_csi_write(struct rtl8169_private *tp, int addr, int value)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(CSIDR, value);
-       RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+       RTL_W32(tp, CSIDR, value);
+       RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
                CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
 
        rtl_udelay_loop_wait_low(tp, &rtl_csiar_cond, 10, 100);
@@ -5723,21 +5588,17 @@ static void r8169_csi_write(struct rtl8169_private *tp, int addr, int value)
 
 static u32 r8169_csi_read(struct rtl8169_private *tp, int addr)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) |
+       RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) |
                CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
 
        return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
-               RTL_R32(CSIDR) : ~0;
+               RTL_R32(tp, CSIDR) : ~0;
 }
 
 static void r8402_csi_write(struct rtl8169_private *tp, int addr, int value)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(CSIDR, value);
-       RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+       RTL_W32(tp, CSIDR, value);
+       RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
                CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT |
                CSIAR_FUNC_NIC);
 
@@ -5746,21 +5607,17 @@ static void r8402_csi_write(struct rtl8169_private *tp, int addr, int value)
 
 static u32 r8402_csi_read(struct rtl8169_private *tp, int addr)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC |
+       RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC |
                CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
 
        return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
-               RTL_R32(CSIDR) : ~0;
+               RTL_R32(tp, CSIDR) : ~0;
 }
 
 static void r8411_csi_write(struct rtl8169_private *tp, int addr, int value)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(CSIDR, value);
-       RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+       RTL_W32(tp, CSIDR, value);
+       RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
                CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT |
                CSIAR_FUNC_NIC2);
 
@@ -5769,13 +5626,11 @@ static void r8411_csi_write(struct rtl8169_private *tp, int addr, int value)
 
 static u32 r8411_csi_read(struct rtl8169_private *tp, int addr)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC2 |
+       RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC2 |
                CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
 
        return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
-               RTL_R32(CSIDR) : ~0;
+               RTL_R32(tp, CSIDR) : ~0;
 }
 
 static void rtl_init_csi_ops(struct rtl8169_private *tp)
@@ -5837,31 +5692,30 @@ static void rtl_ephy_init(struct rtl8169_private *tp, const struct ephy_info *e,
        }
 }
 
-static void rtl_disable_clock_request(struct pci_dev *pdev)
+static void rtl_disable_clock_request(struct rtl8169_private *tp)
 {
-       pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL,
+       pcie_capability_clear_word(tp->pci_dev, PCI_EXP_LNKCTL,
                                   PCI_EXP_LNKCTL_CLKREQ_EN);
 }
 
-static void rtl_enable_clock_request(struct pci_dev *pdev)
+static void rtl_enable_clock_request(struct rtl8169_private *tp)
 {
-       pcie_capability_set_word(pdev, PCI_EXP_LNKCTL,
+       pcie_capability_set_word(tp->pci_dev, PCI_EXP_LNKCTL,
                                 PCI_EXP_LNKCTL_CLKREQ_EN);
 }
 
 static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        u8 data;
 
-       data = RTL_R8(Config3);
+       data = RTL_R8(tp, Config3);
 
        if (enable)
                data |= Rdy_to_L23;
        else
                data &= ~Rdy_to_L23;
 
-       RTL_W8(Config3, data);
+       RTL_W8(tp, Config3, data);
 }
 
 #define R8168_CPCMD_QUIRK_MASK (\
@@ -5877,45 +5731,37 @@ static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
 
 static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-       struct pci_dev *pdev = tp->pci_dev;
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
-       RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
-
-       RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+       RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 
        if (tp->dev->mtu <= ETH_DATA_LEN) {
-               rtl_tx_performance_tweak(pdev, (0x5 << MAX_READ_REQUEST_SHIFT) |
+               rtl_tx_performance_tweak(tp, (0x5 << MAX_READ_REQUEST_SHIFT) |
                                         PCI_EXP_DEVCTL_NOSNOOP_EN);
        }
 }
 
 static void rtl_hw_start_8168bef(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        rtl_hw_start_8168bb(tp);
 
-       RTL_W8(MaxTxPacketSize, TxPacketMax);
+       RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-       RTL_W8(Config4, RTL_R8(Config4) & ~(1 << 0));
+       RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
 }
 
 static void __rtl_hw_start_8168cp(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-       struct pci_dev *pdev = tp->pci_dev;
-
-       RTL_W8(Config1, RTL_R8(Config1) | Speed_down);
+       RTL_W8(tp, Config1, RTL_R8(tp, Config1) | Speed_down);
 
-       RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
        if (tp->dev->mtu <= ETH_DATA_LEN)
-               rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+               rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-       rtl_disable_clock_request(pdev);
+       rtl_disable_clock_request(tp);
 
-       RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+       RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
 
 static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp)
@@ -5937,42 +5783,35 @@ static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-       struct pci_dev *pdev = tp->pci_dev;
-
        rtl_csi_access_enable_2(tp);
 
-       RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
        if (tp->dev->mtu <= ETH_DATA_LEN)
-               rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+               rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-       RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+       RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
 
 static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-       struct pci_dev *pdev = tp->pci_dev;
-
        rtl_csi_access_enable_2(tp);
 
-       RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
        /* Magic. */
-       RTL_W8(DBG_REG, 0x20);
+       RTL_W8(tp, DBG_REG, 0x20);
 
-       RTL_W8(MaxTxPacketSize, TxPacketMax);
+       RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
        if (tp->dev->mtu <= ETH_DATA_LEN)
-               rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+               rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-       RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+       RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
 
 static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8168c_1[] = {
                { 0x02, 0x0800, 0x1000 },
                { 0x03, 0,      0x0002 },
@@ -5981,7 +5820,7 @@ static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
 
        rtl_csi_access_enable_2(tp);
 
-       RTL_W8(DBG_REG, 0x06 | FIX_NAK_1 | FIX_NAK_2);
+       RTL_W8(tp, DBG_REG, 0x06 | FIX_NAK_1 | FIX_NAK_2);
 
        rtl_ephy_init(tp, e_info_8168c_1, ARRAY_SIZE(e_info_8168c_1));
 
@@ -6016,40 +5855,32 @@ static void rtl_hw_start_8168c_4(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168d(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-       struct pci_dev *pdev = tp->pci_dev;
-
        rtl_csi_access_enable_2(tp);
 
-       rtl_disable_clock_request(pdev);
+       rtl_disable_clock_request(tp);
 
-       RTL_W8(MaxTxPacketSize, TxPacketMax);
+       RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
        if (tp->dev->mtu <= ETH_DATA_LEN)
-               rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+               rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-       RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+       RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
 
 static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-       struct pci_dev *pdev = tp->pci_dev;
-
        rtl_csi_access_enable_1(tp);
 
        if (tp->dev->mtu <= ETH_DATA_LEN)
-               rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+               rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-       RTL_W8(MaxTxPacketSize, TxPacketMax);
+       RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-       rtl_disable_clock_request(pdev);
+       rtl_disable_clock_request(tp);
 }
 
 static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-       struct pci_dev *pdev = tp->pci_dev;
        static const struct ephy_info e_info_8168d_4[] = {
                { 0x0b, 0x0000, 0x0048 },
                { 0x19, 0x0020, 0x0050 },
@@ -6058,19 +5889,17 @@ static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
 
        rtl_csi_access_enable_1(tp);
 
-       rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+       rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-       RTL_W8(MaxTxPacketSize, TxPacketMax);
+       RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
        rtl_ephy_init(tp, e_info_8168d_4, ARRAY_SIZE(e_info_8168d_4));
 
-       rtl_enable_clock_request(pdev);
+       rtl_enable_clock_request(tp);
 }
 
 static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-       struct pci_dev *pdev = tp->pci_dev;
        static const struct ephy_info e_info_8168e_1[] = {
                { 0x00, 0x0200, 0x0100 },
                { 0x00, 0x0000, 0x0004 },
@@ -6092,23 +5921,21 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
        rtl_ephy_init(tp, e_info_8168e_1, ARRAY_SIZE(e_info_8168e_1));
 
        if (tp->dev->mtu <= ETH_DATA_LEN)
-               rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+               rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-       RTL_W8(MaxTxPacketSize, TxPacketMax);
+       RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-       rtl_disable_clock_request(pdev);
+       rtl_disable_clock_request(tp);
 
        /* Reset tx FIFO pointer */
-       RTL_W32(MISC, RTL_R32(MISC) | TXPLA_RST);
-       RTL_W32(MISC, RTL_R32(MISC) & ~TXPLA_RST);
+       RTL_W32(tp, MISC, RTL_R32(tp, MISC) | TXPLA_RST);
+       RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~TXPLA_RST);
 
-       RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
 }
 
 static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-       struct pci_dev *pdev = tp->pci_dev;
        static const struct ephy_info e_info_8168e_2[] = {
                { 0x09, 0x0000, 0x0080 },
                { 0x19, 0x0000, 0x0224 }
@@ -6119,7 +5946,7 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
        rtl_ephy_init(tp, e_info_8168e_2, ARRAY_SIZE(e_info_8168e_2));
 
        if (tp->dev->mtu <= ETH_DATA_LEN)
-               rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+               rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
        rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
@@ -6130,29 +5957,26 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
        rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0001, 0x10, 0x00, ERIAR_EXGMAC);
        rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00, ERIAR_EXGMAC);
 
-       RTL_W8(MaxTxPacketSize, EarlySize);
+       RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
-       rtl_disable_clock_request(pdev);
+       rtl_disable_clock_request(tp);
 
-       RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
-       RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+       RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+       RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
 
        /* Adjust EEE LED frequency */
-       RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+       RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
 
-       RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
-       RTL_W32(MISC, RTL_R32(MISC) | PWM_EN);
-       RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
+       RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
 }
 
 static void rtl_hw_start_8168f(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-       struct pci_dev *pdev = tp->pci_dev;
-
        rtl_csi_access_enable_2(tp);
 
-       rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+       rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
        rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
@@ -6165,20 +5989,19 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp)
        rtl_eri_write(tp, 0xcc, ERIAR_MASK_1111, 0x00000050, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xd0, ERIAR_MASK_1111, 0x00000060, ERIAR_EXGMAC);
 
-       RTL_W8(MaxTxPacketSize, EarlySize);
+       RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
-       rtl_disable_clock_request(pdev);
+       rtl_disable_clock_request(tp);
 
-       RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
-       RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
-       RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
-       RTL_W32(MISC, RTL_R32(MISC) | PWM_EN);
-       RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+       RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+       RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
+       RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
 }
 
 static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8168f_1[] = {
                { 0x06, 0x00c0, 0x0020 },
                { 0x08, 0x0001, 0x0002 },
@@ -6193,7 +6016,7 @@ static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
        rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00, ERIAR_EXGMAC);
 
        /* Adjust EEE LED frequency */
-       RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+       RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
 }
 
 static void rtl_hw_start_8411(struct rtl8169_private *tp)
@@ -6215,10 +6038,7 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-       struct pci_dev *pdev = tp->pci_dev;
-
-       RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+       RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
 
        rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x080002, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
@@ -6227,20 +6047,20 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 
        rtl_csi_access_enable_1(tp);
 
-       rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+       rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
        rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
        rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0x2f8, ERIAR_MASK_0011, 0x1d8f, ERIAR_EXGMAC);
 
-       RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
-       RTL_W8(MaxTxPacketSize, EarlySize);
+       RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+       RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
        rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
 
        /* Adjust EEE LED frequency */
-       RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+       RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
 
        rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC);
        rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC);
@@ -6250,7 +6070,6 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8168g_1[] = {
                { 0x00, 0x0000, 0x0008 },
                { 0x0c, 0x37d0, 0x0820 },
@@ -6261,14 +6080,13 @@ static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
        rtl_hw_start_8168g(tp);
 
        /* disable aspm and clock request before access ephy */
-       RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-       RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
        rtl_ephy_init(tp, e_info_8168g_1, ARRAY_SIZE(e_info_8168g_1));
 }
 
 static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8168g_2[] = {
                { 0x00, 0x0000, 0x0008 },
                { 0x0c, 0x3df0, 0x0200 },
@@ -6279,14 +6097,13 @@ static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
        rtl_hw_start_8168g(tp);
 
        /* disable aspm and clock request before access ephy */
-       RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-       RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
        rtl_ephy_init(tp, e_info_8168g_2, ARRAY_SIZE(e_info_8168g_2));
 }
 
 static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8411_2[] = {
                { 0x00, 0x0000, 0x0008 },
                { 0x0c, 0x3df0, 0x0200 },
@@ -6298,15 +6115,13 @@ static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
        rtl_hw_start_8168g(tp);
 
        /* disable aspm and clock request before access ephy */
-       RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-       RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
        rtl_ephy_init(tp, e_info_8411_2, ARRAY_SIZE(e_info_8411_2));
 }
 
 static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-       struct pci_dev *pdev = tp->pci_dev;
        int rg_saw_cnt;
        u32 data;
        static const struct ephy_info e_info_8168h_1[] = {
@@ -6319,11 +6134,11 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
        };
 
        /* disable aspm and clock request before access ephy */
-       RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-       RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
        rtl_ephy_init(tp, e_info_8168h_1, ARRAY_SIZE(e_info_8168h_1));
 
-       RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+       RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
 
        rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
@@ -6332,7 +6147,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 
        rtl_csi_access_enable_1(tp);
 
-       rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+       rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
        rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
        rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
@@ -6343,19 +6158,19 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 
        rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC);
 
-       RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
-       RTL_W8(MaxTxPacketSize, EarlySize);
+       RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+       RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
        rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
 
        /* Adjust EEE LED frequency */
-       RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+       RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
 
-       RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
-       RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+       RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
 
-       RTL_W8(DLLPR, RTL_R8(DLLPR) & ~TX_10M_PS_EN);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
 
        rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC);
 
@@ -6403,12 +6218,9 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-       struct pci_dev *pdev = tp->pci_dev;
-
        rtl8168ep_stop_cmac(tp);
 
-       RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+       RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
 
        rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x2f, ERIAR_EXGMAC);
@@ -6417,7 +6229,7 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
 
        rtl_csi_access_enable_1(tp);
 
-       rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+       rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
        rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
        rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
@@ -6426,25 +6238,24 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
 
        rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC);
 
-       RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
-       RTL_W8(MaxTxPacketSize, EarlySize);
+       RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+       RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
        rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
 
        /* Adjust EEE LED frequency */
-       RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+       RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
 
        rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC);
 
-       RTL_W8(DLLPR, RTL_R8(DLLPR) & ~TX_10M_PS_EN);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
 
        rtl_pcie_state_l2l3_enable(tp, false);
 }
 
 static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8168ep_1[] = {
                { 0x00, 0xffff, 0x10ab },
                { 0x06, 0xffff, 0xf030 },
@@ -6454,8 +6265,8 @@ static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
        };
 
        /* disable aspm and clock request before access ephy */
-       RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-       RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
        rtl_ephy_init(tp, e_info_8168ep_1, ARRAY_SIZE(e_info_8168ep_1));
 
        rtl_hw_start_8168ep(tp);
@@ -6463,7 +6274,6 @@ static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168ep_2(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8168ep_2[] = {
                { 0x00, 0xffff, 0x10a3 },
                { 0x19, 0xffff, 0xfc00 },
@@ -6471,19 +6281,18 @@ static void rtl_hw_start_8168ep_2(struct rtl8169_private *tp)
        };
 
        /* disable aspm and clock request before access ephy */
-       RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-       RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
        rtl_ephy_init(tp, e_info_8168ep_2, ARRAY_SIZE(e_info_8168ep_2));
 
        rtl_hw_start_8168ep(tp);
 
-       RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
-       RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+       RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
 }
 
 static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        u32 data;
        static const struct ephy_info e_info_8168ep_3[] = {
                { 0x00, 0xffff, 0x10a3 },
@@ -6493,14 +6302,14 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
        };
 
        /* disable aspm and clock request before access ephy */
-       RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-       RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
        rtl_ephy_init(tp, e_info_8168ep_3, ARRAY_SIZE(e_info_8168ep_3));
 
        rtl_hw_start_8168ep(tp);
 
-       RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
-       RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+       RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
 
        data = r8168_mac_ocp_read(tp, 0xd3e2);
        data &= 0xf000;
@@ -6519,19 +6328,18 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
 static void rtl_hw_start_8168(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
 
-       RTL_W8(Cfg9346, Cfg9346_Unlock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 
-       RTL_W8(MaxTxPacketSize, TxPacketMax);
+       RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-       rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+       rtl_set_rx_max_size(tp, rx_buf_sz);
 
-       tp->cp_cmd |= RTL_R16(CPlusCmd) | PktCntrDisable | INTT_1;
+       tp->cp_cmd |= RTL_R16(tp, CPlusCmd) | PktCntrDisable | INTT_1;
 
-       RTL_W16(CPlusCmd, tp->cp_cmd);
+       RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 
-       RTL_W16(IntrMitigate, 0x5151);
+       RTL_W16(tp, IntrMitigate, 0x5151);
 
        /* Work around for RxFIFO overflow. */
        if (tp->mac_version == RTL_GIGA_MAC_VER_11) {
@@ -6539,11 +6347,11 @@ static void rtl_hw_start_8168(struct net_device *dev)
                tp->event_slow &= ~RxOverflow;
        }
 
-       rtl_set_rx_tx_desc_registers(tp, ioaddr);
+       rtl_set_rx_tx_desc_registers(tp);
 
        rtl_set_rx_tx_config_registers(tp);
 
-       RTL_R8(IntrMask);
+       RTL_R8(tp, IntrMask);
 
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_11:
@@ -6649,13 +6457,13 @@ static void rtl_hw_start_8168(struct net_device *dev)
                break;
        }
 
-       RTL_W8(Cfg9346, Cfg9346_Lock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 
-       RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+       RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
 
        rtl_set_rx_mode(dev);
 
-       RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+       RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
 }
 
 #define R810X_CPCMD_QUIRK_MASK (\
@@ -6671,8 +6479,6 @@ static void rtl_hw_start_8168(struct net_device *dev)
 
 static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-       struct pci_dev *pdev = tp->pci_dev;
        static const struct ephy_info e_info_8102e_1[] = {
                { 0x01, 0, 0x6e65 },
                { 0x02, 0, 0x091f },
@@ -6687,32 +6493,29 @@ static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
 
        rtl_csi_access_enable_2(tp);
 
-       RTL_W8(DBG_REG, FIX_NAK_1);
+       RTL_W8(tp, DBG_REG, FIX_NAK_1);
 
-       rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+       rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-       RTL_W8(Config1,
+       RTL_W8(tp, Config1,
               LEDS1 | LEDS0 | Speed_down | MEMMAP | IOMAP | VPD | PMEnable);
-       RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
-       cfg1 = RTL_R8(Config1);
+       cfg1 = RTL_R8(tp, Config1);
        if ((cfg1 & LEDS0) && (cfg1 & LEDS1))
-               RTL_W8(Config1, cfg1 & ~LEDS0);
+               RTL_W8(tp, Config1, cfg1 & ~LEDS0);
 
        rtl_ephy_init(tp, e_info_8102e_1, ARRAY_SIZE(e_info_8102e_1));
 }
 
 static void rtl_hw_start_8102e_2(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-       struct pci_dev *pdev = tp->pci_dev;
-
        rtl_csi_access_enable_2(tp);
 
-       rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+       rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-       RTL_W8(Config1, MEMMAP | IOMAP | VPD | PMEnable);
-       RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+       RTL_W8(tp, Config1, MEMMAP | IOMAP | VPD | PMEnable);
+       RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 }
 
 static void rtl_hw_start_8102e_3(struct rtl8169_private *tp)
@@ -6724,7 +6527,6 @@ static void rtl_hw_start_8102e_3(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8105e_1(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8105e_1[] = {
                { 0x07, 0, 0x4000 },
                { 0x19, 0, 0x0200 },
@@ -6737,13 +6539,13 @@ static void rtl_hw_start_8105e_1(struct rtl8169_private *tp)
        };
 
        /* Force LAN exit from ASPM if Rx/Tx are not idle */
-       RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+       RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
 
        /* Disable Early Tally Counter */
-       RTL_W32(FuncEvent, RTL_R32(FuncEvent) & ~0x010000);
+       RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) & ~0x010000);
 
-       RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET);
-       RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
+       RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
 
        rtl_ephy_init(tp, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1));
 
@@ -6758,7 +6560,6 @@ static void rtl_hw_start_8105e_2(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8402(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        static const struct ephy_info e_info_8402[] = {
                { 0x19, 0xffff, 0xff64 },
                { 0x1e, 0, 0x4000 }
@@ -6767,14 +6568,14 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
        rtl_csi_access_enable_2(tp);
 
        /* Force LAN exit from ASPM if Rx/Tx are not idle */
-       RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+       RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
 
-       RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
-       RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+       RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+       RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
 
        rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402));
 
-       rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
+       rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
        rtl_eri_write(tp, 0xc8, ERIAR_MASK_1111, 0x00000002, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xe8, ERIAR_MASK_1111, 0x00000006, ERIAR_EXGMAC);
@@ -6789,14 +6590,12 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8106(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        /* Force LAN exit from ASPM if Rx/Tx are not idle */
-       RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+       RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
 
-       RTL_W32(MISC, (RTL_R32(MISC) | DISABLE_LAN_EN) & ~EARLY_TALLY_EN);
-       RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET);
-       RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
+       RTL_W32(tp, MISC, (RTL_R32(tp, MISC) | DISABLE_LAN_EN) & ~EARLY_TALLY_EN);
+       RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
+       RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
 
        rtl_pcie_state_l2l3_enable(tp, false);
 }
@@ -6804,7 +6603,6 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp)
 static void rtl_hw_start_8101(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
 
        if (tp->mac_version >= RTL_GIGA_MAC_VER_30)
@@ -6815,16 +6613,16 @@ static void rtl_hw_start_8101(struct net_device *dev)
                pcie_capability_set_word(pdev, PCI_EXP_DEVCTL,
                                         PCI_EXP_DEVCTL_NOSNOOP_EN);
 
-       RTL_W8(Cfg9346, Cfg9346_Unlock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 
-       RTL_W8(MaxTxPacketSize, TxPacketMax);
+       RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-       rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+       rtl_set_rx_max_size(tp, rx_buf_sz);
 
        tp->cp_cmd &= ~R810X_CPCMD_QUIRK_MASK;
-       RTL_W16(CPlusCmd, tp->cp_cmd);
+       RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 
-       rtl_set_rx_tx_desc_registers(tp, ioaddr);
+       rtl_set_rx_tx_desc_registers(tp);
 
        rtl_set_rx_tx_config_registers(tp);
 
@@ -6864,17 +6662,17 @@ static void rtl_hw_start_8101(struct net_device *dev)
                break;
        }
 
-       RTL_W8(Cfg9346, Cfg9346_Lock);
+       RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 
-       RTL_W16(IntrMitigate, 0x0000);
+       RTL_W16(tp, IntrMitigate, 0x0000);
 
-       RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+       RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
 
        rtl_set_rx_mode(dev);
 
-       RTL_R8(IntrMask);
+       RTL_R8(tp, IntrMask);
 
-       RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+       RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
 }
 
 static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
@@ -6901,7 +6699,7 @@ static inline void rtl8169_make_unusable_by_asic(struct RxDesc *desc)
 static void rtl8169_free_rx_databuff(struct rtl8169_private *tp,
                                     void **data_buff, struct RxDesc *desc)
 {
-       dma_unmap_single(&tp->pci_dev->dev, le64_to_cpu(desc->addr), rx_buf_sz,
+       dma_unmap_single(tp_to_dev(tp), le64_to_cpu(desc->addr), rx_buf_sz,
                         DMA_FROM_DEVICE);
 
        kfree(*data_buff);
@@ -6936,7 +6734,7 @@ static struct sk_buff *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
 {
        void *data;
        dma_addr_t mapping;
-       struct device *d = &tp->pci_dev->dev;
+       struct device *d = tp_to_dev(tp);
        struct net_device *dev = tp->dev;
        int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
 
@@ -7048,7 +6846,7 @@ static void rtl8169_tx_clear_range(struct rtl8169_private *tp, u32 start,
                if (len) {
                        struct sk_buff *skb = tx_skb->skb;
 
-                       rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb,
+                       rtl8169_unmap_tx_skb(tp_to_dev(tp), tx_skb,
                                             tp->TxDescArray + entry);
                        if (skb) {
                                dev_consume_skb_any(skb);
@@ -7084,7 +6882,7 @@ static void rtl_reset_work(struct rtl8169_private *tp)
        napi_enable(&tp->napi);
        rtl_hw_start(dev);
        netif_wake_queue(dev);
-       rtl8169_check_link_status(dev, tp, tp->mmio_addr);
+       rtl8169_check_link_status(dev, tp);
 }
 
 static void rtl8169_tx_timeout(struct net_device *dev)
@@ -7100,7 +6898,7 @@ static int rtl8169_xmit_frags(struct rtl8169_private *tp, struct sk_buff *skb,
        struct skb_shared_info *info = skb_shinfo(skb);
        unsigned int cur_frag, entry;
        struct TxDesc *uninitialized_var(txd);
-       struct device *d = &tp->pci_dev->dev;
+       struct device *d = tp_to_dev(tp);
 
        entry = tp->cur_tx;
        for (cur_frag = 0; cur_frag < info->nr_frags; cur_frag++) {
@@ -7332,8 +7130,7 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
        struct rtl8169_private *tp = netdev_priv(dev);
        unsigned int entry = tp->cur_tx % NUM_TX_DESC;
        struct TxDesc *txd = tp->TxDescArray + entry;
-       void __iomem *ioaddr = tp->mmio_addr;
-       struct device *d = &tp->pci_dev->dev;
+       struct device *d = tp_to_dev(tp);
        dma_addr_t mapping;
        u32 status, len;
        u32 opts[2];
@@ -7392,7 +7189,7 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
 
        tp->cur_tx += frags + 1;
 
-       RTL_W8(TxPoll, NPQ);
+       RTL_W8(tp, TxPoll, NPQ);
 
        mmiowb();
 
@@ -7463,11 +7260,9 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev)
 
        /* The infamous DAC f*ckup only happens at boot time */
        if ((tp->cp_cmd & PCIDAC) && !tp->cur_rx) {
-               void __iomem *ioaddr = tp->mmio_addr;
-
                netif_info(tp, intr, dev, "disabling PCI DAC\n");
                tp->cp_cmd &= ~PCIDAC;
-               RTL_W16(CPlusCmd, tp->cp_cmd);
+               RTL_W16(tp, CPlusCmd, tp->cp_cmd);
                dev->features &= ~NETIF_F_HIGHDMA;
        }
 
@@ -7499,7 +7294,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp)
                 */
                dma_rmb();
 
-               rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb,
+               rtl8169_unmap_tx_skb(tp_to_dev(tp), tx_skb,
                                     tp->TxDescArray + entry);
                if (status & LastFrag) {
                        u64_stats_update_begin(&tp->tx_stats.syncp);
@@ -7533,11 +7328,8 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp)
                 * of start_xmit activity is detected (if it is not detected,
                 * it is slow enough). -- FR
                 */
-               if (tp->cur_tx != dirty_tx) {
-                       void __iomem *ioaddr = tp->mmio_addr;
-
-                       RTL_W8(TxPoll, NPQ);
-               }
+               if (tp->cur_tx != dirty_tx)
+                       RTL_W8(tp, TxPoll, NPQ);
        }
 }
 
@@ -7563,7 +7355,7 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data,
                                           dma_addr_t addr)
 {
        struct sk_buff *skb;
-       struct device *d = &tp->pci_dev->dev;
+       struct device *d = tp_to_dev(tp);
 
        data = rtl8169_align(data);
        dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
@@ -7718,7 +7510,7 @@ static void rtl_slow_event_work(struct rtl8169_private *tp)
                rtl8169_pcierr_interrupt(dev);
 
        if (status & LinkChg)
-               rtl8169_check_link_status(dev, tp, tp->mmio_addr);
+               rtl8169_check_link_status(dev, tp);
 
        rtl_irq_enable_all(tp);
 }
@@ -7790,21 +7582,20 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
        return work_done;
 }
 
-static void rtl8169_rx_missed(struct net_device *dev, void __iomem *ioaddr)
+static void rtl8169_rx_missed(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
 
        if (tp->mac_version > RTL_GIGA_MAC_VER_06)
                return;
 
-       dev->stats.rx_missed_errors += (RTL_R32(RxMissed) & 0xffffff);
-       RTL_W32(RxMissed, 0);
+       dev->stats.rx_missed_errors += RTL_R32(tp, RxMissed) & 0xffffff;
+       RTL_W32(tp, RxMissed, 0);
 }
 
 static void rtl8169_down(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
 
        del_timer_sync(&tp->timer);
 
@@ -7817,7 +7608,7 @@ static void rtl8169_down(struct net_device *dev)
         * as netif_running is not true (rtl8169_interrupt, rtl8169_reset_task)
         * and napi is disabled (rtl8169_poll).
         */
-       rtl8169_rx_missed(dev, ioaddr);
+       rtl8169_rx_missed(dev);
 
        /* Give a racing hard_start_xmit a few cycles to complete. */
        synchronize_sched();
@@ -7847,7 +7638,7 @@ static int rtl8169_close(struct net_device *dev)
 
        cancel_work_sync(&tp->wk.work);
 
-       free_irq(pdev->irq, dev);
+       pci_free_irq(pdev, 0, dev);
 
        dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
                          tp->RxPhyAddr);
@@ -7866,14 +7657,13 @@ static void rtl8169_netpoll(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
 
-       rtl8169_interrupt(tp->pci_dev->irq, dev);
+       rtl8169_interrupt(pci_irq_vector(tp->pci_dev, 0), dev);
 }
 #endif
 
 static int rtl_open(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
        int retval = -ENOMEM;
 
@@ -7903,9 +7693,8 @@ static int rtl_open(struct net_device *dev)
 
        rtl_request_firmware(tp);
 
-       retval = request_irq(pdev->irq, rtl8169_interrupt,
-                            (tp->features & RTL_FEATURE_MSI) ? 0 : IRQF_SHARED,
-                            dev->name, dev);
+       retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, dev,
+                                dev->name);
        if (retval < 0)
                goto err_release_fw_2;
 
@@ -7933,7 +7722,7 @@ static int rtl_open(struct net_device *dev)
        tp->saved_wolopts = 0;
        pm_runtime_put_sync(&pdev->dev);
 
-       rtl8169_check_link_status(dev, tp, ioaddr);
+       rtl8169_check_link_status(dev, tp);
 out:
        return retval;
 
@@ -7957,7 +7746,6 @@ static void
 rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       void __iomem *ioaddr = tp->mmio_addr;
        struct pci_dev *pdev = tp->pci_dev;
        struct rtl8169_counters *counters = tp->counters;
        unsigned int start;
@@ -7965,7 +7753,7 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
        pm_runtime_get_noresume(&pdev->dev);
 
        if (netif_running(dev) && pm_runtime_active(&pdev->dev))
-               rtl8169_rx_missed(dev, ioaddr);
+               rtl8169_rx_missed(dev);
 
        do {
                start = u64_stats_fetch_begin_irq(&tp->rx_stats.syncp);
@@ -8088,7 +7876,7 @@ static int rtl8169_runtime_suspend(struct device *device)
        rtl8169_net_suspend(dev);
 
        /* Update counters before going runtime suspend */
-       rtl8169_rx_missed(dev, tp->mmio_addr);
+       rtl8169_rx_missed(dev);
        rtl8169_update_counters(dev);
 
        return 0;
@@ -8149,8 +7937,6 @@ static const struct dev_pm_ops rtl8169_pm_ops = {
 
 static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
        /* WoL fails with 8168b when the receiver is disabled. */
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_11:
@@ -8158,9 +7944,9 @@ static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
        case RTL_GIGA_MAC_VER_17:
                pci_clear_master(tp->pci_dev);
 
-               RTL_W8(ChipCmd, CmdRxEnb);
+               RTL_W8(tp, ChipCmd, CmdRxEnb);
                /* PCI commit */
-               RTL_R8(ChipCmd);
+               RTL_R8(tp, ChipCmd);
                break;
        default:
                break;
@@ -8235,7 +8021,7 @@ static const struct rtl_cfg_info {
        unsigned int region;
        unsigned int align;
        u16 event_slow;
-       unsigned features;
+       unsigned int has_gmii:1;
        const struct rtl_coalesce_info *coalesce_info;
        u8 default_ver;
 } rtl_cfg_infos [] = {
@@ -8244,7 +8030,7 @@ static const struct rtl_cfg_info {
                .region         = 1,
                .align          = 0,
                .event_slow     = SYSErr | LinkChg | RxOverflow | RxFIFOOver,
-               .features       = RTL_FEATURE_GMII,
+               .has_gmii       = 1,
                .coalesce_info  = rtl_coalesce_info_8169,
                .default_ver    = RTL_GIGA_MAC_VER_01,
        },
@@ -8253,7 +8039,7 @@ static const struct rtl_cfg_info {
                .region         = 2,
                .align          = 8,
                .event_slow     = SYSErr | LinkChg | RxOverflow,
-               .features       = RTL_FEATURE_GMII | RTL_FEATURE_MSI,
+               .has_gmii       = 1,
                .coalesce_info  = rtl_coalesce_info_8168_8136,
                .default_ver    = RTL_GIGA_MAC_VER_11,
        },
@@ -8263,56 +8049,44 @@ static const struct rtl_cfg_info {
                .align          = 8,
                .event_slow     = SYSErr | LinkChg | RxOverflow | RxFIFOOver |
                                  PCSTimeout,
-               .features       = RTL_FEATURE_MSI,
                .coalesce_info  = rtl_coalesce_info_8168_8136,
                .default_ver    = RTL_GIGA_MAC_VER_13,
        }
 };
 
-/* Cfg9346_Unlock assumed. */
-static unsigned rtl_try_msi(struct rtl8169_private *tp,
-                           const struct rtl_cfg_info *cfg)
+static int rtl_alloc_irq(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-       unsigned msi = 0;
-       u8 cfg2;
+       unsigned int flags;
 
-       cfg2 = RTL_R8(Config2) & ~MSIEnable;
-       if (cfg->features & RTL_FEATURE_MSI) {
-               if (pci_enable_msi(tp->pci_dev)) {
-                       netif_info(tp, hw, tp->dev, "no MSI. Back to INTx.\n");
-               } else {
-                       cfg2 |= MSIEnable;
-                       msi = RTL_FEATURE_MSI;
-               }
+       if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
+               RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
+               RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable);
+               RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+               flags = PCI_IRQ_LEGACY;
+       } else {
+               flags = PCI_IRQ_ALL_TYPES;
        }
-       if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
-               RTL_W8(Config2, cfg2);
-       return msi;
+
+       return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags);
 }
 
 DECLARE_RTL_COND(rtl_link_list_ready_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return RTL_R8(MCU) & LINK_LIST_RDY;
+       return RTL_R8(tp, MCU) & LINK_LIST_RDY;
 }
 
 DECLARE_RTL_COND(rtl_rxtx_empty_cond)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
-
-       return (RTL_R8(MCU) & RXTX_EMPTY) == RXTX_EMPTY;
+       return (RTL_R8(tp, MCU) & RXTX_EMPTY) == RXTX_EMPTY;
 }
 
 static void rtl_hw_init_8168g(struct rtl8169_private *tp)
 {
-       void __iomem *ioaddr = tp->mmio_addr;
        u32 data;
 
        tp->ocp_base = OCP_STD_PHY_BASE;
 
-       RTL_W32(MISC, RTL_R32(MISC) | RXDV_GATED_EN);
+       RTL_W32(tp, MISC, RTL_R32(tp, MISC) | RXDV_GATED_EN);
 
        if (!rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 42))
                return;
@@ -8320,9 +8094,9 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp)
        if (!rtl_udelay_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42))
                return;
 
-       RTL_W8(ChipCmd, RTL_R8(ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
+       RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
        msleep(1);
-       RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+       RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
 
        data = r8168_mac_ocp_read(tp, 0xe8de);
        data &= ~(1 << 14);
@@ -8376,7 +8150,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        struct rtl8169_private *tp;
        struct mii_if_info *mii;
        struct net_device *dev;
-       void __iomem *ioaddr;
        int chipset, i;
        int rc;
 
@@ -8402,7 +8175,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        mii->mdio_write = rtl_mdio_write;
        mii->phy_id_mask = 0x1f;
        mii->reg_num_mask = 0x1f;
-       mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII);
+       mii->supports_gmii = cfg->has_gmii;
 
        /* disable ASPM completely as that cause random device stop working
         * problems as well as full system hangs for some PCIe devices users */
@@ -8434,20 +8207,13 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                return -ENODEV;
        }
 
-       rc = pci_request_regions(pdev, MODULENAME);
+       rc = pcim_iomap_regions(pdev, BIT(region), MODULENAME);
        if (rc < 0) {
-               netif_err(tp, probe, dev, "could not request regions\n");
+               netif_err(tp, probe, dev, "cannot remap MMIO, aborting\n");
                return rc;
        }
 
-       /* ioremap MMIO region */
-       ioaddr = devm_ioremap(&pdev->dev, pci_resource_start(pdev, region),
-                             R8169_REGS_SIZE);
-       if (!ioaddr) {
-               netif_err(tp, probe, dev, "cannot remap MMIO, aborting\n");
-               return -EIO;
-       }
-       tp->mmio_addr = ioaddr;
+       tp->mmio_addr = pcim_iomap_table(pdev)[region];
 
        if (!pci_is_pcie(pdev))
                netif_info(tp, probe, dev, "not PCI Express\n");
@@ -8497,9 +8263,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        chipset = tp->mac_version;
        tp->txd_version = rtl_chip_infos[chipset].txd_version;
 
-       RTL_W8(Cfg9346, Cfg9346_Unlock);
-       tp->features |= rtl_try_msi(tp, cfg);
-       RTL_W8(Cfg9346, Cfg9346_Lock);
+       rc = rtl_alloc_irq(tp);
+       if (rc < 0) {
+               netif_err(tp, probe, dev, "Can't allocate interrupt\n");
+               return rc;
+       }
 
        /* override BIOS settings, use userspace tools to enable WOL */
        __rtl8169_set_wol(tp, 0);
@@ -8550,7 +8318,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                        rtl_rar_set(tp, (u8 *)mac_addr);
        }
        for (i = 0; i < ETH_ALEN; i++)
-               dev->dev_addr[i] = RTL_R8(MAC0 + i);
+               dev->dev_addr[i] = RTL_R8(tp, MAC0 + i);
 
        dev->ethtool_ops = &rtl8169_ethtool_ops;
        dev->watchdog_timeo = RTL8169_TX_TIMEOUT;
@@ -8617,8 +8385,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        pci_set_drvdata(pdev, dev);
 
        netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n",
-                  rtl_chip_infos[chipset].name, ioaddr, dev->dev_addr,
-                  (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff), pdev->irq);
+                  rtl_chip_infos[chipset].name, tp->mmio_addr, dev->dev_addr,
+                  (u32)(RTL_R32(tp, TxConfig) & 0x9cf0f8ff),
+                  pci_irq_vector(pdev, 0));
        if (rtl_chip_infos[chipset].jumbo_max != JUMBO_1K) {
                netif_info(tp, probe, dev, "jumbo features [frames: %d bytes, "
                           "tx checksumming: %s]\n",
index 54a6265da7a06460ebe3912af1df96d58e7b5e1f..68f122140966d4de381b47fa192246eb7606707a 100644 (file)
@@ -346,7 +346,6 @@ static int ravb_ring_init(struct net_device *ndev, int q)
        int ring_size;
        int i;
 
-       /* +16 gets room from the status from the card. */
        priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) +
                ETH_HLEN + VLAN_HLEN;
 
index d7d5a6d15219d383f3b895671847a348682ef7fc..3557fe3f2bb543f2007cc7a30c22421aa2aff25a 100644 (file)
@@ -123,8 +123,8 @@ static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = {
        [TSU_FWSL0]     = 0x0030,
        [TSU_FWSL1]     = 0x0034,
        [TSU_FWSLC]     = 0x0038,
-       [TSU_QTAG0]     = 0x0040,
-       [TSU_QTAG1]     = 0x0044,
+       [TSU_QTAGM0]    = 0x0040,
+       [TSU_QTAGM1]    = 0x0044,
        [TSU_FWSR]      = 0x0050,
        [TSU_FWINMK]    = 0x0054,
        [TSU_ADQT0]     = 0x0048,
@@ -439,6 +439,17 @@ static void sh_eth_modify(struct net_device *ndev, int enum_index, u32 clear,
                     enum_index);
 }
 
+static void sh_eth_tsu_write(struct sh_eth_private *mdp, u32 data,
+                            int enum_index)
+{
+       iowrite32(data, mdp->tsu_addr + mdp->reg_offset[enum_index]);
+}
+
+static u32 sh_eth_tsu_read(struct sh_eth_private *mdp, int enum_index)
+{
+       return ioread32(mdp->tsu_addr + mdp->reg_offset[enum_index]);
+}
+
 static bool sh_eth_is_gether(struct sh_eth_private *mdp)
 {
        return mdp->reg_offset == sh_eth_offset_gigabit;
@@ -752,6 +763,7 @@ static struct sh_eth_cpu_data sh7757_data = {
        .rpadir         = 1,
        .rpadir_value   = 2 << 16,
        .rtrate         = 1,
+       .dual_port      = 1,
 };
 
 #define SH_GIGA_ETH_BASE       0xfee00000UL
@@ -830,6 +842,7 @@ static struct sh_eth_cpu_data sh7757_data_giga = {
        .no_trimd       = 1,
        .no_ade         = 1,
        .tsu            = 1,
+       .dual_port      = 1,
 };
 
 /* SH7734 */
@@ -900,6 +913,7 @@ static struct sh_eth_cpu_data sh7763_data = {
        .tsu            = 1,
        .irq_flags      = IRQF_SHARED,
        .magic          = 1,
+       .dual_port      = 1,
 };
 
 static struct sh_eth_cpu_data sh7619_data = {
@@ -932,6 +946,7 @@ static struct sh_eth_cpu_data sh771x_data = {
                          EESIPR_RRFIP | EESIPR_RTLFIP | EESIPR_RTSFIP |
                          EESIPR_PREIP | EESIPR_CERFIP,
        .tsu            = 1,
+       .dual_port      = 1,
 };
 
 static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd)
@@ -2097,8 +2112,6 @@ static size_t __sh_eth_get_regs(struct net_device *ndev, u32 *buf)
                add_tsu_reg(TSU_FWSL0);
                add_tsu_reg(TSU_FWSL1);
                add_tsu_reg(TSU_FWSLC);
-               add_tsu_reg(TSU_QTAG0);
-               add_tsu_reg(TSU_QTAG1);
                add_tsu_reg(TSU_QTAGM0);
                add_tsu_reg(TSU_QTAGM1);
                add_tsu_reg(TSU_FWSR);
@@ -2917,7 +2930,7 @@ static int sh_eth_vlan_rx_kill_vid(struct net_device *ndev,
 /* SuperH's TSU register init function */
 static void sh_eth_tsu_init(struct sh_eth_private *mdp)
 {
-       if (sh_eth_is_rz_fast_ether(mdp)) {
+       if (!mdp->cd->dual_port) {
                sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */
                sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL,
                                 TSU_FWSLC);    /* Enable POST registers */
@@ -2934,13 +2947,8 @@ static void sh_eth_tsu_init(struct sh_eth_private *mdp)
        sh_eth_tsu_write(mdp, 0, TSU_FWSL0);
        sh_eth_tsu_write(mdp, 0, TSU_FWSL1);
        sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL, TSU_FWSLC);
-       if (sh_eth_is_gether(mdp)) {
-               sh_eth_tsu_write(mdp, 0, TSU_QTAG0);    /* Disable QTAG(0->1) */
-               sh_eth_tsu_write(mdp, 0, TSU_QTAG1);    /* Disable QTAG(1->0) */
-       } else {
-               sh_eth_tsu_write(mdp, 0, TSU_QTAGM0);   /* Disable QTAG(0->1) */
-               sh_eth_tsu_write(mdp, 0, TSU_QTAGM1);   /* Disable QTAG(1->0) */
-       }
+       sh_eth_tsu_write(mdp, 0, TSU_QTAGM0);   /* Disable QTAG(0->1) */
+       sh_eth_tsu_write(mdp, 0, TSU_QTAGM1);   /* Disable QTAG(1->0) */
        sh_eth_tsu_write(mdp, 0, TSU_FWSR);     /* all interrupt status clear */
        sh_eth_tsu_write(mdp, 0, TSU_FWINMK);   /* Disable all interrupt */
        sh_eth_tsu_write(mdp, 0, TSU_TEN);      /* Disable all CAM entry */
index a6753ccba711cd0dc331e132eaa7d0f20795f10c..21047d58a93f726e34d65eee0a54c4185a95ab90 100644 (file)
@@ -118,8 +118,8 @@ enum {
        TSU_FWSL0,
        TSU_FWSL1,
        TSU_FWSLC,
-       TSU_QTAG0,
-       TSU_QTAG1,
+       TSU_QTAG0,                      /* Same as TSU_QTAGM0 */
+       TSU_QTAG1,                      /* Same as TSU_QTAGM1 */
        TSU_QTAGM0,
        TSU_QTAGM1,
        TSU_FWSR,
@@ -509,6 +509,7 @@ struct sh_eth_cpu_data {
        unsigned rmiimode:1;    /* EtherC has RMIIMODE register */
        unsigned rtrate:1;      /* EtherC has RTRATE register */
        unsigned magic:1;       /* EtherC has ECMR.MPDE and ECSR.MPD */
+       unsigned dual_port:1;   /* Dual EtherC/E-DMAC */
 };
 
 struct sh_eth_private {
@@ -567,15 +568,4 @@ static inline void *sh_eth_tsu_get_offset(struct sh_eth_private *mdp,
        return mdp->tsu_addr + mdp->reg_offset[enum_index];
 }
 
-static inline void sh_eth_tsu_write(struct sh_eth_private *mdp, u32 data,
-                                   int enum_index)
-{
-       iowrite32(data, mdp->tsu_addr + mdp->reg_offset[enum_index]);
-}
-
-static inline u32 sh_eth_tsu_read(struct sh_eth_private *mdp, int enum_index)
-{
-       return ioread32(mdp->tsu_addr + mdp->reg_offset[enum_index]);
-}
-
 #endif /* #ifndef __SH_ETH_H__ */
index 75fbf58e421c327f52bbed81d60a53e8c456d118..e100273b623d36561fb99059f9b09b6b783411f4 100644 (file)
@@ -28,9 +28,6 @@ enum {
        EFX_EF10_TEST = 1,
        EFX_EF10_REFILL,
 };
-
-/* The reserved RSS context value */
-#define EFX_EF10_RSS_CONTEXT_INVALID   0xffffffff
 /* The maximum size of a shared RSS context */
 /* TODO: this should really be from the mcdi protocol export */
 #define EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE 64UL
@@ -697,7 +694,7 @@ static int efx_ef10_probe(struct efx_nic *efx)
        }
        nic_data->warm_boot_count = rc;
 
-       nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
+       efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
 
        nic_data->vport_id = EVB_PORT_ID_ASSIGNED;
 
@@ -1489,8 +1486,8 @@ static int efx_ef10_init_nic(struct efx_nic *efx)
        }
 
        /* don't fail init if RSS setup doesn't work */
-       rc = efx->type->rx_push_rss_config(efx, false, efx->rx_indir_table, NULL);
-       efx->rss_active = (rc == 0);
+       rc = efx->type->rx_push_rss_config(efx, false,
+                                          efx->rss_context.rx_indir_table, NULL);
 
        return 0;
 }
@@ -1507,7 +1504,7 @@ static void efx_ef10_reset_mc_allocations(struct efx_nic *efx)
        nic_data->must_restore_filters = true;
        nic_data->must_restore_piobufs = true;
        efx_ef10_forget_old_piobufs(efx);
-       nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
+       efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
 
        /* Driver-created vswitches and vports must be re-created */
        nic_data->must_probe_vswitching = true;
@@ -2703,27 +2700,30 @@ static int efx_ef10_get_rss_flags(struct efx_nic *efx, u32 context, u32 *flags)
  * Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we
  * just need to set the UDP ports flags (for both IP versions).
  */
-static void efx_ef10_set_rss_flags(struct efx_nic *efx, u32 context)
+static void efx_ef10_set_rss_flags(struct efx_nic *efx,
+                                  struct efx_rss_context *ctx)
 {
        MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN);
        u32 flags;
 
        BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN != 0);
 
-       if (efx_ef10_get_rss_flags(efx, context, &flags) != 0)
+       if (efx_ef10_get_rss_flags(efx, ctx->context_id, &flags) != 0)
                return;
-       MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID, context);
+       MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID,
+                      ctx->context_id);
        flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN;
        flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN;
        MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_FLAGS, flags);
        if (!efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_FLAGS, inbuf, sizeof(inbuf),
                          NULL, 0, NULL))
                /* Succeeded, so UDP 4-tuple is now enabled */
-               efx->rx_hash_udp_4tuple = true;
+               ctx->rx_hash_udp_4tuple = true;
 }
 
-static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
-                                     bool exclusive, unsigned *context_size)
+static int efx_ef10_alloc_rss_context(struct efx_nic *efx, bool exclusive,
+                                     struct efx_rss_context *ctx,
+                                     unsigned *context_size)
 {
        MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN);
        MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN);
@@ -2739,7 +2739,7 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
                                    EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE);
 
        if (!exclusive && rss_spread == 1) {
-               *context = EFX_EF10_RSS_CONTEXT_INVALID;
+               ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
                if (context_size)
                        *context_size = 1;
                return 0;
@@ -2762,29 +2762,26 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
        if (outlen < MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN)
                return -EIO;
 
-       *context = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID);
+       ctx->context_id = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID);
 
        if (context_size)
                *context_size = rss_spread;
 
        if (nic_data->datapath_caps &
            1 << MC_CMD_GET_CAPABILITIES_OUT_ADDITIONAL_RSS_MODES_LBN)
-               efx_ef10_set_rss_flags(efx, *context);
+               efx_ef10_set_rss_flags(efx, ctx);
 
        return 0;
 }
 
-static void efx_ef10_free_rss_context(struct efx_nic *efx, u32 context)
+static int efx_ef10_free_rss_context(struct efx_nic *efx, u32 context)
 {
        MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_FREE_IN_LEN);
-       int rc;
 
        MCDI_SET_DWORD(inbuf, RSS_CONTEXT_FREE_IN_RSS_CONTEXT_ID,
                       context);
-
-       rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_FREE, inbuf, sizeof(inbuf),
+       return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_FREE, inbuf, sizeof(inbuf),
                            NULL, 0, NULL);
-       WARN_ON(rc != 0);
 }
 
 static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
@@ -2796,15 +2793,15 @@ static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
 
        MCDI_SET_DWORD(tablebuf, RSS_CONTEXT_SET_TABLE_IN_RSS_CONTEXT_ID,
                       context);
-       BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+       BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
                     MC_CMD_RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE_LEN);
 
-       /* This iterates over the length of efx->rx_indir_table, but copies
-        * bytes from rx_indir_table.  That's because the latter is a pointer
-        * rather than an array, but should have the same length.
-        * The efx->rx_hash_key loop below is similar.
+       /* This iterates over the length of efx->rss_context.rx_indir_table, but
+        * copies bytes from rx_indir_table.  That's because the latter is a
+        * pointer rather than an array, but should have the same length.
+        * The efx->rss_context.rx_hash_key loop below is similar.
         */
-       for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); ++i)
+       for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_indir_table); ++i)
                MCDI_PTR(tablebuf,
                         RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE)[i] =
                                (u8) rx_indir_table[i];
@@ -2816,9 +2813,9 @@ static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
 
        MCDI_SET_DWORD(keybuf, RSS_CONTEXT_SET_KEY_IN_RSS_CONTEXT_ID,
                       context);
-       BUILD_BUG_ON(ARRAY_SIZE(efx->rx_hash_key) !=
+       BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_hash_key) !=
                     MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
-       for (i = 0; i < ARRAY_SIZE(efx->rx_hash_key); ++i)
+       for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_hash_key); ++i)
                MCDI_PTR(keybuf, RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY)[i] = key[i];
 
        return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_KEY, keybuf,
@@ -2827,27 +2824,27 @@ static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
 
 static void efx_ef10_rx_free_indir_table(struct efx_nic *efx)
 {
-       struct efx_ef10_nic_data *nic_data = efx->nic_data;
+       int rc;
 
-       if (nic_data->rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID)
-               efx_ef10_free_rss_context(efx, nic_data->rx_rss_context);
-       nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
+       if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID) {
+               rc = efx_ef10_free_rss_context(efx, efx->rss_context.context_id);
+               WARN_ON(rc != 0);
+       }
+       efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
 }
 
 static int efx_ef10_rx_push_shared_rss_config(struct efx_nic *efx,
                                              unsigned *context_size)
 {
-       u32 new_rx_rss_context;
        struct efx_ef10_nic_data *nic_data = efx->nic_data;
-       int rc = efx_ef10_alloc_rss_context(efx, &new_rx_rss_context,
-                                           false, context_size);
+       int rc = efx_ef10_alloc_rss_context(efx, false, &efx->rss_context,
+                                           context_size);
 
        if (rc != 0)
                return rc;
 
-       nic_data->rx_rss_context = new_rx_rss_context;
        nic_data->rx_rss_context_exclusive = false;
-       efx_set_default_rx_indir_table(efx);
+       efx_set_default_rx_indir_table(efx, &efx->rss_context);
        return 0;
 }
 
@@ -2855,50 +2852,79 @@ static int efx_ef10_rx_push_exclusive_rss_config(struct efx_nic *efx,
                                                 const u32 *rx_indir_table,
                                                 const u8 *key)
 {
+       u32 old_rx_rss_context = efx->rss_context.context_id;
        struct efx_ef10_nic_data *nic_data = efx->nic_data;
        int rc;
-       u32 new_rx_rss_context;
 
-       if (nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID ||
+       if (efx->rss_context.context_id == EFX_EF10_RSS_CONTEXT_INVALID ||
            !nic_data->rx_rss_context_exclusive) {
-               rc = efx_ef10_alloc_rss_context(efx, &new_rx_rss_context,
-                                               true, NULL);
+               rc = efx_ef10_alloc_rss_context(efx, true, &efx->rss_context,
+                                               NULL);
                if (rc == -EOPNOTSUPP)
                        return rc;
                else if (rc != 0)
                        goto fail1;
-       } else {
-               new_rx_rss_context = nic_data->rx_rss_context;
        }
 
-       rc = efx_ef10_populate_rss_table(efx, new_rx_rss_context,
+       rc = efx_ef10_populate_rss_table(efx, efx->rss_context.context_id,
                                         rx_indir_table, key);
        if (rc != 0)
                goto fail2;
 
-       if (nic_data->rx_rss_context != new_rx_rss_context)
-               efx_ef10_rx_free_indir_table(efx);
-       nic_data->rx_rss_context = new_rx_rss_context;
+       if (efx->rss_context.context_id != old_rx_rss_context &&
+           old_rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID)
+               WARN_ON(efx_ef10_free_rss_context(efx, old_rx_rss_context) != 0);
        nic_data->rx_rss_context_exclusive = true;
-       if (rx_indir_table != efx->rx_indir_table)
-               memcpy(efx->rx_indir_table, rx_indir_table,
-                      sizeof(efx->rx_indir_table));
-       if (key != efx->rx_hash_key)
-               memcpy(efx->rx_hash_key, key, efx->type->rx_hash_key_size);
+       if (rx_indir_table != efx->rss_context.rx_indir_table)
+               memcpy(efx->rss_context.rx_indir_table, rx_indir_table,
+                      sizeof(efx->rss_context.rx_indir_table));
+       if (key != efx->rss_context.rx_hash_key)
+               memcpy(efx->rss_context.rx_hash_key, key,
+                      efx->type->rx_hash_key_size);
 
        return 0;
 
 fail2:
-       if (new_rx_rss_context != nic_data->rx_rss_context)
-               efx_ef10_free_rss_context(efx, new_rx_rss_context);
+       if (old_rx_rss_context != efx->rss_context.context_id) {
+               WARN_ON(efx_ef10_free_rss_context(efx, efx->rss_context.context_id) != 0);
+               efx->rss_context.context_id = old_rx_rss_context;
+       }
 fail1:
        netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
        return rc;
 }
 
-static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
+static int efx_ef10_rx_push_rss_context_config(struct efx_nic *efx,
+                                              struct efx_rss_context *ctx,
+                                              const u32 *rx_indir_table,
+                                              const u8 *key)
+{
+       int rc;
+
+       if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
+               rc = efx_ef10_alloc_rss_context(efx, true, ctx, NULL);
+               if (rc)
+                       return rc;
+       }
+
+       if (!rx_indir_table) /* Delete this context */
+               return efx_ef10_free_rss_context(efx, ctx->context_id);
+
+       rc = efx_ef10_populate_rss_table(efx, ctx->context_id,
+                                        rx_indir_table, key);
+       if (rc)
+               return rc;
+
+       memcpy(ctx->rx_indir_table, rx_indir_table,
+              sizeof(efx->rss_context.rx_indir_table));
+       memcpy(ctx->rx_hash_key, key, efx->type->rx_hash_key_size);
+
+       return 0;
+}
+
+static int efx_ef10_rx_pull_rss_context_config(struct efx_nic *efx,
+                                              struct efx_rss_context *ctx)
 {
-       struct efx_ef10_nic_data *nic_data = efx->nic_data;
        MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN);
        MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN);
        MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN);
@@ -2908,12 +2934,12 @@ static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
        BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN !=
                     MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN);
 
-       if (nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID)
+       if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID)
                return -ENOENT;
 
        MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_TABLE_IN_RSS_CONTEXT_ID,
-                      nic_data->rx_rss_context);
-       BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+                      ctx->context_id);
+       BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_indir_table) !=
                     MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE_LEN);
        rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_TABLE, inbuf, sizeof(inbuf),
                          tablebuf, sizeof(tablebuf), &outlen);
@@ -2923,13 +2949,13 @@ static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
        if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN))
                return -EIO;
 
-       for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
-               efx->rx_indir_table[i] = MCDI_PTR(tablebuf,
+       for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
+               ctx->rx_indir_table[i] = MCDI_PTR(tablebuf,
                                RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE)[i];
 
        MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_KEY_IN_RSS_CONTEXT_ID,
-                      nic_data->rx_rss_context);
-       BUILD_BUG_ON(ARRAY_SIZE(efx->rx_hash_key) !=
+                      ctx->context_id);
+       BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_hash_key) !=
                     MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
        rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_KEY, inbuf, sizeof(inbuf),
                          keybuf, sizeof(keybuf), &outlen);
@@ -2939,13 +2965,38 @@ static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
        if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN))
                return -EIO;
 
-       for (i = 0; i < ARRAY_SIZE(efx->rx_hash_key); ++i)
-               efx->rx_hash_key[i] = MCDI_PTR(
+       for (i = 0; i < ARRAY_SIZE(ctx->rx_hash_key); ++i)
+               ctx->rx_hash_key[i] = MCDI_PTR(
                                keybuf, RSS_CONTEXT_GET_KEY_OUT_TOEPLITZ_KEY)[i];
 
        return 0;
 }
 
+static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
+{
+       return efx_ef10_rx_pull_rss_context_config(efx, &efx->rss_context);
+}
+
+static void efx_ef10_rx_restore_rss_contexts(struct efx_nic *efx)
+{
+       struct efx_rss_context *ctx;
+       int rc;
+
+       list_for_each_entry(ctx, &efx->rss_context.list, list) {
+               /* previous NIC RSS context is gone */
+               ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+               /* so try to allocate a new one */
+               rc = efx_ef10_rx_push_rss_context_config(efx, ctx,
+                                                        ctx->rx_indir_table,
+                                                        ctx->rx_hash_key);
+               if (rc)
+                       netif_warn(efx, probe, efx->net_dev,
+                                  "failed to restore RSS context %u, rc=%d"
+                                  "; RSS filters may fail to be applied\n",
+                                  ctx->user_id, rc);
+       }
+}
+
 static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
                                          const u32 *rx_indir_table,
                                          const u8 *key)
@@ -2956,7 +3007,7 @@ static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
                return 0;
 
        if (!key)
-               key = efx->rx_hash_key;
+               key = efx->rss_context.rx_hash_key;
 
        rc = efx_ef10_rx_push_exclusive_rss_config(efx, rx_indir_table, key);
 
@@ -2965,7 +3016,8 @@ static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
                bool mismatch = false;
                size_t i;
 
-               for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table) && !mismatch;
+               for (i = 0;
+                    i < ARRAY_SIZE(efx->rss_context.rx_indir_table) && !mismatch;
                     i++)
                        mismatch = rx_indir_table[i] !=
                                ethtool_rxfh_indir_default(i, efx->rss_spread);
@@ -3000,11 +3052,9 @@ static int efx_ef10_vf_rx_push_rss_config(struct efx_nic *efx, bool user,
                                          const u8 *key
                                          __attribute__ ((unused)))
 {
-       struct efx_ef10_nic_data *nic_data = efx->nic_data;
-
        if (user)
                return -EOPNOTSUPP;
-       if (nic_data->rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID)
+       if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID)
                return 0;
        return efx_ef10_rx_push_shared_rss_config(efx, NULL);
 }
@@ -4109,6 +4159,7 @@ efx_ef10_filter_push_prep_set_match_fields(struct efx_nic *efx,
 static void efx_ef10_filter_push_prep(struct efx_nic *efx,
                                      const struct efx_filter_spec *spec,
                                      efx_dword_t *inbuf, u64 handle,
+                                     struct efx_rss_context *ctx,
                                      bool replacing)
 {
        struct efx_ef10_nic_data *nic_data = efx->nic_data;
@@ -4116,11 +4167,16 @@ static void efx_ef10_filter_push_prep(struct efx_nic *efx,
 
        memset(inbuf, 0, MC_CMD_FILTER_OP_EXT_IN_LEN);
 
-       /* Remove RSS flag if we don't have an RSS context. */
-       if (flags & EFX_FILTER_FLAG_RX_RSS &&
-           spec->rss_context == EFX_FILTER_RSS_CONTEXT_DEFAULT &&
-           nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID)
-               flags &= ~EFX_FILTER_FLAG_RX_RSS;
+       /* If RSS filter, caller better have given us an RSS context */
+       if (flags & EFX_FILTER_FLAG_RX_RSS) {
+               /* We don't have the ability to return an error, so we'll just
+                * log a warning and disable RSS for the filter.
+                */
+               if (WARN_ON_ONCE(!ctx))
+                       flags &= ~EFX_FILTER_FLAG_RX_RSS;
+               else if (WARN_ON_ONCE(ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID))
+                       flags &= ~EFX_FILTER_FLAG_RX_RSS;
+       }
 
        if (replacing) {
                MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
@@ -4146,21 +4202,18 @@ static void efx_ef10_filter_push_prep(struct efx_nic *efx,
                       MC_CMD_FILTER_OP_IN_RX_MODE_RSS :
                       MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE);
        if (flags & EFX_FILTER_FLAG_RX_RSS)
-               MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT,
-                              spec->rss_context !=
-                              EFX_FILTER_RSS_CONTEXT_DEFAULT ?
-                              spec->rss_context : nic_data->rx_rss_context);
+               MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT, ctx->context_id);
 }
 
 static int efx_ef10_filter_push(struct efx_nic *efx,
-                               const struct efx_filter_spec *spec,
-                               u64 *handle, bool replacing)
+                               const struct efx_filter_spec *spec, u64 *handle,
+                               struct efx_rss_context *ctx, bool replacing)
 {
        MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
        MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_EXT_OUT_LEN);
        int rc;
 
-       efx_ef10_filter_push_prep(efx, spec, inbuf, *handle, replacing);
+       efx_ef10_filter_push_prep(efx, spec, inbuf, *handle, ctx, replacing);
        rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
                          outbuf, sizeof(outbuf), NULL);
        if (rc == 0)
@@ -4252,6 +4305,7 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
        struct efx_ef10_filter_table *table = efx->filter_state;
        DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
        struct efx_filter_spec *saved_spec;
+       struct efx_rss_context *ctx = NULL;
        unsigned int match_pri, hash;
        unsigned int priv_flags;
        bool replacing = false;
@@ -4275,6 +4329,18 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
        if (is_mc_recip)
                bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
 
+       if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
+               if (spec->rss_context)
+                       ctx = efx_find_rss_context_entry(spec->rss_context,
+                                                        &efx->rss_context.list);
+               else
+                       ctx = &efx->rss_context;
+               if (!ctx)
+                       return -ENOENT;
+               if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID)
+                       return -EOPNOTSUPP;
+       }
+
        /* Find any existing filters with the same match tuple or
         * else a free slot to insert at.  If any of them are busy,
         * we have to wait and retry.
@@ -4390,7 +4456,7 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
        spin_unlock_bh(&efx->filter_lock);
 
        rc = efx_ef10_filter_push(efx, spec, &table->entry[ins_index].handle,
-                                 replacing);
+                                 ctx, replacing);
 
        /* Finalise the software table entry */
        spin_lock_bh(&efx->filter_lock);
@@ -4534,12 +4600,13 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
 
                new_spec.priority = EFX_FILTER_PRI_AUTO;
                new_spec.flags = (EFX_FILTER_FLAG_RX |
-                                 (efx_rss_enabled(efx) ?
+                                 (efx_rss_active(&efx->rss_context) ?
                                   EFX_FILTER_FLAG_RX_RSS : 0));
                new_spec.dmaq_id = 0;
-               new_spec.rss_context = EFX_FILTER_RSS_CONTEXT_DEFAULT;
+               new_spec.rss_context = 0;
                rc = efx_ef10_filter_push(efx, &new_spec,
                                          &table->entry[filter_idx].handle,
+                                         &efx->rss_context,
                                          true);
 
                spin_lock_bh(&efx->filter_lock);
@@ -4783,7 +4850,8 @@ static s32 efx_ef10_filter_rfs_insert(struct efx_nic *efx,
        cookie = replacing << 31 | ins_index << 16 | spec->dmaq_id;
 
        efx_ef10_filter_push_prep(efx, spec, inbuf,
-                                 table->entry[ins_index].handle, replacing);
+                                 table->entry[ins_index].handle, NULL,
+                                 replacing);
        efx_mcdi_rpc_async(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
                           MC_CMD_FILTER_OP_OUT_LEN,
                           efx_ef10_filter_rfs_insert_complete, cookie);
@@ -5104,6 +5172,7 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx)
        unsigned int invalid_filters = 0, failed = 0;
        struct efx_ef10_filter_vlan *vlan;
        struct efx_filter_spec *spec;
+       struct efx_rss_context *ctx;
        unsigned int filter_idx;
        u32 mcdi_flags;
        int match_pri;
@@ -5133,17 +5202,34 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx)
                        invalid_filters++;
                        goto not_restored;
                }
-               if (spec->rss_context != EFX_FILTER_RSS_CONTEXT_DEFAULT &&
-                   spec->rss_context != nic_data->rx_rss_context)
-                       netif_warn(efx, drv, efx->net_dev,
-                                  "Warning: unable to restore a filter with specific RSS context.\n");
+               if (spec->rss_context)
+                       ctx = efx_find_rss_context_entry(spec->rss_context,
+                                                        &efx->rss_context.list);
+               else
+                       ctx = &efx->rss_context;
+               if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
+                       if (!ctx) {
+                               netif_warn(efx, drv, efx->net_dev,
+                                          "Warning: unable to restore a filter with nonexistent RSS context %u.\n",
+                                          spec->rss_context);
+                               invalid_filters++;
+                               goto not_restored;
+                       }
+                       if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
+                               netif_warn(efx, drv, efx->net_dev,
+                                          "Warning: unable to restore a filter with RSS context %u as it was not created.\n",
+                                          spec->rss_context);
+                               invalid_filters++;
+                               goto not_restored;
+                       }
+               }
 
                table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY;
                spin_unlock_bh(&efx->filter_lock);
 
                rc = efx_ef10_filter_push(efx, spec,
                                          &table->entry[filter_idx].handle,
-                                         false);
+                                         ctx, false);
                if (rc)
                        failed++;
                spin_lock_bh(&efx->filter_lock);
@@ -6784,6 +6870,9 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
        .tx_limit_len = efx_ef10_tx_limit_len,
        .rx_push_rss_config = efx_ef10_pf_rx_push_rss_config,
        .rx_pull_rss_config = efx_ef10_rx_pull_rss_config,
+       .rx_push_rss_context_config = efx_ef10_rx_push_rss_context_config,
+       .rx_pull_rss_context_config = efx_ef10_rx_pull_rss_context_config,
+       .rx_restore_rss_contexts = efx_ef10_rx_restore_rss_contexts,
        .rx_probe = efx_ef10_rx_probe,
        .rx_init = efx_ef10_rx_init,
        .rx_remove = efx_ef10_rx_remove,
index 16757cfc5b29258c8331d1f6238b064fabedc3a1..7321a4cf6f4dc9819132ef92441e211a558ce757 100644 (file)
@@ -1353,12 +1353,13 @@ static void efx_fini_io(struct efx_nic *efx)
                pci_disable_device(efx->pci_dev);
 }
 
-void efx_set_default_rx_indir_table(struct efx_nic *efx)
+void efx_set_default_rx_indir_table(struct efx_nic *efx,
+                                   struct efx_rss_context *ctx)
 {
        size_t i;
 
-       for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
-               efx->rx_indir_table[i] =
+       for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
+               ctx->rx_indir_table[i] =
                        ethtool_rxfh_indir_default(i, efx->rss_spread);
 }
 
@@ -1739,9 +1740,9 @@ static int efx_probe_nic(struct efx_nic *efx)
        } while (rc == -EAGAIN);
 
        if (efx->n_channels > 1)
-               netdev_rss_key_fill(&efx->rx_hash_key,
-                                   sizeof(efx->rx_hash_key));
-       efx_set_default_rx_indir_table(efx);
+               netdev_rss_key_fill(efx->rss_context.rx_hash_key,
+                                   sizeof(efx->rss_context.rx_hash_key));
+       efx_set_default_rx_indir_table(efx, &efx->rss_context);
 
        netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
        netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
@@ -2700,6 +2701,8 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
                           " VFs may not function\n", rc);
 #endif
 
+       if (efx->type->rx_restore_rss_contexts)
+               efx->type->rx_restore_rss_contexts(efx);
        down_read(&efx->filter_sem);
        efx_restore_filters(efx);
        up_read(&efx->filter_sem);
@@ -3003,6 +3006,7 @@ static int efx_init_struct(struct efx_nic *efx,
                efx->type->rx_hash_offset - efx->type->rx_prefix_size;
        efx->rx_packet_ts_offset =
                efx->type->rx_ts_offset - efx->type->rx_prefix_size;
+       INIT_LIST_HEAD(&efx->rss_context.list);
        spin_lock_init(&efx->stats_lock);
        efx->vi_stride = EFX_DEFAULT_VI_STRIDE;
        efx->num_mac_stats = MC_CMD_MAC_NSTATS;
@@ -3072,6 +3076,55 @@ void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
        stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops);
 }
 
+/* RSS contexts.  We're using linked lists and crappy O(n) algorithms, because
+ * (a) this is an infrequent control-plane operation and (b) n is small (max 64)
+ */
+/* Allocate a new RSS context entry and link it into the list at @head.
+ * The entry is given the first user_id (counting up from 1) that does not
+ * match the entry found at the corresponding list position, and is inserted
+ * at that position.  Only context_id, rx_hash_udp_4tuple and user_id are set
+ * here; the hash key and indirection table are not written by this function.
+ * Returns the new entry, or NULL if the id counter wraps or kmalloc() fails.
+ */
+struct efx_rss_context *efx_alloc_rss_context_entry(struct list_head *head)
+{
+       struct efx_rss_context *ctx, *new;
+       u32 id = 1; /* Don't use zero, that refers to the master RSS context */
+
+       /* Search for first gap in the numbering */
+       list_for_each_entry(ctx, head, list) {
+               if (ctx->user_id != id)
+                       break;
+               id++;
+               /* Check for wrap.  If this happens, we have nearly 2^32
+                * allocated RSS contexts, which seems unlikely.
+                */
+               if (WARN_ON_ONCE(!id))
+                       return NULL;
+       }
+
+       /* Create the new entry */
+       new = kmalloc(sizeof(struct efx_rss_context), GFP_KERNEL);
+       if (!new)
+               return NULL;
+       new->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+       new->rx_hash_udp_4tuple = false;
+
+       /* Insert the new entry into the gap */
+       new->user_id = id;
+       /* list_add_tail() links @new immediately before @ctx.  If the loop
+        * above ran to completion without a break, &ctx->list is @head itself,
+        * so this appends @new at the end of the list.
+        */
+       list_add_tail(&new->list, &ctx->list);
+       return new;
+}
+
+/* Walk the list at @head and return the first RSS context entry whose user_id
+ * equals @id, or NULL if no entry matches.
+ */
+struct efx_rss_context *efx_find_rss_context_entry(u32 id, struct list_head *head)
+{
+       struct efx_rss_context *ctx;
+
+       list_for_each_entry(ctx, head, list)
+               if (ctx->user_id == id)
+                       return ctx;
+       return NULL;
+}
+
+/* Unlink @ctx from the list it is on and free its memory with kfree().
+ * @ctx must not be used after this returns.
+ */
+void efx_free_rss_context_entry(struct efx_rss_context *ctx)
+{
+       list_del(&ctx->list);
+       kfree(ctx);
+}
+
 /**************************************************************************
  *
  * PCI interface
index 0cddc5ad77b169dc6000f7d7a301a276e5ac5555..3429ae3f3b083197ce4088596650addca15a7c2c 100644 (file)
@@ -34,7 +34,8 @@ extern unsigned int efx_piobuf_size;
 extern bool efx_separate_tx_channels;
 
 /* RX */
-void efx_set_default_rx_indir_table(struct efx_nic *efx);
+void efx_set_default_rx_indir_table(struct efx_nic *efx,
+                                   struct efx_rss_context *ctx);
 void efx_rx_config_page_split(struct efx_nic *efx);
 int efx_probe_rx_queue(struct efx_rx_queue *rx_queue);
 void efx_remove_rx_queue(struct efx_rx_queue *rx_queue);
@@ -182,6 +183,15 @@ static inline void efx_filter_rfs_expire(struct efx_channel *channel) {}
 #endif
 bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec);
 
+/* RSS contexts */
+struct efx_rss_context *efx_alloc_rss_context_entry(struct list_head *list);
+struct efx_rss_context *efx_find_rss_context_entry(u32 id, struct list_head *list);
+void efx_free_rss_context_entry(struct efx_rss_context *ctx);
+/* An RSS context is considered active when its context_id holds anything
+ * other than EFX_EF10_RSS_CONTEXT_INVALID.
+ */
+static inline bool efx_rss_active(struct efx_rss_context *ctx)
+{
+       return ctx->context_id != EFX_EF10_RSS_CONTEXT_INVALID;
+}
+
 /* Channels */
 int efx_channel_dummy_op_int(struct efx_channel *channel);
 void efx_channel_dummy_op_void(struct efx_channel *channel);
index 4db2dc2bf52f6fd77d47153320f8cb0a403deed6..bb1c80d48d122c0b0263ab958703732ac73de96f 100644 (file)
@@ -808,7 +808,8 @@ static inline void ip6_fill_mask(__be32 *mask)
 }
 
 static int efx_ethtool_get_class_rule(struct efx_nic *efx,
-                                     struct ethtool_rx_flow_spec *rule)
+                                     struct ethtool_rx_flow_spec *rule,
+                                     u32 *rss_context)
 {
        struct ethtool_tcpip4_spec *ip_entry = &rule->h_u.tcp_ip4_spec;
        struct ethtool_tcpip4_spec *ip_mask = &rule->m_u.tcp_ip4_spec;
@@ -964,6 +965,11 @@ static int efx_ethtool_get_class_rule(struct efx_nic *efx,
                rule->m_ext.vlan_tci = htons(0xfff);
        }
 
+       if (spec.flags & EFX_FILTER_FLAG_RX_RSS) {
+               rule->flow_type |= FLOW_RSS;
+               *rss_context = spec.rss_context;
+       }
+
        return rc;
 }
 
@@ -972,6 +978,8 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
                      struct ethtool_rxnfc *info, u32 *rule_locs)
 {
        struct efx_nic *efx = netdev_priv(net_dev);
+       u32 rss_context = 0;
+       s32 rc;
 
        switch (info->cmd) {
        case ETHTOOL_GRXRINGS:
@@ -979,12 +987,20 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
                return 0;
 
        case ETHTOOL_GRXFH: {
+               struct efx_rss_context *ctx = &efx->rss_context;
+
+               if (info->flow_type & FLOW_RSS && info->rss_context) {
+                       ctx = efx_find_rss_context_entry(info->rss_context,
+                                                        &efx->rss_context.list);
+                       if (!ctx)
+                               return -ENOENT;
+               }
                info->data = 0;
-               if (!efx->rss_active) /* No RSS */
+               if (!efx_rss_active(ctx)) /* No RSS */
                        return 0;
-               switch (info->flow_type) {
+               switch (info->flow_type & ~FLOW_RSS) {
                case UDP_V4_FLOW:
-                       if (efx->rx_hash_udp_4tuple)
+                       if (ctx->rx_hash_udp_4tuple)
                                /* fall through */
                case TCP_V4_FLOW:
                                info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
@@ -995,7 +1011,7 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
                        info->data |= RXH_IP_SRC | RXH_IP_DST;
                        break;
                case UDP_V6_FLOW:
-                       if (efx->rx_hash_udp_4tuple)
+                       if (ctx->rx_hash_udp_4tuple)
                                /* fall through */
                case TCP_V6_FLOW:
                                info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
@@ -1023,10 +1039,14 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
        case ETHTOOL_GRXCLSRULE:
                if (efx_filter_get_rx_id_limit(efx) == 0)
                        return -EOPNOTSUPP;
-               return efx_ethtool_get_class_rule(efx, &info->fs);
+               rc = efx_ethtool_get_class_rule(efx, &info->fs, &rss_context);
+               if (rc < 0)
+                       return rc;
+               if (info->fs.flow_type & FLOW_RSS)
+                       info->rss_context = rss_context;
+               return 0;
 
-       case ETHTOOL_GRXCLSRLALL: {
-               s32 rc;
+       case ETHTOOL_GRXCLSRLALL:
                info->data = efx_filter_get_rx_id_limit(efx);
                if (info->data == 0)
                        return -EOPNOTSUPP;
@@ -1036,7 +1056,6 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
                        return rc;
                info->rule_cnt = rc;
                return 0;
-       }
 
        default:
                return -EOPNOTSUPP;
@@ -1054,7 +1073,8 @@ static inline bool ip6_mask_is_empty(__be32 mask[4])
 }
 
 static int efx_ethtool_set_class_rule(struct efx_nic *efx,
-                                     struct ethtool_rx_flow_spec *rule)
+                                     struct ethtool_rx_flow_spec *rule,
+                                     u32 rss_context)
 {
        struct ethtool_tcpip4_spec *ip_entry = &rule->h_u.tcp_ip4_spec;
        struct ethtool_tcpip4_spec *ip_mask = &rule->m_u.tcp_ip4_spec;
@@ -1066,6 +1086,7 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
        struct ethtool_usrip6_spec *uip6_mask = &rule->m_u.usr_ip6_spec;
        struct ethhdr *mac_entry = &rule->h_u.ether_spec;
        struct ethhdr *mac_mask = &rule->m_u.ether_spec;
+       enum efx_filter_flags flags = 0;
        struct efx_filter_spec spec;
        int rc;
 
@@ -1084,12 +1105,19 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
             rule->m_ext.data[1]))
                return -EINVAL;
 
-       efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL,
-                          efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
+       if (efx->rx_scatter)
+               flags |= EFX_FILTER_FLAG_RX_SCATTER;
+       if (rule->flow_type & FLOW_RSS)
+               flags |= EFX_FILTER_FLAG_RX_RSS;
+
+       efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, flags,
                           (rule->ring_cookie == RX_CLS_FLOW_DISC) ?
                           EFX_FILTER_RX_DMAQ_ID_DROP : rule->ring_cookie);
 
-       switch (rule->flow_type & ~FLOW_EXT) {
+       if (rule->flow_type & FLOW_RSS)
+               spec.rss_context = rss_context;
+
+       switch (rule->flow_type & ~(FLOW_EXT | FLOW_RSS)) {
        case TCP_V4_FLOW:
        case UDP_V4_FLOW:
                spec.match_flags = (EFX_FILTER_MATCH_ETHER_TYPE |
@@ -1265,7 +1293,8 @@ static int efx_ethtool_set_rxnfc(struct net_device *net_dev,
 
        switch (info->cmd) {
        case ETHTOOL_SRXCLSRLINS:
-               return efx_ethtool_set_class_rule(efx, &info->fs);
+               return efx_ethtool_set_class_rule(efx, &info->fs,
+                                                 info->rss_context);
 
        case ETHTOOL_SRXCLSRLDEL:
                return efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_MANUAL,
@@ -1280,7 +1309,9 @@ static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev)
 {
        struct efx_nic *efx = netdev_priv(net_dev);
 
-       return (efx->n_rx_channels == 1) ? 0 : ARRAY_SIZE(efx->rx_indir_table);
+       if (efx->n_rx_channels == 1)
+               return 0;
+       return ARRAY_SIZE(efx->rss_context.rx_indir_table);
 }
 
 static u32 efx_ethtool_get_rxfh_key_size(struct net_device *net_dev)
@@ -1303,9 +1334,11 @@ static int efx_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key,
        if (hfunc)
                *hfunc = ETH_RSS_HASH_TOP;
        if (indir)
-               memcpy(indir, efx->rx_indir_table, sizeof(efx->rx_indir_table));
+               memcpy(indir, efx->rss_context.rx_indir_table,
+                      sizeof(efx->rss_context.rx_indir_table));
        if (key)
-               memcpy(key, efx->rx_hash_key, efx->type->rx_hash_key_size);
+               memcpy(key, efx->rss_context.rx_hash_key,
+                      efx->type->rx_hash_key_size);
        return 0;
 }
 
@@ -1321,13 +1354,93 @@ static int efx_ethtool_set_rxfh(struct net_device *net_dev, const u32 *indir,
                return 0;
 
        if (!key)
-               key = efx->rx_hash_key;
+               key = efx->rss_context.rx_hash_key;
        if (!indir)
-               indir = efx->rx_indir_table;
+               indir = efx->rss_context.rx_indir_table;
 
        return efx->type->rx_push_rss_config(efx, true, indir, key);
 }
 
+/* ethtool .get_rxfh_context handler: report the configuration of the RSS
+ * context whose user_id is @rss_context.
+ * @indir, @key, @hfunc: optional outputs; each is filled in only if non-NULL.
+ *     The hash function reported is always ETH_RSS_HASH_TOP.
+ * Returns 0 on success, -EOPNOTSUPP if the NIC type has no
+ * rx_pull_rss_context_config operation, -ENOENT if no such context exists,
+ * or the error returned by rx_pull_rss_context_config.
+ */
+static int efx_ethtool_get_rxfh_context(struct net_device *net_dev, u32 *indir,
+                                       u8 *key, u8 *hfunc, u32 rss_context)
+{
+       struct efx_nic *efx = netdev_priv(net_dev);
+       struct efx_rss_context *ctx;
+       int rc;
+
+       if (!efx->type->rx_pull_rss_context_config)
+               return -EOPNOTSUPP;
+       ctx = efx_find_rss_context_entry(rss_context, &efx->rss_context.list);
+       if (!ctx)
+               return -ENOENT;
+       /* Let the NIC-type code refresh @ctx before copying out of it */
+       rc = efx->type->rx_pull_rss_context_config(efx, ctx);
+       if (rc)
+               return rc;
+
+       if (hfunc)
+               *hfunc = ETH_RSS_HASH_TOP;
+       if (indir)
+               memcpy(indir, ctx->rx_indir_table, sizeof(ctx->rx_indir_table));
+       if (key)
+               memcpy(key, ctx->rx_hash_key, efx->type->rx_hash_key_size);
+       return 0;
+}
+
+/* ethtool .set_rxfh_context handler: create, reconfigure or delete an RSS
+ * context.
+ * @indir, @key: new indirection table / hash key.  A NULL pointer means the
+ *     value currently stored in the context is pushed instead.
+ * @hfunc: must be ETH_RSS_HASH_NO_CHANGE or ETH_RSS_HASH_TOP.
+ * @rss_context: on entry, ETH_RXFH_CONTEXT_ALLOC to create a new context, or
+ *     the user_id of an existing one.  On the configure path it is written
+ *     with the context's user_id, except when a newly allocated context
+ *     fails to push and is freed again.  It is not written on the delete path.
+ * @delete: delete the context rather than configuring it.
+ * Returns 0 on success; -EOPNOTSUPP if the NIC type has no
+ * rx_push_rss_context_config operation or @hfunc is unsupported; -EINVAL for
+ * alloc combined with delete; -ENOMEM if no entry could be allocated; -ENOENT
+ * if the context does not exist; or the error from rx_push_rss_context_config.
+ */
+static int efx_ethtool_set_rxfh_context(struct net_device *net_dev,
+                                       const u32 *indir, const u8 *key,
+                                       const u8 hfunc, u32 *rss_context,
+                                       bool delete)
+{
+       struct efx_nic *efx = netdev_priv(net_dev);
+       struct efx_rss_context *ctx;
+       bool allocated = false;
+       int rc;
+
+       if (!efx->type->rx_push_rss_context_config)
+               return -EOPNOTSUPP;
+       /* Hash function is Toeplitz, cannot be changed */
+       if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+               return -EOPNOTSUPP;
+       if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
+               if (delete)
+                       /* alloc + delete == Nothing to do */
+                       return -EINVAL;
+               ctx = efx_alloc_rss_context_entry(&efx->rss_context.list);
+               if (!ctx)
+                       return -ENOMEM;
+               ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+               /* Initialise indir table and key to defaults */
+               efx_set_default_rx_indir_table(efx, ctx);
+               netdev_rss_key_fill(ctx->rx_hash_key, sizeof(ctx->rx_hash_key));
+               allocated = true;
+       } else {
+               ctx = efx_find_rss_context_entry(*rss_context,
+                                                &efx->rss_context.list);
+               if (!ctx)
+                       return -ENOENT;
+       }
+
+       if (delete) {
+               /* delete this context */
+               rc = efx->type->rx_push_rss_context_config(efx, ctx, NULL, NULL);
+               /* Only drop the list entry once the push has succeeded */
+               if (!rc)
+                       efx_free_rss_context_entry(ctx);
+               return rc;
+       }
+
+       if (!key)
+               key = ctx->rx_hash_key;
+       if (!indir)
+               indir = ctx->rx_indir_table;
+
+       rc = efx->type->rx_push_rss_context_config(efx, ctx, indir, key);
+       /* A context created by this call is discarded again if the push
+        * failed; in every other case report the context's user_id.
+        */
+       if (rc && allocated)
+               efx_free_rss_context_entry(ctx);
+       else
+               *rss_context = ctx->user_id;
+       return rc;
+}
+
 static int efx_ethtool_get_ts_info(struct net_device *net_dev,
                                   struct ethtool_ts_info *ts_info)
 {
@@ -1375,6 +1488,36 @@ static int efx_ethtool_get_module_info(struct net_device *net_dev,
        return ret;
 }
 
+/* ethtool .get_fecparam handler: fetch the FEC settings into @fecparam via
+ * the PHY operations' get_fecparam() hook, called with efx->mac_lock held.
+ * Returns -EOPNOTSUPP if there are no PHY operations or they do not provide
+ * get_fecparam(); otherwise returns the hook's result.
+ */
+static int efx_ethtool_get_fecparam(struct net_device *net_dev,
+                                   struct ethtool_fecparam *fecparam)
+{
+       struct efx_nic *efx = netdev_priv(net_dev);
+       int rc;
+
+       if (!efx->phy_op || !efx->phy_op->get_fecparam)
+               return -EOPNOTSUPP;
+       mutex_lock(&efx->mac_lock);
+       rc = efx->phy_op->get_fecparam(efx, fecparam);
+       mutex_unlock(&efx->mac_lock);
+
+       return rc;
+}
+
+/* ethtool .set_fecparam handler: pass the requested FEC settings in @fecparam
+ * to the PHY operations' set_fecparam() hook, called with efx->mac_lock held.
+ * Returns -EOPNOTSUPP if there are no PHY operations or they do not provide
+ * set_fecparam(); otherwise returns the hook's result.
+ */
+static int efx_ethtool_set_fecparam(struct net_device *net_dev,
+                                   struct ethtool_fecparam *fecparam)
+{
+       struct efx_nic *efx = netdev_priv(net_dev);
+       int rc;
+
+       /* Guard the hook that is actually called below: a PHY that provides
+        * get_fecparam() but not set_fecparam() must not be dereferenced.
+        */
+       if (!efx->phy_op || !efx->phy_op->set_fecparam)
+               return -EOPNOTSUPP;
+       mutex_lock(&efx->mac_lock);
+       rc = efx->phy_op->set_fecparam(efx, fecparam);
+       mutex_unlock(&efx->mac_lock);
+
+       return rc;
+}
+
 const struct ethtool_ops efx_ethtool_ops = {
        .get_drvinfo            = efx_ethtool_get_drvinfo,
        .get_regs_len           = efx_ethtool_get_regs_len,
@@ -1403,9 +1546,13 @@ const struct ethtool_ops efx_ethtool_ops = {
        .get_rxfh_key_size      = efx_ethtool_get_rxfh_key_size,
        .get_rxfh               = efx_ethtool_get_rxfh,
        .set_rxfh               = efx_ethtool_set_rxfh,
+       .get_rxfh_context       = efx_ethtool_get_rxfh_context,
+       .set_rxfh_context       = efx_ethtool_set_rxfh_context,
        .get_ts_info            = efx_ethtool_get_ts_info,
        .get_module_info        = efx_ethtool_get_module_info,
        .get_module_eeprom      = efx_ethtool_get_module_eeprom,
        .get_link_ksettings     = efx_ethtool_get_link_ksettings,
        .set_link_ksettings     = efx_ethtool_set_link_ksettings,
+       .get_fecparam           = efx_ethtool_get_fecparam,
+       .set_fecparam           = efx_ethtool_set_fecparam,
 };
index 30a1136fc9092934541fa91d8befca61afff4446..4824fcf5c3d41be58ec3e57fa6f661e37d1841d3 100644 (file)
@@ -81,7 +81,6 @@ enum ef4_loopback_mode {
                            (1 << LOOPBACK_XAUI) |              \
                            (1 << LOOPBACK_GMII) |              \
                            (1 << LOOPBACK_SGMII) |             \
-                           (1 << LOOPBACK_SGMII) |             \
                            (1 << LOOPBACK_XGBR) |              \
                            (1 << LOOPBACK_XFI) |               \
                            (1 << LOOPBACK_XAUI_FAR) |          \
index 266b9bee1f3a75264a4f03d833864fab0ca472fa..ad001e77d554cae308c2c9c2d6929cf90e1ed00e 100644 (file)
@@ -1630,12 +1630,12 @@ void efx_farch_rx_push_indir_table(struct efx_nic *efx)
        size_t i = 0;
        efx_dword_t dword;
 
-       BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+       BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
                     FR_BZ_RX_INDIRECTION_TBL_ROWS);
 
        for (i = 0; i < FR_BZ_RX_INDIRECTION_TBL_ROWS; i++) {
                EFX_POPULATE_DWORD_1(dword, FRF_BZ_IT_QUEUE,
-                                    efx->rx_indir_table[i]);
+                                    efx->rss_context.rx_indir_table[i]);
                efx_writed(efx, &dword,
                           FR_BZ_RX_INDIRECTION_TBL +
                           FR_BZ_RX_INDIRECTION_TBL_STEP * i);
@@ -1647,14 +1647,14 @@ void efx_farch_rx_pull_indir_table(struct efx_nic *efx)
        size_t i = 0;
        efx_dword_t dword;
 
-       BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+       BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
                     FR_BZ_RX_INDIRECTION_TBL_ROWS);
 
        for (i = 0; i < FR_BZ_RX_INDIRECTION_TBL_ROWS; i++) {
                efx_readd(efx, &dword,
                           FR_BZ_RX_INDIRECTION_TBL +
                           FR_BZ_RX_INDIRECTION_TBL_STEP * i);
-               efx->rx_indir_table[i] = EFX_DWORD_FIELD(dword, FRF_BZ_IT_QUEUE);
+               efx->rss_context.rx_indir_table[i] = EFX_DWORD_FIELD(dword, FRF_BZ_IT_QUEUE);
        }
 }
 
@@ -2032,8 +2032,7 @@ efx_farch_filter_from_gen_spec(struct efx_farch_filter_spec *spec,
 {
        bool is_full = false;
 
-       if ((gen_spec->flags & EFX_FILTER_FLAG_RX_RSS) &&
-           gen_spec->rss_context != EFX_FILTER_RSS_CONTEXT_DEFAULT)
+       if ((gen_spec->flags & EFX_FILTER_FLAG_RX_RSS) && gen_spec->rss_context)
                return -EINVAL;
 
        spec->priority = gen_spec->priority;
index 8189a1cd973fd5ee44d5b70a1ce798301558aa7b..59021ad6d98d2f333332631b5838f411c5020dbc 100644 (file)
@@ -125,7 +125,9 @@ enum efx_encap_type {
  * @match_flags: Match type flags, from &enum efx_filter_match_flags
  * @priority: Priority of the filter, from &enum efx_filter_priority
  * @flags: Miscellaneous flags, from &enum efx_filter_flags
- * @rss_context: RSS context to use, if %EFX_FILTER_FLAG_RX_RSS is set
+ * @rss_context: RSS context to use, if %EFX_FILTER_FLAG_RX_RSS is set.  This
+ *     is a user_id (with 0 meaning the driver/default RSS context), not an
+ *     MCFW context_id.
  * @dmaq_id: Source/target queue index, or %EFX_FILTER_RX_DMAQ_ID_DROP for
  *     an RX drop filter
  * @outer_vid: Outer VLAN ID to match, if %EFX_FILTER_MATCH_OUTER_VID is set
@@ -173,7 +175,6 @@ struct efx_filter_spec {
 };
 
 enum {
-       EFX_FILTER_RSS_CONTEXT_DEFAULT = 0xffffffff,
        EFX_FILTER_RX_DMAQ_ID_DROP = 0xfff
 };
 
@@ -185,7 +186,7 @@ static inline void efx_filter_init_rx(struct efx_filter_spec *spec,
        memset(spec, 0, sizeof(*spec));
        spec->priority = priority;
        spec->flags = EFX_FILTER_FLAG_RX | flags;
-       spec->rss_context = EFX_FILTER_RSS_CONTEXT_DEFAULT;
+       spec->rss_context = 0;
        spec->dmaq_id = rxq_id;
 }
 
index 869d76f8f589b7f6a933c32d61c808d060cf6366..3839eec783eaa94b0241dcb93d3f8b6f8acdbd5c 100644 (file)
 #define MC_CMD_ERR_NO_PRIVILEGE 0x1013
 /* Workaround 26807 could not be turned on/off because some functions
  * have already installed filters. See the comment at
- * MC_CMD_WORKAROUND_BUG26807. */
+ * MC_CMD_WORKAROUND_BUG26807.
+ * May also be returned for other operations such as sub-variant switching. */
 #define MC_CMD_ERR_FILTERS_PRESENT 0x1014
 /* The clock whose frequency you've attempted to set
  * doesn't exist on this NIC */
  * away.  This is distinct from MC_CMD_ERR_DATAPATH_DISABLED in that the
  * datapath absence may be temporary*/
 #define MC_CMD_ERR_NO_DATAPATH 0x1019
+/* The operation could not complete because some VIs are allocated */
+#define MC_CMD_ERR_VIS_PRESENT 0x101a
+/* The operation could not complete because some PIO buffers are allocated */
+#define MC_CMD_ERR_PIOBUFS_PRESENT 0x101b
 
 #define MC_CMD_ERR_CODE_OFST 0
 
 #define SIENA_MC_BOOTROM_COPYCODE_VEC (0x800 - 3 * 0x4)
 #define HUNT_MC_BOOTROM_COPYCODE_VEC (0x8000 - 3 * 0x4)
 #define MEDFORD_MC_BOOTROM_COPYCODE_VEC (0x10000 - 3 * 0x4)
-/* Points to the recovery mode entry point. */
+/* Points to the recovery mode entry point. Misnamed but kept for compatibility. */
 #define SIENA_MC_BOOTROM_NOFLASH_VEC (0x800 - 2 * 0x4)
 #define HUNT_MC_BOOTROM_NOFLASH_VEC (0x8000 - 2 * 0x4)
 #define MEDFORD_MC_BOOTROM_NOFLASH_VEC (0x10000 - 2 * 0x4)
+/* Points to the recovery mode entry point. Same as above, but the right name. */
+#define SIENA_MC_BOOTROM_RECOVERY_VEC (0x800 - 2 * 0x4)
+#define HUNT_MC_BOOTROM_RECOVERY_VEC (0x8000 - 2 * 0x4)
+#define MEDFORD_MC_BOOTROM_RECOVERY_VEC (0x10000 - 2 * 0x4)
+
+/* Points to noflash mode entry point. */
+#define MEDFORD_MC_BOOTROM_REAL_NOFLASH_VEC (0x10000 - 4 * 0x4)
 
 /* The command set exported by the boot ROM (MCDI v0) */
 #define MC_CMD_GET_VERSION_V0_SUPPORTED_FUNCS {                \
 #define       MCDI_EVENT_LEVEL_LBN 33
 #define       MCDI_EVENT_LEVEL_WIDTH 3
 /* enum: Info. */
-#define          MCDI_EVENT_LEVEL_INFO  0x0
+#define          MCDI_EVENT_LEVEL_INFO 0x0
 /* enum: Warning. */
 #define          MCDI_EVENT_LEVEL_WARN 0x1
 /* enum: Error. */
 #define        MCDI_EVENT_LINKCHANGE_SPEED_LBN 16
 #define        MCDI_EVENT_LINKCHANGE_SPEED_WIDTH 4
 /* enum: Link is down or link speed could not be determined */
-#define          MCDI_EVENT_LINKCHANGE_SPEED_UNKNOWN  0x0
+#define          MCDI_EVENT_LINKCHANGE_SPEED_UNKNOWN 0x0
 /* enum: 100Mbs */
-#define          MCDI_EVENT_LINKCHANGE_SPEED_100M  0x1
+#define          MCDI_EVENT_LINKCHANGE_SPEED_100M 0x1
 /* enum: 1Gbs */
-#define          MCDI_EVENT_LINKCHANGE_SPEED_1G  0x2
+#define          MCDI_EVENT_LINKCHANGE_SPEED_1G 0x2
 /* enum: 10Gbs */
-#define          MCDI_EVENT_LINKCHANGE_SPEED_10G  0x3
+#define          MCDI_EVENT_LINKCHANGE_SPEED_10G 0x3
 /* enum: 40Gbs */
-#define          MCDI_EVENT_LINKCHANGE_SPEED_40G  0x4
+#define          MCDI_EVENT_LINKCHANGE_SPEED_40G 0x4
 /* enum: 25Gbs */
-#define          MCDI_EVENT_LINKCHANGE_SPEED_25G  0x5
+#define          MCDI_EVENT_LINKCHANGE_SPEED_25G 0x5
 /* enum: 50Gbs */
-#define          MCDI_EVENT_LINKCHANGE_SPEED_50G  0x6
+#define          MCDI_EVENT_LINKCHANGE_SPEED_50G 0x6
 /* enum: 100Gbs */
-#define          MCDI_EVENT_LINKCHANGE_SPEED_100G  0x7
+#define          MCDI_EVENT_LINKCHANGE_SPEED_100G 0x7
 #define        MCDI_EVENT_LINKCHANGE_FCNTL_LBN 20
 #define        MCDI_EVENT_LINKCHANGE_FCNTL_WIDTH 4
 #define        MCDI_EVENT_LINKCHANGE_LINK_FLAGS_LBN 24
 /* enum: Transmit error */
 #define          MCDI_EVENT_CODE_TX_ERR 0xb
 /* enum: Tx flush has completed */
-#define          MCDI_EVENT_CODE_TX_FLUSH  0xc
+#define          MCDI_EVENT_CODE_TX_FLUSH 0xc
 /* enum: PTP packet received timestamp */
-#define          MCDI_EVENT_CODE_PTP_RX  0xd
+#define          MCDI_EVENT_CODE_PTP_RX 0xd
 /* enum: PTP NIC failure */
-#define          MCDI_EVENT_CODE_PTP_FAULT  0xe
+#define          MCDI_EVENT_CODE_PTP_FAULT 0xe
 /* enum: PTP PPS event */
-#define          MCDI_EVENT_CODE_PTP_PPS  0xf
+#define          MCDI_EVENT_CODE_PTP_PPS 0xf
 /* enum: Rx flush has completed */
-#define          MCDI_EVENT_CODE_RX_FLUSH  0x10
+#define          MCDI_EVENT_CODE_RX_FLUSH 0x10
 /* enum: Receive error */
 #define          MCDI_EVENT_CODE_RX_ERR 0x11
 /* enum: AOE fault */
-#define          MCDI_EVENT_CODE_AOE  0x12
+#define          MCDI_EVENT_CODE_AOE 0x12
 /* enum: Network port calibration failed (VCAL). */
-#define          MCDI_EVENT_CODE_VCAL_FAIL  0x13
+#define          MCDI_EVENT_CODE_VCAL_FAIL 0x13
 /* enum: HW PPS event */
-#define          MCDI_EVENT_CODE_HW_PPS  0x14
+#define          MCDI_EVENT_CODE_HW_PPS 0x14
 /* enum: The MC has rebooted (huntington and later, siena uses CODE_REBOOT and
  * a different format)
  */
 /* enum: Artificial event generated by host and posted via MC for test
  * purposes.
  */
-#define          MCDI_EVENT_CODE_TESTGEN  0xfa
+#define          MCDI_EVENT_CODE_TESTGEN 0xfa
 #define       MCDI_EVENT_CMDDONE_DATA_OFST 0
 #define       MCDI_EVENT_CMDDONE_DATA_LEN 4
 #define       MCDI_EVENT_CMDDONE_DATA_LBN 0
 #define       FCDI_EVENT_LEVEL_LBN 33
 #define       FCDI_EVENT_LEVEL_WIDTH 3
 /* enum: Info. */
-#define          FCDI_EVENT_LEVEL_INFO  0x0
+#define          FCDI_EVENT_LEVEL_INFO 0x0
 /* enum: Warning. */
 #define          FCDI_EVENT_LEVEL_WARN 0x1
 /* enum: Error. */
 #define       MUM_EVENT_LEVEL_LBN 33
 #define       MUM_EVENT_LEVEL_WIDTH 3
 /* enum: Info. */
-#define          MUM_EVENT_LEVEL_INFO  0x0
+#define          MUM_EVENT_LEVEL_INFO 0x0
 /* enum: Warning. */
 #define          MUM_EVENT_LEVEL_WARN 0x1
 /* enum: Error. */
 
 /***********************************/
 /* MC_CMD_READ32
- * Read multiple 32byte words from MC memory.
+ * Read multiple 32byte words from MC memory. Note - this command really
+ * belongs to INSECURE category but is required by shmboot. The command handler
+ * has additional checks to reject insecure calls.
  */
 #define MC_CMD_READ32 0x1
 
 
 /***********************************/
 /* MC_CMD_COPYCODE
- * Copy MC code between two locations and jump.
+ * Copy MC code between two locations and jump. Note - this command really
+ * belongs to INSECURE category but is required by shmboot. The command handler
+ * has additional checks to reject insecure calls.
  */
 #define MC_CMD_COPYCODE 0x3
 
 #define       MC_CMD_GET_BOOT_STATUS_OUT_BOOT_OFFSET_OFST 0
 #define       MC_CMD_GET_BOOT_STATUS_OUT_BOOT_OFFSET_LEN 4
 /* enum: indicates that the MC wasn't flash booted */
-#define          MC_CMD_GET_BOOT_STATUS_OUT_BOOT_OFFSET_NULL  0xdeadbeef
+#define          MC_CMD_GET_BOOT_STATUS_OUT_BOOT_OFFSET_NULL 0xdeadbeef
 #define       MC_CMD_GET_BOOT_STATUS_OUT_FLAGS_OFST 4
 #define       MC_CMD_GET_BOOT_STATUS_OUT_FLAGS_LEN 4
 #define        MC_CMD_GET_BOOT_STATUS_OUT_FLAGS_WATCHDOG_LBN 0
 #define       MC_CMD_PTP_IN_MANFTEST_PACKET_TEST_ENABLE_OFST 8
 #define       MC_CMD_PTP_IN_MANFTEST_PACKET_TEST_ENABLE_LEN 4
 
-/* MC_CMD_PTP_IN_RESET_STATS msgrequest */
+/* MC_CMD_PTP_IN_RESET_STATS msgrequest: Reset PTP statistics */
 #define    MC_CMD_PTP_IN_RESET_STATS_LEN 8
 /*            MC_CMD_PTP_IN_CMD_OFST 0 */
 /*            MC_CMD_PTP_IN_CMD_LEN 4 */
-/* Reset PTP statistics */
 /*            MC_CMD_PTP_IN_PERIPH_ID_OFST 4 */
 /*            MC_CMD_PTP_IN_PERIPH_ID_LEN 4 */
 
 /* enum: External. */
 #define          MC_CMD_PTP_CLK_SRC_EXTERNAL 0x1
 
-/* MC_CMD_PTP_IN_RST_CLK msgrequest */
+/* MC_CMD_PTP_IN_RST_CLK msgrequest: Reset value of Timer Reg. */
 #define    MC_CMD_PTP_IN_RST_CLK_LEN 8
 /*            MC_CMD_PTP_IN_CMD_OFST 0 */
 /*            MC_CMD_PTP_IN_CMD_LEN 4 */
-/* Reset value of Timer Reg. */
 /*            MC_CMD_PTP_IN_PERIPH_ID_OFST 4 */
 /*            MC_CMD_PTP_IN_PERIPH_ID_LEN 4 */
 
 #define       MC_CMD_DRV_ATTACH_IN_NEW_STATE_LEN 4
 #define        MC_CMD_DRV_ATTACH_LBN 0
 #define        MC_CMD_DRV_ATTACH_WIDTH 1
+#define        MC_CMD_DRV_ATTACH_IN_ATTACH_LBN 0
+#define        MC_CMD_DRV_ATTACH_IN_ATTACH_WIDTH 1
 #define        MC_CMD_DRV_PREBOOT_LBN 1
 #define        MC_CMD_DRV_PREBOOT_WIDTH 1
+#define        MC_CMD_DRV_ATTACH_IN_PREBOOT_LBN 1
+#define        MC_CMD_DRV_ATTACH_IN_PREBOOT_WIDTH 1
+#define        MC_CMD_DRV_ATTACH_IN_SUBVARIANT_AWARE_LBN 2
+#define        MC_CMD_DRV_ATTACH_IN_SUBVARIANT_AWARE_WIDTH 1
+#define        MC_CMD_DRV_ATTACH_IN_WANT_VI_SPREADING_LBN 3
+#define        MC_CMD_DRV_ATTACH_IN_WANT_VI_SPREADING_WIDTH 1
 /* 1 to set new state, or 0 to just report the existing state */
 #define       MC_CMD_DRV_ATTACH_IN_UPDATE_OFST 4
 #define       MC_CMD_DRV_ATTACH_IN_UPDATE_LEN 4
  * support
  */
 #define          MC_CMD_FW_RULES_ENGINE 0x5
+/* enum: Prefer to use firmware with additional DPDK support */
+#define          MC_CMD_FW_DPDK 0x6
+/* enum: Prefer to use "l3xudp" custom datapath firmware (see SF-119495-PD and
+ * bug69716)
+ */
+#define          MC_CMD_FW_L3XUDP 0x7
 /* enum: Only this option is allowed for non-admin functions */
-#define          MC_CMD_FW_DONT_CARE  0xffffffff
+#define          MC_CMD_FW_DONT_CARE 0xffffffff
 
 /* MC_CMD_DRV_ATTACH_OUT msgresponse */
 #define    MC_CMD_DRV_ATTACH_OUT_LEN 4
  * refers to the Sorrento external FPGA port.
  */
 #define          MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_NO_ACTIVE_PORT 0x3
+/* enum: If set, indicates that VI spreading is currently enabled. Will always
+ * indicate the current state, regardless of the value in the WANT_VI_SPREADING
+ * input.
+ */
+#define          MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_VI_SPREADING_ENABLED 0x4
 
 
 /***********************************/
 #define       MC_CMD_GET_LOOPBACK_MODES_OUT_100M_LO_OFST 0
 #define       MC_CMD_GET_LOOPBACK_MODES_OUT_100M_HI_OFST 4
 /* enum: None. */
-#define          MC_CMD_LOOPBACK_NONE  0x0
+#define          MC_CMD_LOOPBACK_NONE 0x0
 /* enum: Data. */
-#define          MC_CMD_LOOPBACK_DATA  0x1
+#define          MC_CMD_LOOPBACK_DATA 0x1
 /* enum: GMAC. */
-#define          MC_CMD_LOOPBACK_GMAC  0x2
+#define          MC_CMD_LOOPBACK_GMAC 0x2
 /* enum: XGMII. */
 #define          MC_CMD_LOOPBACK_XGMII 0x3
 /* enum: XGXS. */
-#define          MC_CMD_LOOPBACK_XGXS  0x4
+#define          MC_CMD_LOOPBACK_XGXS 0x4
 /* enum: XAUI. */
-#define          MC_CMD_LOOPBACK_XAUI  0x5
+#define          MC_CMD_LOOPBACK_XAUI 0x5
 /* enum: GMII. */
-#define          MC_CMD_LOOPBACK_GMII  0x6
+#define          MC_CMD_LOOPBACK_GMII 0x6
 /* enum: SGMII. */
-#define          MC_CMD_LOOPBACK_SGMII  0x7
+#define          MC_CMD_LOOPBACK_SGMII 0x7
 /* enum: XGBR. */
-#define          MC_CMD_LOOPBACK_XGBR  0x8
+#define          MC_CMD_LOOPBACK_XGBR 0x8
 /* enum: XFI. */
-#define          MC_CMD_LOOPBACK_XFI  0x9
+#define          MC_CMD_LOOPBACK_XFI 0x9
 /* enum: XAUI Far. */
-#define          MC_CMD_LOOPBACK_XAUI_FAR  0xa
+#define          MC_CMD_LOOPBACK_XAUI_FAR 0xa
 /* enum: GMII Far. */
-#define          MC_CMD_LOOPBACK_GMII_FAR  0xb
+#define          MC_CMD_LOOPBACK_GMII_FAR 0xb
 /* enum: SGMII Far. */
-#define          MC_CMD_LOOPBACK_SGMII_FAR  0xc
+#define          MC_CMD_LOOPBACK_SGMII_FAR 0xc
 /* enum: XFI Far. */
-#define          MC_CMD_LOOPBACK_XFI_FAR  0xd
+#define          MC_CMD_LOOPBACK_XFI_FAR 0xd
 /* enum: GPhy. */
-#define          MC_CMD_LOOPBACK_GPHY  0xe
+#define          MC_CMD_LOOPBACK_GPHY 0xe
 /* enum: PhyXS. */
-#define          MC_CMD_LOOPBACK_PHYXS  0xf
+#define          MC_CMD_LOOPBACK_PHYXS 0xf
 /* enum: PCS. */
-#define          MC_CMD_LOOPBACK_PCS  0x10
+#define          MC_CMD_LOOPBACK_PCS 0x10
 /* enum: PMA-PMD. */
-#define          MC_CMD_LOOPBACK_PMAPMD  0x11
+#define          MC_CMD_LOOPBACK_PMAPMD 0x11
 /* enum: Cross-Port. */
-#define          MC_CMD_LOOPBACK_XPORT  0x12
+#define          MC_CMD_LOOPBACK_XPORT 0x12
 /* enum: XGMII-Wireside. */
-#define          MC_CMD_LOOPBACK_XGMII_WS  0x13
+#define          MC_CMD_LOOPBACK_XGMII_WS 0x13
 /* enum: XAUI Wireside. */
-#define          MC_CMD_LOOPBACK_XAUI_WS  0x14
+#define          MC_CMD_LOOPBACK_XAUI_WS 0x14
 /* enum: XAUI Wireside Far. */
-#define          MC_CMD_LOOPBACK_XAUI_WS_FAR  0x15
+#define          MC_CMD_LOOPBACK_XAUI_WS_FAR 0x15
 /* enum: XAUI Wireside near. */
-#define          MC_CMD_LOOPBACK_XAUI_WS_NEAR  0x16
+#define          MC_CMD_LOOPBACK_XAUI_WS_NEAR 0x16
 /* enum: GMII Wireside. */
-#define          MC_CMD_LOOPBACK_GMII_WS  0x17
+#define          MC_CMD_LOOPBACK_GMII_WS 0x17
 /* enum: XFI Wireside. */
-#define          MC_CMD_LOOPBACK_XFI_WS  0x18
+#define          MC_CMD_LOOPBACK_XFI_WS 0x18
 /* enum: XFI Wireside Far. */
-#define          MC_CMD_LOOPBACK_XFI_WS_FAR  0x19
+#define          MC_CMD_LOOPBACK_XFI_WS_FAR 0x19
 /* enum: PhyXS Wireside. */
-#define          MC_CMD_LOOPBACK_PHYXS_WS  0x1a
+#define          MC_CMD_LOOPBACK_PHYXS_WS 0x1a
 /* enum: PMA lanes MAC-Serdes. */
-#define          MC_CMD_LOOPBACK_PMA_INT  0x1b
+#define          MC_CMD_LOOPBACK_PMA_INT 0x1b
 /* enum: KR Serdes Parallel (Encoder). */
-#define          MC_CMD_LOOPBACK_SD_NEAR  0x1c
+#define          MC_CMD_LOOPBACK_SD_NEAR 0x1c
 /* enum: KR Serdes Serial. */
-#define          MC_CMD_LOOPBACK_SD_FAR  0x1d
+#define          MC_CMD_LOOPBACK_SD_FAR 0x1d
 /* enum: PMA lanes MAC-Serdes Wireside. */
-#define          MC_CMD_LOOPBACK_PMA_INT_WS  0x1e
+#define          MC_CMD_LOOPBACK_PMA_INT_WS 0x1e
 /* enum: KR Serdes Parallel Wireside (Full PCS). */
-#define          MC_CMD_LOOPBACK_SD_FEP2_WS  0x1f
+#define          MC_CMD_LOOPBACK_SD_FEP2_WS 0x1f
 /* enum: KR Serdes Parallel Wireside (Sym Aligner to TX). */
-#define          MC_CMD_LOOPBACK_SD_FEP1_5_WS  0x20
+#define          MC_CMD_LOOPBACK_SD_FEP1_5_WS 0x20
 /* enum: KR Serdes Parallel Wireside (Deserializer to Serializer). */
-#define          MC_CMD_LOOPBACK_SD_FEP_WS  0x21
+#define          MC_CMD_LOOPBACK_SD_FEP_WS 0x21
 /* enum: KR Serdes Serial Wireside. */
-#define          MC_CMD_LOOPBACK_SD_FES_WS  0x22
+#define          MC_CMD_LOOPBACK_SD_FES_WS 0x22
 /* enum: Near side of AOE Siena side port */
-#define          MC_CMD_LOOPBACK_AOE_INT_NEAR  0x23
+#define          MC_CMD_LOOPBACK_AOE_INT_NEAR 0x23
 /* enum: Medford Wireside datapath loopback */
-#define          MC_CMD_LOOPBACK_DATA_WS  0x24
+#define          MC_CMD_LOOPBACK_DATA_WS 0x24
 /* enum: Force link up without setting up any physical loopback (snapper use
  * only)
  */
-#define          MC_CMD_LOOPBACK_FORCE_EXT_LINK  0x25
+#define          MC_CMD_LOOPBACK_FORCE_EXT_LINK 0x25
 /* Supported loopbacks. */
 #define       MC_CMD_GET_LOOPBACK_MODES_OUT_1G_OFST 8
 #define       MC_CMD_GET_LOOPBACK_MODES_OUT_1G_LEN 8
 #define       MC_CMD_GET_LOOPBACK_MODES_OUT_V2_100M_LO_OFST 0
 #define       MC_CMD_GET_LOOPBACK_MODES_OUT_V2_100M_HI_OFST 4
 /* enum: None. */
-/*               MC_CMD_LOOPBACK_NONE  0x0 */
+/*               MC_CMD_LOOPBACK_NONE 0x0 */
 /* enum: Data. */
-/*               MC_CMD_LOOPBACK_DATA  0x1 */
+/*               MC_CMD_LOOPBACK_DATA 0x1 */
 /* enum: GMAC. */
-/*               MC_CMD_LOOPBACK_GMAC  0x2 */
+/*               MC_CMD_LOOPBACK_GMAC 0x2 */
 /* enum: XGMII. */
 /*               MC_CMD_LOOPBACK_XGMII 0x3 */
 /* enum: XGXS. */
-/*               MC_CMD_LOOPBACK_XGXS  0x4 */
+/*               MC_CMD_LOOPBACK_XGXS 0x4 */
 /* enum: XAUI. */
-/*               MC_CMD_LOOPBACK_XAUI  0x5 */
+/*               MC_CMD_LOOPBACK_XAUI 0x5 */
 /* enum: GMII. */
-/*               MC_CMD_LOOPBACK_GMII  0x6 */
+/*               MC_CMD_LOOPBACK_GMII 0x6 */
 /* enum: SGMII. */
-/*               MC_CMD_LOOPBACK_SGMII  0x7 */
+/*               MC_CMD_LOOPBACK_SGMII 0x7 */
 /* enum: XGBR. */
-/*               MC_CMD_LOOPBACK_XGBR  0x8 */
+/*               MC_CMD_LOOPBACK_XGBR 0x8 */
 /* enum: XFI. */
-/*               MC_CMD_LOOPBACK_XFI  0x9 */
+/*               MC_CMD_LOOPBACK_XFI 0x9 */
 /* enum: XAUI Far. */
-/*               MC_CMD_LOOPBACK_XAUI_FAR  0xa */
+/*               MC_CMD_LOOPBACK_XAUI_FAR 0xa */
 /* enum: GMII Far. */
-/*               MC_CMD_LOOPBACK_GMII_FAR  0xb */
+/*               MC_CMD_LOOPBACK_GMII_FAR 0xb */
 /* enum: SGMII Far. */
-/*               MC_CMD_LOOPBACK_SGMII_FAR  0xc */
+/*               MC_CMD_LOOPBACK_SGMII_FAR 0xc */
 /* enum: XFI Far. */
-/*               MC_CMD_LOOPBACK_XFI_FAR  0xd */
+/*               MC_CMD_LOOPBACK_XFI_FAR 0xd */
 /* enum: GPhy. */
-/*               MC_CMD_LOOPBACK_GPHY  0xe */
+/*               MC_CMD_LOOPBACK_GPHY 0xe */
 /* enum: PhyXS. */
-/*               MC_CMD_LOOPBACK_PHYXS  0xf */
+/*               MC_CMD_LOOPBACK_PHYXS 0xf */
 /* enum: PCS. */
-/*               MC_CMD_LOOPBACK_PCS  0x10 */
+/*               MC_CMD_LOOPBACK_PCS 0x10 */
 /* enum: PMA-PMD. */
-/*               MC_CMD_LOOPBACK_PMAPMD  0x11 */
+/*               MC_CMD_LOOPBACK_PMAPMD 0x11 */
 /* enum: Cross-Port. */
-/*               MC_CMD_LOOPBACK_XPORT  0x12 */
+/*               MC_CMD_LOOPBACK_XPORT 0x12 */
 /* enum: XGMII-Wireside. */
-/*               MC_CMD_LOOPBACK_XGMII_WS  0x13 */
+/*               MC_CMD_LOOPBACK_XGMII_WS 0x13 */
 /* enum: XAUI Wireside. */
-/*               MC_CMD_LOOPBACK_XAUI_WS  0x14 */
+/*               MC_CMD_LOOPBACK_XAUI_WS 0x14 */
 /* enum: XAUI Wireside Far. */
-/*               MC_CMD_LOOPBACK_XAUI_WS_FAR  0x15 */
+/*               MC_CMD_LOOPBACK_XAUI_WS_FAR 0x15 */
 /* enum: XAUI Wireside near. */
-/*               MC_CMD_LOOPBACK_XAUI_WS_NEAR  0x16 */
+/*               MC_CMD_LOOPBACK_XAUI_WS_NEAR 0x16 */
 /* enum: GMII Wireside. */
-/*               MC_CMD_LOOPBACK_GMII_WS  0x17 */
+/*               MC_CMD_LOOPBACK_GMII_WS 0x17 */
 /* enum: XFI Wireside. */
-/*               MC_CMD_LOOPBACK_XFI_WS  0x18 */
+/*               MC_CMD_LOOPBACK_XFI_WS 0x18 */
 /* enum: XFI Wireside Far. */
-/*               MC_CMD_LOOPBACK_XFI_WS_FAR  0x19 */
+/*               MC_CMD_LOOPBACK_XFI_WS_FAR 0x19 */
 /* enum: PhyXS Wireside. */
-/*               MC_CMD_LOOPBACK_PHYXS_WS  0x1a */
+/*               MC_CMD_LOOPBACK_PHYXS_WS 0x1a */
 /* enum: PMA lanes MAC-Serdes. */
-/*               MC_CMD_LOOPBACK_PMA_INT  0x1b */
+/*               MC_CMD_LOOPBACK_PMA_INT 0x1b */
 /* enum: KR Serdes Parallel (Encoder). */
-/*               MC_CMD_LOOPBACK_SD_NEAR  0x1c */
+/*               MC_CMD_LOOPBACK_SD_NEAR 0x1c */
 /* enum: KR Serdes Serial. */
-/*               MC_CMD_LOOPBACK_SD_FAR  0x1d */
+/*               MC_CMD_LOOPBACK_SD_FAR 0x1d */
 /* enum: PMA lanes MAC-Serdes Wireside. */
-/*               MC_CMD_LOOPBACK_PMA_INT_WS  0x1e */
+/*               MC_CMD_LOOPBACK_PMA_INT_WS 0x1e */
 /* enum: KR Serdes Parallel Wireside (Full PCS). */
-/*               MC_CMD_LOOPBACK_SD_FEP2_WS  0x1f */
+/*               MC_CMD_LOOPBACK_SD_FEP2_WS 0x1f */
 /* enum: KR Serdes Parallel Wireside (Sym Aligner to TX). */
-/*               MC_CMD_LOOPBACK_SD_FEP1_5_WS  0x20 */
+/*               MC_CMD_LOOPBACK_SD_FEP1_5_WS 0x20 */
 /* enum: KR Serdes Parallel Wireside (Deserializer to Serializer). */
-/*               MC_CMD_LOOPBACK_SD_FEP_WS  0x21 */
+/*               MC_CMD_LOOPBACK_SD_FEP_WS 0x21 */
 /* enum: KR Serdes Serial Wireside. */
-/*               MC_CMD_LOOPBACK_SD_FES_WS  0x22 */
+/*               MC_CMD_LOOPBACK_SD_FES_WS 0x22 */
 /* enum: Near side of AOE Siena side port */
-/*               MC_CMD_LOOPBACK_AOE_INT_NEAR  0x23 */
+/*               MC_CMD_LOOPBACK_AOE_INT_NEAR 0x23 */
 /* enum: Medford Wireside datapath loopback */
-/*               MC_CMD_LOOPBACK_DATA_WS  0x24 */
+/*               MC_CMD_LOOPBACK_DATA_WS 0x24 */
 /* enum: Force link up without setting up any physical loopback (snapper use
  * only)
  */
-/*               MC_CMD_LOOPBACK_FORCE_EXT_LINK  0x25 */
+/*               MC_CMD_LOOPBACK_FORCE_EXT_LINK 0x25 */
 /* Supported loopbacks. */
 #define       MC_CMD_GET_LOOPBACK_MODES_OUT_V2_1G_OFST 8
 #define       MC_CMD_GET_LOOPBACK_MODES_OUT_V2_1G_LEN 8
 /*            Enum values, see field(s): */
 /*               100M */
 
+/* AN_TYPE structuredef: Auto-negotiation types defined in IEEE802.3 */
+#define    AN_TYPE_LEN 4
+#define       AN_TYPE_TYPE_OFST 0
+#define       AN_TYPE_TYPE_LEN 4
+/* enum: None, AN disabled or not supported */
+#define          MC_CMD_AN_NONE 0x0
+/* enum: Clause 28 - BASE-T */
+#define          MC_CMD_AN_CLAUSE28 0x1
+/* enum: Clause 37 - BASE-X */
+#define          MC_CMD_AN_CLAUSE37 0x2
+/* enum: Clause 73 - BASE-R startup protocol for backplane and copper cable
+ * assemblies. Includes Clause 72/Clause 92 link-training.
+ */
+#define          MC_CMD_AN_CLAUSE73 0x3
+#define       AN_TYPE_TYPE_LBN 0
+#define       AN_TYPE_TYPE_WIDTH 32
+
+/* FEC_TYPE structuredef: Forward error correction types defined in IEEE802.3
+ */
+#define    FEC_TYPE_LEN 4
+#define       FEC_TYPE_TYPE_OFST 0
+#define       FEC_TYPE_TYPE_LEN 4
+/* enum: No FEC */
+#define          MC_CMD_FEC_NONE 0x0
+/* enum: Clause 74 BASE-R FEC (a.k.a. Firecode) */
+#define          MC_CMD_FEC_BASER 0x1
+/* enum: Clause 91/Clause 108 Reed-Solomon FEC */
+#define          MC_CMD_FEC_RS 0x2
+#define       FEC_TYPE_TYPE_LBN 0
+#define       FEC_TYPE_TYPE_WIDTH 32
+
 
 /***********************************/
 /* MC_CMD_GET_LINK
 
 /* MC_CMD_GET_LINK_OUT msgresponse */
 #define    MC_CMD_GET_LINK_OUT_LEN 28
-/* near-side advertised capabilities */
+/* Near-side advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
 #define       MC_CMD_GET_LINK_OUT_CAP_OFST 0
 #define       MC_CMD_GET_LINK_OUT_CAP_LEN 4
-/* link-partner advertised capabilities */
+/* Link-partner advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
 #define       MC_CMD_GET_LINK_OUT_LP_CAP_OFST 4
 #define       MC_CMD_GET_LINK_OUT_LP_CAP_LEN 4
 /* Autonegotiated speed in mbit/s. The link may still be down even if this
 #define        MC_CMD_MAC_FAULT_PENDING_RECONFIG_LBN 3
 #define        MC_CMD_MAC_FAULT_PENDING_RECONFIG_WIDTH 1
 
+/* MC_CMD_GET_LINK_OUT_V2 msgresponse: Extended link state information */
+#define    MC_CMD_GET_LINK_OUT_V2_LEN 44
+/* Near-side advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
+#define       MC_CMD_GET_LINK_OUT_V2_CAP_OFST 0
+#define       MC_CMD_GET_LINK_OUT_V2_CAP_LEN 4
+/* Link-partner advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
+#define       MC_CMD_GET_LINK_OUT_V2_LP_CAP_OFST 4
+#define       MC_CMD_GET_LINK_OUT_V2_LP_CAP_LEN 4
+/* Autonegotiated speed in mbit/s. The link may still be down even if this
+ * reads non-zero.
+ */
+#define       MC_CMD_GET_LINK_OUT_V2_LINK_SPEED_OFST 8
+#define       MC_CMD_GET_LINK_OUT_V2_LINK_SPEED_LEN 4
+/* Current loopback setting. */
+#define       MC_CMD_GET_LINK_OUT_V2_LOOPBACK_MODE_OFST 12
+#define       MC_CMD_GET_LINK_OUT_V2_LOOPBACK_MODE_LEN 4
+/*            Enum values, see field(s): */
+/*               MC_CMD_GET_LOOPBACK_MODES/MC_CMD_GET_LOOPBACK_MODES_OUT/100M */
+#define       MC_CMD_GET_LINK_OUT_V2_FLAGS_OFST 16
+#define       MC_CMD_GET_LINK_OUT_V2_FLAGS_LEN 4
+#define        MC_CMD_GET_LINK_OUT_V2_LINK_UP_LBN 0
+#define        MC_CMD_GET_LINK_OUT_V2_LINK_UP_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_FULL_DUPLEX_LBN 1
+#define        MC_CMD_GET_LINK_OUT_V2_FULL_DUPLEX_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_BPX_LINK_LBN 2
+#define        MC_CMD_GET_LINK_OUT_V2_BPX_LINK_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_PHY_LINK_LBN 3
+#define        MC_CMD_GET_LINK_OUT_V2_PHY_LINK_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_LINK_FAULT_RX_LBN 6
+#define        MC_CMD_GET_LINK_OUT_V2_LINK_FAULT_RX_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_LINK_FAULT_TX_LBN 7
+#define        MC_CMD_GET_LINK_OUT_V2_LINK_FAULT_TX_WIDTH 1
+/* This returns the negotiated flow control value. */
+#define       MC_CMD_GET_LINK_OUT_V2_FCNTL_OFST 20
+#define       MC_CMD_GET_LINK_OUT_V2_FCNTL_LEN 4
+/*            Enum values, see field(s): */
+/*               MC_CMD_SET_MAC/MC_CMD_SET_MAC_IN/FCNTL */
+#define       MC_CMD_GET_LINK_OUT_V2_MAC_FAULT_OFST 24
+#define       MC_CMD_GET_LINK_OUT_V2_MAC_FAULT_LEN 4
+/*             MC_CMD_MAC_FAULT_XGMII_LOCAL_LBN 0 */
+/*             MC_CMD_MAC_FAULT_XGMII_LOCAL_WIDTH 1 */
+/*             MC_CMD_MAC_FAULT_XGMII_REMOTE_LBN 1 */
+/*             MC_CMD_MAC_FAULT_XGMII_REMOTE_WIDTH 1 */
+/*             MC_CMD_MAC_FAULT_SGMII_REMOTE_LBN 2 */
+/*             MC_CMD_MAC_FAULT_SGMII_REMOTE_WIDTH 1 */
+/*             MC_CMD_MAC_FAULT_PENDING_RECONFIG_LBN 3 */
+/*             MC_CMD_MAC_FAULT_PENDING_RECONFIG_WIDTH 1 */
+/* True local device capabilities (taking into account currently used PMD/MDI,
+ * e.g. plugged-in module). In general, subset of
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP, but may include extra _FEC_REQUEST
+ * bits, if the PMD requires FEC. 0 if unknown (e.g. module unplugged). Equal
+ * to SUPPORTED_CAP for non-pluggable PMDs. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
+#define       MC_CMD_GET_LINK_OUT_V2_LD_CAP_OFST 28
+#define       MC_CMD_GET_LINK_OUT_V2_LD_CAP_LEN 4
+/* Auto-negotiation type used on the link */
+#define       MC_CMD_GET_LINK_OUT_V2_AN_TYPE_OFST 32
+#define       MC_CMD_GET_LINK_OUT_V2_AN_TYPE_LEN 4
+/*            Enum values, see field(s): */
+/*               AN_TYPE/TYPE */
+/* Forward error correction used on the link */
+#define       MC_CMD_GET_LINK_OUT_V2_FEC_TYPE_OFST 36
+#define       MC_CMD_GET_LINK_OUT_V2_FEC_TYPE_LEN 4
+/*            Enum values, see field(s): */
+/*               FEC_TYPE/TYPE */
+#define       MC_CMD_GET_LINK_OUT_V2_EXT_FLAGS_OFST 40
+#define       MC_CMD_GET_LINK_OUT_V2_EXT_FLAGS_LEN 4
+#define        MC_CMD_GET_LINK_OUT_V2_PMD_MDI_CONNECTED_LBN 0
+#define        MC_CMD_GET_LINK_OUT_V2_PMD_MDI_CONNECTED_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_PMD_READY_LBN 1
+#define        MC_CMD_GET_LINK_OUT_V2_PMD_READY_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_PMD_LINK_UP_LBN 2
+#define        MC_CMD_GET_LINK_OUT_V2_PMD_LINK_UP_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_PMA_LINK_UP_LBN 3
+#define        MC_CMD_GET_LINK_OUT_V2_PMA_LINK_UP_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_PCS_LOCK_LBN 4
+#define        MC_CMD_GET_LINK_OUT_V2_PCS_LOCK_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_ALIGN_LOCK_LBN 5
+#define        MC_CMD_GET_LINK_OUT_V2_ALIGN_LOCK_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_HI_BER_LBN 6
+#define        MC_CMD_GET_LINK_OUT_V2_HI_BER_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_FEC_LOCK_LBN 7
+#define        MC_CMD_GET_LINK_OUT_V2_FEC_LOCK_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_AN_DONE_LBN 8
+#define        MC_CMD_GET_LINK_OUT_V2_AN_DONE_WIDTH 1
+
 
 /***********************************/
 /* MC_CMD_SET_LINK
 
 /* MC_CMD_SET_LINK_IN msgrequest */
 #define    MC_CMD_SET_LINK_IN_LEN 16
-/* ??? */
+/* Near-side advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
 #define       MC_CMD_SET_LINK_IN_CAP_OFST 0
 #define       MC_CMD_SET_LINK_IN_CAP_LEN 4
 /* Flags */
 /* Set LED state. */
 #define       MC_CMD_SET_ID_LED_IN_STATE_OFST 0
 #define       MC_CMD_SET_ID_LED_IN_STATE_LEN 4
-#define          MC_CMD_LED_OFF  0x0 /* enum */
-#define          MC_CMD_LED_ON  0x1 /* enum */
-#define          MC_CMD_LED_DEFAULT  0x2 /* enum */
+#define          MC_CMD_LED_OFF 0x0 /* enum */
+#define          MC_CMD_LED_ON 0x1 /* enum */
+#define          MC_CMD_LED_DEFAULT 0x2 /* enum */
 
 /* MC_CMD_SET_ID_LED_OUT msgresponse */
 #define    MC_CMD_SET_ID_LED_OUT_LEN 0
 #define       MC_CMD_PHY_STATS_OUT_NO_DMA_STATISTICS_LEN 4
 #define       MC_CMD_PHY_STATS_OUT_NO_DMA_STATISTICS_NUM MC_CMD_PHY_NSTATS
 /* enum: OUI. */
-#define          MC_CMD_OUI  0x0
+#define          MC_CMD_OUI 0x0
 /* enum: PMA-PMD Link Up. */
-#define          MC_CMD_PMA_PMD_LINK_UP  0x1
+#define          MC_CMD_PMA_PMD_LINK_UP 0x1
 /* enum: PMA-PMD RX Fault. */
-#define          MC_CMD_PMA_PMD_RX_FAULT  0x2
+#define          MC_CMD_PMA_PMD_RX_FAULT 0x2
 /* enum: PMA-PMD TX Fault. */
-#define          MC_CMD_PMA_PMD_TX_FAULT  0x3
+#define          MC_CMD_PMA_PMD_TX_FAULT 0x3
 /* enum: PMA-PMD Signal */
-#define          MC_CMD_PMA_PMD_SIGNAL  0x4
+#define          MC_CMD_PMA_PMD_SIGNAL 0x4
 /* enum: PMA-PMD SNR A. */
-#define          MC_CMD_PMA_PMD_SNR_A  0x5
+#define          MC_CMD_PMA_PMD_SNR_A 0x5
 /* enum: PMA-PMD SNR B. */
-#define          MC_CMD_PMA_PMD_SNR_B  0x6
+#define          MC_CMD_PMA_PMD_SNR_B 0x6
 /* enum: PMA-PMD SNR C. */
-#define          MC_CMD_PMA_PMD_SNR_C  0x7
+#define          MC_CMD_PMA_PMD_SNR_C 0x7
 /* enum: PMA-PMD SNR D. */
-#define          MC_CMD_PMA_PMD_SNR_D  0x8
+#define          MC_CMD_PMA_PMD_SNR_D 0x8
 /* enum: PCS Link Up. */
-#define          MC_CMD_PCS_LINK_UP  0x9
+#define          MC_CMD_PCS_LINK_UP 0x9
 /* enum: PCS RX Fault. */
-#define          MC_CMD_PCS_RX_FAULT  0xa
+#define          MC_CMD_PCS_RX_FAULT 0xa
 /* enum: PCS TX Fault. */
-#define          MC_CMD_PCS_TX_FAULT  0xb
+#define          MC_CMD_PCS_TX_FAULT 0xb
 /* enum: PCS BER. */
-#define          MC_CMD_PCS_BER  0xc
+#define          MC_CMD_PCS_BER 0xc
 /* enum: PCS Block Errors. */
-#define          MC_CMD_PCS_BLOCK_ERRORS  0xd
+#define          MC_CMD_PCS_BLOCK_ERRORS 0xd
 /* enum: PhyXS Link Up. */
-#define          MC_CMD_PHYXS_LINK_UP  0xe
+#define          MC_CMD_PHYXS_LINK_UP 0xe
 /* enum: PhyXS RX Fault. */
-#define          MC_CMD_PHYXS_RX_FAULT  0xf
+#define          MC_CMD_PHYXS_RX_FAULT 0xf
 /* enum: PhyXS TX Fault. */
-#define          MC_CMD_PHYXS_TX_FAULT  0x10
+#define          MC_CMD_PHYXS_TX_FAULT 0x10
 /* enum: PhyXS Align. */
-#define          MC_CMD_PHYXS_ALIGN  0x11
+#define          MC_CMD_PHYXS_ALIGN 0x11
 /* enum: PhyXS Sync. */
-#define          MC_CMD_PHYXS_SYNC  0x12
+#define          MC_CMD_PHYXS_SYNC 0x12
 /* enum: AN link-up. */
-#define          MC_CMD_AN_LINK_UP  0x13
+#define          MC_CMD_AN_LINK_UP 0x13
 /* enum: AN Complete. */
-#define          MC_CMD_AN_COMPLETE  0x14
+#define          MC_CMD_AN_COMPLETE 0x14
 /* enum: AN 10GBaseT Status. */
-#define          MC_CMD_AN_10GBT_STATUS  0x15
+#define          MC_CMD_AN_10GBT_STATUS 0x15
 /* enum: Clause 22 Link-Up. */
-#define          MC_CMD_CL22_LINK_UP  0x16
+#define          MC_CMD_CL22_LINK_UP 0x16
 /* enum: (Last entry) */
-#define          MC_CMD_PHY_NSTATS  0x17
+#define          MC_CMD_PHY_NSTATS 0x17
 
 
 /***********************************/
 #define       MC_CMD_MAC_STATS_OUT_NO_DMA_STATISTICS_LO_OFST 0
 #define       MC_CMD_MAC_STATS_OUT_NO_DMA_STATISTICS_HI_OFST 4
 #define       MC_CMD_MAC_STATS_OUT_NO_DMA_STATISTICS_NUM MC_CMD_MAC_NSTATS
-#define          MC_CMD_MAC_GENERATION_START  0x0 /* enum */
-#define          MC_CMD_MAC_DMABUF_START  0x1 /* enum */
-#define          MC_CMD_MAC_TX_PKTS  0x1 /* enum */
-#define          MC_CMD_MAC_TX_PAUSE_PKTS  0x2 /* enum */
-#define          MC_CMD_MAC_TX_CONTROL_PKTS  0x3 /* enum */
-#define          MC_CMD_MAC_TX_UNICAST_PKTS  0x4 /* enum */
-#define          MC_CMD_MAC_TX_MULTICAST_PKTS  0x5 /* enum */
-#define          MC_CMD_MAC_TX_BROADCAST_PKTS  0x6 /* enum */
-#define          MC_CMD_MAC_TX_BYTES  0x7 /* enum */
-#define          MC_CMD_MAC_TX_BAD_BYTES  0x8 /* enum */
-#define          MC_CMD_MAC_TX_LT64_PKTS  0x9 /* enum */
-#define          MC_CMD_MAC_TX_64_PKTS  0xa /* enum */
-#define          MC_CMD_MAC_TX_65_TO_127_PKTS  0xb /* enum */
-#define          MC_CMD_MAC_TX_128_TO_255_PKTS  0xc /* enum */
-#define          MC_CMD_MAC_TX_256_TO_511_PKTS  0xd /* enum */
-#define          MC_CMD_MAC_TX_512_TO_1023_PKTS  0xe /* enum */
-#define          MC_CMD_MAC_TX_1024_TO_15XX_PKTS  0xf /* enum */
-#define          MC_CMD_MAC_TX_15XX_TO_JUMBO_PKTS  0x10 /* enum */
-#define          MC_CMD_MAC_TX_GTJUMBO_PKTS  0x11 /* enum */
-#define          MC_CMD_MAC_TX_BAD_FCS_PKTS  0x12 /* enum */
-#define          MC_CMD_MAC_TX_SINGLE_COLLISION_PKTS  0x13 /* enum */
-#define          MC_CMD_MAC_TX_MULTIPLE_COLLISION_PKTS  0x14 /* enum */
-#define          MC_CMD_MAC_TX_EXCESSIVE_COLLISION_PKTS  0x15 /* enum */
-#define          MC_CMD_MAC_TX_LATE_COLLISION_PKTS  0x16 /* enum */
-#define          MC_CMD_MAC_TX_DEFERRED_PKTS  0x17 /* enum */
-#define          MC_CMD_MAC_TX_EXCESSIVE_DEFERRED_PKTS  0x18 /* enum */
-#define          MC_CMD_MAC_TX_NON_TCPUDP_PKTS  0x19 /* enum */
-#define          MC_CMD_MAC_TX_MAC_SRC_ERR_PKTS  0x1a /* enum */
-#define          MC_CMD_MAC_TX_IP_SRC_ERR_PKTS  0x1b /* enum */
-#define          MC_CMD_MAC_RX_PKTS  0x1c /* enum */
-#define          MC_CMD_MAC_RX_PAUSE_PKTS  0x1d /* enum */
-#define          MC_CMD_MAC_RX_GOOD_PKTS  0x1e /* enum */
-#define          MC_CMD_MAC_RX_CONTROL_PKTS  0x1f /* enum */
-#define          MC_CMD_MAC_RX_UNICAST_PKTS  0x20 /* enum */
-#define          MC_CMD_MAC_RX_MULTICAST_PKTS  0x21 /* enum */
-#define          MC_CMD_MAC_RX_BROADCAST_PKTS  0x22 /* enum */
-#define          MC_CMD_MAC_RX_BYTES  0x23 /* enum */
-#define          MC_CMD_MAC_RX_BAD_BYTES  0x24 /* enum */
-#define          MC_CMD_MAC_RX_64_PKTS  0x25 /* enum */
-#define          MC_CMD_MAC_RX_65_TO_127_PKTS  0x26 /* enum */
-#define          MC_CMD_MAC_RX_128_TO_255_PKTS  0x27 /* enum */
-#define          MC_CMD_MAC_RX_256_TO_511_PKTS  0x28 /* enum */
-#define          MC_CMD_MAC_RX_512_TO_1023_PKTS  0x29 /* enum */
-#define          MC_CMD_MAC_RX_1024_TO_15XX_PKTS  0x2a /* enum */
-#define          MC_CMD_MAC_RX_15XX_TO_JUMBO_PKTS  0x2b /* enum */
-#define          MC_CMD_MAC_RX_GTJUMBO_PKTS  0x2c /* enum */
-#define          MC_CMD_MAC_RX_UNDERSIZE_PKTS  0x2d /* enum */
-#define          MC_CMD_MAC_RX_BAD_FCS_PKTS  0x2e /* enum */
-#define          MC_CMD_MAC_RX_OVERFLOW_PKTS  0x2f /* enum */
-#define          MC_CMD_MAC_RX_FALSE_CARRIER_PKTS  0x30 /* enum */
-#define          MC_CMD_MAC_RX_SYMBOL_ERROR_PKTS  0x31 /* enum */
-#define          MC_CMD_MAC_RX_ALIGN_ERROR_PKTS  0x32 /* enum */
-#define          MC_CMD_MAC_RX_LENGTH_ERROR_PKTS  0x33 /* enum */
-#define          MC_CMD_MAC_RX_INTERNAL_ERROR_PKTS  0x34 /* enum */
-#define          MC_CMD_MAC_RX_JABBER_PKTS  0x35 /* enum */
-#define          MC_CMD_MAC_RX_NODESC_DROPS  0x36 /* enum */
-#define          MC_CMD_MAC_RX_LANES01_CHAR_ERR  0x37 /* enum */
-#define          MC_CMD_MAC_RX_LANES23_CHAR_ERR  0x38 /* enum */
-#define          MC_CMD_MAC_RX_LANES01_DISP_ERR  0x39 /* enum */
-#define          MC_CMD_MAC_RX_LANES23_DISP_ERR  0x3a /* enum */
-#define          MC_CMD_MAC_RX_MATCH_FAULT  0x3b /* enum */
+#define          MC_CMD_MAC_GENERATION_START 0x0 /* enum */
+#define          MC_CMD_MAC_DMABUF_START 0x1 /* enum */
+#define          MC_CMD_MAC_TX_PKTS 0x1 /* enum */
+#define          MC_CMD_MAC_TX_PAUSE_PKTS 0x2 /* enum */
+#define          MC_CMD_MAC_TX_CONTROL_PKTS 0x3 /* enum */
+#define          MC_CMD_MAC_TX_UNICAST_PKTS 0x4 /* enum */
+#define          MC_CMD_MAC_TX_MULTICAST_PKTS 0x5 /* enum */
+#define          MC_CMD_MAC_TX_BROADCAST_PKTS 0x6 /* enum */
+#define          MC_CMD_MAC_TX_BYTES 0x7 /* enum */
+#define          MC_CMD_MAC_TX_BAD_BYTES 0x8 /* enum */
+#define          MC_CMD_MAC_TX_LT64_PKTS 0x9 /* enum */
+#define          MC_CMD_MAC_TX_64_PKTS 0xa /* enum */
+#define          MC_CMD_MAC_TX_65_TO_127_PKTS 0xb /* enum */
+#define          MC_CMD_MAC_TX_128_TO_255_PKTS 0xc /* enum */
+#define          MC_CMD_MAC_TX_256_TO_511_PKTS 0xd /* enum */
+#define          MC_CMD_MAC_TX_512_TO_1023_PKTS 0xe /* enum */
+#define          MC_CMD_MAC_TX_1024_TO_15XX_PKTS 0xf /* enum */
+#define          MC_CMD_MAC_TX_15XX_TO_JUMBO_PKTS 0x10 /* enum */
+#define          MC_CMD_MAC_TX_GTJUMBO_PKTS 0x11 /* enum */
+#define          MC_CMD_MAC_TX_BAD_FCS_PKTS 0x12 /* enum */
+#define          MC_CMD_MAC_TX_SINGLE_COLLISION_PKTS 0x13 /* enum */
+#define          MC_CMD_MAC_TX_MULTIPLE_COLLISION_PKTS 0x14 /* enum */
+#define          MC_CMD_MAC_TX_EXCESSIVE_COLLISION_PKTS 0x15 /* enum */
+#define          MC_CMD_MAC_TX_LATE_COLLISION_PKTS 0x16 /* enum */
+#define          MC_CMD_MAC_TX_DEFERRED_PKTS 0x17 /* enum */
+#define          MC_CMD_MAC_TX_EXCESSIVE_DEFERRED_PKTS 0x18 /* enum */
+#define          MC_CMD_MAC_TX_NON_TCPUDP_PKTS 0x19 /* enum */
+#define          MC_CMD_MAC_TX_MAC_SRC_ERR_PKTS 0x1a /* enum */
+#define          MC_CMD_MAC_TX_IP_SRC_ERR_PKTS 0x1b /* enum */
+#define          MC_CMD_MAC_RX_PKTS 0x1c /* enum */
+#define          MC_CMD_MAC_RX_PAUSE_PKTS 0x1d /* enum */
+#define          MC_CMD_MAC_RX_GOOD_PKTS 0x1e /* enum */
+#define          MC_CMD_MAC_RX_CONTROL_PKTS 0x1f /* enum */
+#define          MC_CMD_MAC_RX_UNICAST_PKTS 0x20 /* enum */
+#define          MC_CMD_MAC_RX_MULTICAST_PKTS 0x21 /* enum */
+#define          MC_CMD_MAC_RX_BROADCAST_PKTS 0x22 /* enum */
+#define          MC_CMD_MAC_RX_BYTES 0x23 /* enum */
+#define          MC_CMD_MAC_RX_BAD_BYTES 0x24 /* enum */
+#define          MC_CMD_MAC_RX_64_PKTS 0x25 /* enum */
+#define          MC_CMD_MAC_RX_65_TO_127_PKTS 0x26 /* enum */
+#define          MC_CMD_MAC_RX_128_TO_255_PKTS 0x27 /* enum */
+#define          MC_CMD_MAC_RX_256_TO_511_PKTS 0x28 /* enum */
+#define          MC_CMD_MAC_RX_512_TO_1023_PKTS 0x29 /* enum */
+#define          MC_CMD_MAC_RX_1024_TO_15XX_PKTS 0x2a /* enum */
+#define          MC_CMD_MAC_RX_15XX_TO_JUMBO_PKTS 0x2b /* enum */
+#define          MC_CMD_MAC_RX_GTJUMBO_PKTS 0x2c /* enum */
+#define          MC_CMD_MAC_RX_UNDERSIZE_PKTS 0x2d /* enum */
+#define          MC_CMD_MAC_RX_BAD_FCS_PKTS 0x2e /* enum */
+#define          MC_CMD_MAC_RX_OVERFLOW_PKTS 0x2f /* enum */
+#define          MC_CMD_MAC_RX_FALSE_CARRIER_PKTS 0x30 /* enum */
+#define          MC_CMD_MAC_RX_SYMBOL_ERROR_PKTS 0x31 /* enum */
+#define          MC_CMD_MAC_RX_ALIGN_ERROR_PKTS 0x32 /* enum */
+#define          MC_CMD_MAC_RX_LENGTH_ERROR_PKTS 0x33 /* enum */
+#define          MC_CMD_MAC_RX_INTERNAL_ERROR_PKTS 0x34 /* enum */
+#define          MC_CMD_MAC_RX_JABBER_PKTS 0x35 /* enum */
+#define          MC_CMD_MAC_RX_NODESC_DROPS 0x36 /* enum */
+#define          MC_CMD_MAC_RX_LANES01_CHAR_ERR 0x37 /* enum */
+#define          MC_CMD_MAC_RX_LANES23_CHAR_ERR 0x38 /* enum */
+#define          MC_CMD_MAC_RX_LANES01_DISP_ERR 0x39 /* enum */
+#define          MC_CMD_MAC_RX_LANES23_DISP_ERR 0x3a /* enum */
+#define          MC_CMD_MAC_RX_MATCH_FAULT 0x3b /* enum */
 /* enum: PM trunc_bb_overflow counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
  * capability only.
  */
-#define          MC_CMD_MAC_PM_TRUNC_BB_OVERFLOW  0x3c
+#define          MC_CMD_MAC_PM_TRUNC_BB_OVERFLOW 0x3c
 /* enum: PM discard_bb_overflow counter. Valid for EF10 with
  * PM_AND_RXDP_COUNTERS capability only.
  */
-#define          MC_CMD_MAC_PM_DISCARD_BB_OVERFLOW  0x3d
+#define          MC_CMD_MAC_PM_DISCARD_BB_OVERFLOW 0x3d
 /* enum: PM trunc_vfifo_full counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
  * capability only.
  */
-#define          MC_CMD_MAC_PM_TRUNC_VFIFO_FULL  0x3e
+#define          MC_CMD_MAC_PM_TRUNC_VFIFO_FULL 0x3e
 /* enum: PM discard_vfifo_full counter. Valid for EF10 with
  * PM_AND_RXDP_COUNTERS capability only.
  */
-#define          MC_CMD_MAC_PM_DISCARD_VFIFO_FULL  0x3f
+#define          MC_CMD_MAC_PM_DISCARD_VFIFO_FULL 0x3f
 /* enum: PM trunc_qbb counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
  * capability only.
  */
-#define          MC_CMD_MAC_PM_TRUNC_QBB  0x40
+#define          MC_CMD_MAC_PM_TRUNC_QBB 0x40
 /* enum: PM discard_qbb counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
  * capability only.
  */
-#define          MC_CMD_MAC_PM_DISCARD_QBB  0x41
+#define          MC_CMD_MAC_PM_DISCARD_QBB 0x41
 /* enum: PM discard_mapping counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
  * capability only.
  */
-#define          MC_CMD_MAC_PM_DISCARD_MAPPING  0x42
+#define          MC_CMD_MAC_PM_DISCARD_MAPPING 0x42
 /* enum: RXDP counter: Number of packets dropped due to the queue being
  * disabled. Valid for EF10 with PM_AND_RXDP_COUNTERS capability only.
  */
-#define          MC_CMD_MAC_RXDP_Q_DISABLED_PKTS  0x43
+#define          MC_CMD_MAC_RXDP_Q_DISABLED_PKTS 0x43
 /* enum: RXDP counter: Number of packets dropped by the DICPU. Valid for EF10
  * with PM_AND_RXDP_COUNTERS capability only.
  */
-#define          MC_CMD_MAC_RXDP_DI_DROPPED_PKTS  0x45
+#define          MC_CMD_MAC_RXDP_DI_DROPPED_PKTS 0x45
 /* enum: RXDP counter: Number of non-host packets. Valid for EF10 with
  * PM_AND_RXDP_COUNTERS capability only.
  */
-#define          MC_CMD_MAC_RXDP_STREAMING_PKTS  0x46
+#define          MC_CMD_MAC_RXDP_STREAMING_PKTS 0x46
 /* enum: RXDP counter: Number of times an hlb descriptor fetch was performed.
  * Valid for EF10 with PM_AND_RXDP_COUNTERS capability only.
  */
-#define          MC_CMD_MAC_RXDP_HLB_FETCH_CONDITIONS  0x47
+#define          MC_CMD_MAC_RXDP_HLB_FETCH_CONDITIONS 0x47
 /* enum: RXDP counter: Number of times the DPCPU waited for an existing
  * descriptor fetch. Valid for EF10 with PM_AND_RXDP_COUNTERS capability only.
  */
-#define          MC_CMD_MAC_RXDP_HLB_WAIT_CONDITIONS  0x48
-#define          MC_CMD_MAC_VADAPTER_RX_DMABUF_START  0x4c /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_UNICAST_PACKETS  0x4c /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_UNICAST_BYTES  0x4d /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_MULTICAST_PACKETS  0x4e /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_MULTICAST_BYTES  0x4f /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_BROADCAST_PACKETS  0x50 /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_BROADCAST_BYTES  0x51 /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_BAD_PACKETS  0x52 /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_BAD_BYTES  0x53 /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_OVERFLOW  0x54 /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_DMABUF_START  0x57 /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_UNICAST_PACKETS  0x57 /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_UNICAST_BYTES  0x58 /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_MULTICAST_PACKETS  0x59 /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_MULTICAST_BYTES  0x5a /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_BROADCAST_PACKETS  0x5b /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_BROADCAST_BYTES  0x5c /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_BAD_PACKETS  0x5d /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_BAD_BYTES  0x5e /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_OVERFLOW  0x5f /* enum */
+#define          MC_CMD_MAC_RXDP_HLB_WAIT_CONDITIONS 0x48
+#define          MC_CMD_MAC_VADAPTER_RX_DMABUF_START 0x4c /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_UNICAST_PACKETS 0x4c /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_UNICAST_BYTES 0x4d /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_MULTICAST_PACKETS 0x4e /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_MULTICAST_BYTES 0x4f /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_BROADCAST_PACKETS 0x50 /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_BROADCAST_BYTES 0x51 /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_BAD_PACKETS 0x52 /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_BAD_BYTES 0x53 /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_OVERFLOW 0x54 /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_DMABUF_START 0x57 /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_UNICAST_PACKETS 0x57 /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_UNICAST_BYTES 0x58 /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_MULTICAST_PACKETS 0x59 /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_MULTICAST_BYTES 0x5a /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_BROADCAST_PACKETS 0x5b /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_BROADCAST_BYTES 0x5c /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_BAD_PACKETS 0x5d /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_BAD_BYTES 0x5e /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_OVERFLOW 0x5f /* enum */
 /* enum: Start of GMAC stats buffer space, for Siena only. */
-#define          MC_CMD_GMAC_DMABUF_START  0x40
+#define          MC_CMD_GMAC_DMABUF_START 0x40
 /* enum: End of GMAC stats buffer space, for Siena only. */
-#define          MC_CMD_GMAC_DMABUF_END    0x5f
+#define          MC_CMD_GMAC_DMABUF_END 0x5f
 /* enum: GENERATION_END value, used together with GENERATION_START to verify
  * consistency of DMAd data. For legacy firmware / drivers without extended
  * stats (more precisely, when DMA_LEN == MC_CMD_MAC_NSTATS *
  * sizeof(uint64_t). See SF-109306-TC, Section 9.2 for details.
  */
 #define          MC_CMD_MAC_GENERATION_END 0x60
-#define          MC_CMD_MAC_NSTATS  0x61 /* enum */
+#define          MC_CMD_MAC_NSTATS 0x61 /* enum */
 
 /* MC_CMD_MAC_STATS_V2_OUT_DMA msgresponse */
 #define    MC_CMD_MAC_STATS_V2_OUT_DMA_LEN 0
 #define       MC_CMD_MAC_STATS_V2_OUT_NO_DMA_STATISTICS_HI_OFST 4
 #define       MC_CMD_MAC_STATS_V2_OUT_NO_DMA_STATISTICS_NUM MC_CMD_MAC_NSTATS_V2
 /* enum: Start of FEC stats buffer space, Medford2 and up */
-#define          MC_CMD_MAC_FEC_DMABUF_START  0x61
+#define          MC_CMD_MAC_FEC_DMABUF_START 0x61
 /* enum: Number of uncorrected FEC codewords on link (RS-FEC only for Medford2)
  */
-#define          MC_CMD_MAC_FEC_UNCORRECTED_ERRORS  0x61
+#define          MC_CMD_MAC_FEC_UNCORRECTED_ERRORS 0x61
 /* enum: Number of corrected FEC codewords on link (RS-FEC only for Medford2)
  */
-#define          MC_CMD_MAC_FEC_CORRECTED_ERRORS  0x62
+#define          MC_CMD_MAC_FEC_CORRECTED_ERRORS 0x62
 /* enum: Number of corrected 10-bit symbol errors, lane 0 (RS-FEC only) */
-#define          MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE0  0x63
+#define          MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE0 0x63
 /* enum: Number of corrected 10-bit symbol errors, lane 1 (RS-FEC only) */
-#define          MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE1  0x64
+#define          MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE1 0x64
 /* enum: Number of corrected 10-bit symbol errors, lane 2 (RS-FEC only) */
-#define          MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE2  0x65
+#define          MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE2 0x65
 /* enum: Number of corrected 10-bit symbol errors, lane 3 (RS-FEC only) */
-#define          MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE3  0x66
+#define          MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE3 0x66
 /* enum: This includes the space at offset 103 which is the final
  * GENERATION_END in a MAC_STATS_V2 response and otherwise unused.
  */
-#define          MC_CMD_MAC_NSTATS_V2  0x68
+#define          MC_CMD_MAC_NSTATS_V2 0x68
 /*            Other enum values, see field(s): */
 /*               MC_CMD_MAC_STATS_OUT_NO_DMA/STATISTICS */
 
 #define       MC_CMD_MAC_STATS_V3_OUT_NO_DMA_STATISTICS_HI_OFST 4
 #define       MC_CMD_MAC_STATS_V3_OUT_NO_DMA_STATISTICS_NUM MC_CMD_MAC_NSTATS_V3
 /* enum: Start of CTPIO stats buffer space, Medford2 and up */
-#define          MC_CMD_MAC_CTPIO_DMABUF_START  0x68
+#define          MC_CMD_MAC_CTPIO_DMABUF_START 0x68
 /* enum: Number of CTPIO fallbacks because a DMA packet was in progress on the
  * target VI
  */
-#define          MC_CMD_MAC_CTPIO_VI_BUSY_FALLBACK  0x68
+#define          MC_CMD_MAC_CTPIO_VI_BUSY_FALLBACK 0x68
 /* enum: Number of times a CTPIO send wrote beyond frame end (informational
  * only)
  */
-#define          MC_CMD_MAC_CTPIO_LONG_WRITE_SUCCESS  0x69
+#define          MC_CMD_MAC_CTPIO_LONG_WRITE_SUCCESS 0x69
 /* enum: Number of CTPIO failures because the TX doorbell was written before
  * the end of the frame data
  */
-#define          MC_CMD_MAC_CTPIO_MISSING_DBELL_FAIL  0x6a
+#define          MC_CMD_MAC_CTPIO_MISSING_DBELL_FAIL 0x6a
 /* enum: Number of CTPIO failures because the internal FIFO overflowed */
-#define          MC_CMD_MAC_CTPIO_OVERFLOW_FAIL  0x6b
+#define          MC_CMD_MAC_CTPIO_OVERFLOW_FAIL 0x6b
 /* enum: Number of CTPIO failures because the host did not deliver data fast
  * enough to avoid MAC underflow
  */
-#define          MC_CMD_MAC_CTPIO_UNDERFLOW_FAIL  0x6c
+#define          MC_CMD_MAC_CTPIO_UNDERFLOW_FAIL 0x6c
 /* enum: Number of CTPIO failures because the host did not deliver all the
  * frame data within the timeout
  */
-#define          MC_CMD_MAC_CTPIO_TIMEOUT_FAIL  0x6d
+#define          MC_CMD_MAC_CTPIO_TIMEOUT_FAIL 0x6d
 /* enum: Number of CTPIO failures because the frame data arrived out of order
  * or with gaps
  */
-#define          MC_CMD_MAC_CTPIO_NONCONTIG_WR_FAIL  0x6e
+#define          MC_CMD_MAC_CTPIO_NONCONTIG_WR_FAIL 0x6e
 /* enum: Number of CTPIO failures because the host started a new frame before
  * completing the previous one
  */
-#define          MC_CMD_MAC_CTPIO_FRM_CLOBBER_FAIL  0x6f
+#define          MC_CMD_MAC_CTPIO_FRM_CLOBBER_FAIL 0x6f
 /* enum: Number of CTPIO failures because a write was not a multiple of 32 bits
  * or not 32-bit aligned
  */
-#define          MC_CMD_MAC_CTPIO_INVALID_WR_FAIL  0x70
+#define          MC_CMD_MAC_CTPIO_INVALID_WR_FAIL 0x70
 /* enum: Number of CTPIO fallbacks because another VI on the same port was
  * sending a CTPIO frame
  */
-#define          MC_CMD_MAC_CTPIO_VI_CLOBBER_FALLBACK  0x71
+#define          MC_CMD_MAC_CTPIO_VI_CLOBBER_FALLBACK 0x71
 /* enum: Number of CTPIO fallbacks because target VI did not have CTPIO enabled
  */
-#define          MC_CMD_MAC_CTPIO_UNQUALIFIED_FALLBACK  0x72
+#define          MC_CMD_MAC_CTPIO_UNQUALIFIED_FALLBACK 0x72
 /* enum: Number of CTPIO fallbacks because length in header was less than 29
  * bytes
  */
-#define          MC_CMD_MAC_CTPIO_RUNT_FALLBACK  0x73
+#define          MC_CMD_MAC_CTPIO_RUNT_FALLBACK 0x73
 /* enum: Total number of successful CTPIO sends on this port */
-#define          MC_CMD_MAC_CTPIO_SUCCESS  0x74
+#define          MC_CMD_MAC_CTPIO_SUCCESS 0x74
 /* enum: Total number of CTPIO fallbacks on this port */
-#define          MC_CMD_MAC_CTPIO_FALLBACK  0x75
+#define          MC_CMD_MAC_CTPIO_FALLBACK 0x75
 /* enum: Total number of CTPIO poisoned frames on this port, whether erased or
  * not
  */
-#define          MC_CMD_MAC_CTPIO_POISON  0x76
+#define          MC_CMD_MAC_CTPIO_POISON 0x76
 /* enum: Total number of CTPIO erased frames on this port */
-#define          MC_CMD_MAC_CTPIO_ERASE  0x77
+#define          MC_CMD_MAC_CTPIO_ERASE 0x77
 /* enum: This includes the space at offset 120 which is the final
  * GENERATION_END in a MAC_STATS_V3 response and otherwise unused.
  */
-#define          MC_CMD_MAC_NSTATS_V3  0x79
+#define          MC_CMD_MAC_NSTATS_V3 0x79
 /*            Other enum values, see field(s): */
 /*               MC_CMD_MAC_STATS_V2_OUT_NO_DMA/STATISTICS */
 
 #define    MC_CMD_WOL_FILTER_SET_IN_LEN 192
 #define       MC_CMD_WOL_FILTER_SET_IN_FILTER_MODE_OFST 0
 #define       MC_CMD_WOL_FILTER_SET_IN_FILTER_MODE_LEN 4
-#define          MC_CMD_FILTER_MODE_SIMPLE    0x0 /* enum */
+#define          MC_CMD_FILTER_MODE_SIMPLE 0x0 /* enum */
 #define          MC_CMD_FILTER_MODE_STRUCTURED 0xffffffff /* enum */
 /* A type value of 1 is unused. */
 #define       MC_CMD_WOL_FILTER_SET_IN_WOL_TYPE_OFST 4
 #define       MC_CMD_WOL_FILTER_SET_IN_WOL_TYPE_LEN 4
 /* enum: Magic */
-#define          MC_CMD_WOL_TYPE_MAGIC      0x0
+#define          MC_CMD_WOL_TYPE_MAGIC 0x0
 /* enum: MS Windows Magic */
 #define          MC_CMD_WOL_TYPE_WIN_MAGIC 0x2
 /* enum: IPv4 Syn */
-#define          MC_CMD_WOL_TYPE_IPV4_SYN   0x3
+#define          MC_CMD_WOL_TYPE_IPV4_SYN 0x3
 /* enum: IPv6 Syn */
-#define          MC_CMD_WOL_TYPE_IPV6_SYN   0x4
+#define          MC_CMD_WOL_TYPE_IPV6_SYN 0x4
 /* enum: Bitmap */
-#define          MC_CMD_WOL_TYPE_BITMAP     0x5
+#define          MC_CMD_WOL_TYPE_BITMAP 0x5
 /* enum: Link */
-#define          MC_CMD_WOL_TYPE_LINK       0x6
+#define          MC_CMD_WOL_TYPE_LINK 0x6
 /* enum: (Above this for future use) */
-#define          MC_CMD_WOL_TYPE_MAX        0x7
+#define          MC_CMD_WOL_TYPE_MAX 0x7
 #define       MC_CMD_WOL_FILTER_SET_IN_DATA_OFST 8
 #define       MC_CMD_WOL_FILTER_SET_IN_DATA_LEN 4
 #define       MC_CMD_WOL_FILTER_SET_IN_DATA_NUM 46
 #define        MC_CMD_NVRAM_INFO_OUT_PROTECTED_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_OUT_TLV_LBN 1
 #define        MC_CMD_NVRAM_INFO_OUT_TLV_WIDTH 1
+#define        MC_CMD_NVRAM_INFO_OUT_READ_ONLY_IF_TSA_BOUND_LBN 2
+#define        MC_CMD_NVRAM_INFO_OUT_READ_ONLY_IF_TSA_BOUND_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_OUT_READ_ONLY_LBN 5
 #define        MC_CMD_NVRAM_INFO_OUT_READ_ONLY_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_OUT_CMAC_LBN 6
 #define        MC_CMD_NVRAM_INFO_V2_OUT_PROTECTED_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_V2_OUT_TLV_LBN 1
 #define        MC_CMD_NVRAM_INFO_V2_OUT_TLV_WIDTH 1
+#define        MC_CMD_NVRAM_INFO_V2_OUT_READ_ONLY_IF_TSA_BOUND_LBN 2
+#define        MC_CMD_NVRAM_INFO_V2_OUT_READ_ONLY_IF_TSA_BOUND_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_V2_OUT_READ_ONLY_LBN 5
 #define        MC_CMD_NVRAM_INFO_V2_OUT_READ_ONLY_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_V2_OUT_A_B_LBN 7
 /* MC_CMD_NVRAM_UPDATE_START
  * Start a group of update operations on a virtual NVRAM partition. Locks
  * required: PHY_LOCK if type==*PHY*. Returns: 0, EINVAL (bad type), EACCES (if
- * PHY_LOCK required and not held).
+ * PHY_LOCK required and not held). In an adapter bound to a TSA controller,
+ * MC_CMD_NVRAM_UPDATE_START can only be used on a subset of partition types
+ * i.e. static config, dynamic config and expansion ROM config. Attempting to
+ * perform this operation on a restricted partition will return the error
+ * EPERM.
  */
 #define MC_CMD_NVRAM_UPDATE_START 0x38
 
 /***********************************/
 /* MC_CMD_NVRAM_UPDATE_FINISH
  * Finish a group of update operations on a virtual NVRAM partition. Locks
- * required: PHY_LOCK if type==*PHY*. Returns: 0, EINVAL (bad
- * type/offset/length), EACCES (if PHY_LOCK required and not held)
+ * required: PHY_LOCK if type==*PHY*. Returns: 0, EINVAL (bad type/offset/
+ * length), EACCES (if PHY_LOCK required and not held). In an adapter bound to
+ * a TSA controller, MC_CMD_NVRAM_UPDATE_FINISH can only be used on a subset of
+ * partition types i.e. static config, dynamic config and expansion ROM config.
+ * Attempting to perform this operation on a restricted partition will return
+ * the error EPERM.
  */
 #define MC_CMD_NVRAM_UPDATE_FINISH 0x3c
 
 #define       MC_CMD_SENSOR_INFO_OUT_MASK_OFST 0
 #define       MC_CMD_SENSOR_INFO_OUT_MASK_LEN 4
 /* enum: Controller temperature: degC */
-#define          MC_CMD_SENSOR_CONTROLLER_TEMP  0x0
+#define          MC_CMD_SENSOR_CONTROLLER_TEMP 0x0
 /* enum: Phy common temperature: degC */
-#define          MC_CMD_SENSOR_PHY_COMMON_TEMP  0x1
+#define          MC_CMD_SENSOR_PHY_COMMON_TEMP 0x1
 /* enum: Controller cooling: bool */
-#define          MC_CMD_SENSOR_CONTROLLER_COOLING  0x2
+#define          MC_CMD_SENSOR_CONTROLLER_COOLING 0x2
 /* enum: Phy 0 temperature: degC */
-#define          MC_CMD_SENSOR_PHY0_TEMP  0x3
+#define          MC_CMD_SENSOR_PHY0_TEMP 0x3
 /* enum: Phy 0 cooling: bool */
-#define          MC_CMD_SENSOR_PHY0_COOLING  0x4
+#define          MC_CMD_SENSOR_PHY0_COOLING 0x4
 /* enum: Phy 1 temperature: degC */
-#define          MC_CMD_SENSOR_PHY1_TEMP  0x5
+#define          MC_CMD_SENSOR_PHY1_TEMP 0x5
 /* enum: Phy 1 cooling: bool */
-#define          MC_CMD_SENSOR_PHY1_COOLING  0x6
+#define          MC_CMD_SENSOR_PHY1_COOLING 0x6
 /* enum: 1.0v power: mV */
-#define          MC_CMD_SENSOR_IN_1V0  0x7
+#define          MC_CMD_SENSOR_IN_1V0 0x7
 /* enum: 1.2v power: mV */
-#define          MC_CMD_SENSOR_IN_1V2  0x8
+#define          MC_CMD_SENSOR_IN_1V2 0x8
 /* enum: 1.8v power: mV */
-#define          MC_CMD_SENSOR_IN_1V8  0x9
+#define          MC_CMD_SENSOR_IN_1V8 0x9
 /* enum: 2.5v power: mV */
-#define          MC_CMD_SENSOR_IN_2V5  0xa
+#define          MC_CMD_SENSOR_IN_2V5 0xa
 /* enum: 3.3v power: mV */
-#define          MC_CMD_SENSOR_IN_3V3  0xb
+#define          MC_CMD_SENSOR_IN_3V3 0xb
 /* enum: 12v power: mV */
-#define          MC_CMD_SENSOR_IN_12V0  0xc
+#define          MC_CMD_SENSOR_IN_12V0 0xc
 /* enum: 1.2v analogue power: mV */
-#define          MC_CMD_SENSOR_IN_1V2A  0xd
+#define          MC_CMD_SENSOR_IN_1V2A 0xd
 /* enum: reference voltage: mV */
-#define          MC_CMD_SENSOR_IN_VREF  0xe
+#define          MC_CMD_SENSOR_IN_VREF 0xe
 /* enum: AOE FPGA power: mV */
-#define          MC_CMD_SENSOR_OUT_VAOE  0xf
+#define          MC_CMD_SENSOR_OUT_VAOE 0xf
 /* enum: AOE FPGA temperature: degC */
-#define          MC_CMD_SENSOR_AOE_TEMP  0x10
+#define          MC_CMD_SENSOR_AOE_TEMP 0x10
 /* enum: AOE FPGA PSU temperature: degC */
-#define          MC_CMD_SENSOR_PSU_AOE_TEMP  0x11
+#define          MC_CMD_SENSOR_PSU_AOE_TEMP 0x11
 /* enum: AOE PSU temperature: degC */
-#define          MC_CMD_SENSOR_PSU_TEMP  0x12
+#define          MC_CMD_SENSOR_PSU_TEMP 0x12
 /* enum: Fan 0 speed: RPM */
-#define          MC_CMD_SENSOR_FAN_0  0x13
+#define          MC_CMD_SENSOR_FAN_0 0x13
 /* enum: Fan 1 speed: RPM */
-#define          MC_CMD_SENSOR_FAN_1  0x14
+#define          MC_CMD_SENSOR_FAN_1 0x14
 /* enum: Fan 2 speed: RPM */
-#define          MC_CMD_SENSOR_FAN_2  0x15
+#define          MC_CMD_SENSOR_FAN_2 0x15
 /* enum: Fan 3 speed: RPM */
-#define          MC_CMD_SENSOR_FAN_3  0x16
+#define          MC_CMD_SENSOR_FAN_3 0x16
 /* enum: Fan 4 speed: RPM */
-#define          MC_CMD_SENSOR_FAN_4  0x17
+#define          MC_CMD_SENSOR_FAN_4 0x17
 /* enum: AOE FPGA input power: mV */
-#define          MC_CMD_SENSOR_IN_VAOE  0x18
+#define          MC_CMD_SENSOR_IN_VAOE 0x18
 /* enum: AOE FPGA current: mA */
-#define          MC_CMD_SENSOR_OUT_IAOE  0x19
+#define          MC_CMD_SENSOR_OUT_IAOE 0x19
 /* enum: AOE FPGA input current: mA */
-#define          MC_CMD_SENSOR_IN_IAOE  0x1a
+#define          MC_CMD_SENSOR_IN_IAOE 0x1a
 /* enum: NIC power consumption: W */
-#define          MC_CMD_SENSOR_NIC_POWER  0x1b
+#define          MC_CMD_SENSOR_NIC_POWER 0x1b
 /* enum: 0.9v power voltage: mV */
-#define          MC_CMD_SENSOR_IN_0V9  0x1c
+#define          MC_CMD_SENSOR_IN_0V9 0x1c
 /* enum: 0.9v power current: mA */
-#define          MC_CMD_SENSOR_IN_I0V9  0x1d
+#define          MC_CMD_SENSOR_IN_I0V9 0x1d
 /* enum: 1.2v power current: mA */
-#define          MC_CMD_SENSOR_IN_I1V2  0x1e
+#define          MC_CMD_SENSOR_IN_I1V2 0x1e
 /* enum: Not a sensor: reserved for the next page flag */
-#define          MC_CMD_SENSOR_PAGE0_NEXT  0x1f
+#define          MC_CMD_SENSOR_PAGE0_NEXT 0x1f
 /* enum: 0.9v power voltage (at ADC): mV */
-#define          MC_CMD_SENSOR_IN_0V9_ADC  0x20
+#define          MC_CMD_SENSOR_IN_0V9_ADC 0x20
 /* enum: Controller temperature 2: degC */
-#define          MC_CMD_SENSOR_CONTROLLER_2_TEMP  0x21
+#define          MC_CMD_SENSOR_CONTROLLER_2_TEMP 0x21
 /* enum: Voltage regulator internal temperature: degC */
-#define          MC_CMD_SENSOR_VREG_INTERNAL_TEMP  0x22
+#define          MC_CMD_SENSOR_VREG_INTERNAL_TEMP 0x22
 /* enum: 0.9V voltage regulator temperature: degC */
-#define          MC_CMD_SENSOR_VREG_0V9_TEMP  0x23
+#define          MC_CMD_SENSOR_VREG_0V9_TEMP 0x23
 /* enum: 1.2V voltage regulator temperature: degC */
-#define          MC_CMD_SENSOR_VREG_1V2_TEMP  0x24
+#define          MC_CMD_SENSOR_VREG_1V2_TEMP 0x24
 /* enum: controller internal temperature sensor voltage (internal ADC): mV */
-#define          MC_CMD_SENSOR_CONTROLLER_VPTAT  0x25
+#define          MC_CMD_SENSOR_CONTROLLER_VPTAT 0x25
 /* enum: controller internal temperature (internal ADC): degC */
-#define          MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP  0x26
+#define          MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP 0x26
 /* enum: controller internal temperature sensor voltage (external ADC): mV */
-#define          MC_CMD_SENSOR_CONTROLLER_VPTAT_EXTADC  0x27
+#define          MC_CMD_SENSOR_CONTROLLER_VPTAT_EXTADC 0x27
 /* enum: controller internal temperature (external ADC): degC */
-#define          MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP_EXTADC  0x28
+#define          MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP_EXTADC 0x28
 /* enum: ambient temperature: degC */
-#define          MC_CMD_SENSOR_AMBIENT_TEMP  0x29
+#define          MC_CMD_SENSOR_AMBIENT_TEMP 0x29
 /* enum: air flow: bool */
-#define          MC_CMD_SENSOR_AIRFLOW  0x2a
+#define          MC_CMD_SENSOR_AIRFLOW 0x2a
 /* enum: voltage between VDD08D and VSS08D at CSR: mV */
-#define          MC_CMD_SENSOR_VDD08D_VSS08D_CSR  0x2b
+#define          MC_CMD_SENSOR_VDD08D_VSS08D_CSR 0x2b
 /* enum: voltage between VDD08D and VSS08D at CSR (external ADC): mV */
-#define          MC_CMD_SENSOR_VDD08D_VSS08D_CSR_EXTADC  0x2c
+#define          MC_CMD_SENSOR_VDD08D_VSS08D_CSR_EXTADC 0x2c
 /* enum: Hotpoint temperature: degC */
-#define          MC_CMD_SENSOR_HOTPOINT_TEMP  0x2d
+#define          MC_CMD_SENSOR_HOTPOINT_TEMP 0x2d
 /* enum: Port 0 PHY power switch over-current: bool */
-#define          MC_CMD_SENSOR_PHY_POWER_PORT0  0x2e
+#define          MC_CMD_SENSOR_PHY_POWER_PORT0 0x2e
 /* enum: Port 1 PHY power switch over-current: bool */
-#define          MC_CMD_SENSOR_PHY_POWER_PORT1  0x2f
-/* enum: Mop-up microcontroller reference voltage (millivolts) */
-#define          MC_CMD_SENSOR_MUM_VCC  0x30
+#define          MC_CMD_SENSOR_PHY_POWER_PORT1 0x2f
+/* enum: Mop-up microcontroller reference voltage: mV */
+#define          MC_CMD_SENSOR_MUM_VCC 0x30
 /* enum: 0.9v power phase A voltage: mV */
-#define          MC_CMD_SENSOR_IN_0V9_A  0x31
+#define          MC_CMD_SENSOR_IN_0V9_A 0x31
 /* enum: 0.9v power phase A current: mA */
-#define          MC_CMD_SENSOR_IN_I0V9_A  0x32
+#define          MC_CMD_SENSOR_IN_I0V9_A 0x32
 /* enum: 0.9V voltage regulator phase A temperature: degC */
-#define          MC_CMD_SENSOR_VREG_0V9_A_TEMP  0x33
+#define          MC_CMD_SENSOR_VREG_0V9_A_TEMP 0x33
 /* enum: 0.9v power phase B voltage: mV */
-#define          MC_CMD_SENSOR_IN_0V9_B  0x34
+#define          MC_CMD_SENSOR_IN_0V9_B 0x34
 /* enum: 0.9v power phase B current: mA */
-#define          MC_CMD_SENSOR_IN_I0V9_B  0x35
+#define          MC_CMD_SENSOR_IN_I0V9_B 0x35
 /* enum: 0.9V voltage regulator phase B temperature: degC */
-#define          MC_CMD_SENSOR_VREG_0V9_B_TEMP  0x36
+#define          MC_CMD_SENSOR_VREG_0V9_B_TEMP 0x36
 /* enum: CCOM AVREG 1v2 supply (internal ADC): mV */
-#define          MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY  0x37
+#define          MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY 0x37
 /* enum: CCOM AVREG 1v2 supply (external ADC): mV */
-#define          MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY_EXTADC  0x38
+#define          MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY_EXTADC 0x38
 /* enum: CCOM AVREG 1v8 supply (internal ADC): mV */
-#define          MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY  0x39
+#define          MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY 0x39
 /* enum: CCOM AVREG 1v8 supply (external ADC): mV */
-#define          MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY_EXTADC  0x3a
+#define          MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY_EXTADC 0x3a
 /* enum: CCOM RTS temperature: degC */
-#define          MC_CMD_SENSOR_CONTROLLER_RTS  0x3b
+#define          MC_CMD_SENSOR_CONTROLLER_RTS 0x3b
 /* enum: Not a sensor: reserved for the next page flag */
-#define          MC_CMD_SENSOR_PAGE1_NEXT  0x3f
+#define          MC_CMD_SENSOR_PAGE1_NEXT 0x3f
 /* enum: controller internal temperature sensor voltage on master core
  * (internal ADC): mV
  */
-#define          MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT  0x40
+#define          MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT 0x40
 /* enum: controller internal temperature on master core (internal ADC): degC */
-#define          MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP  0x41
+#define          MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP 0x41
 /* enum: controller internal temperature sensor voltage on master core
  * (external ADC): mV
  */
-#define          MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT_EXTADC  0x42
+#define          MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT_EXTADC 0x42
 /* enum: controller internal temperature on master core (external ADC): degC */
-#define          MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP_EXTADC  0x43
+#define          MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP_EXTADC 0x43
 /* enum: controller internal temperature sensor voltage on slave core (internal
  * ADC): mV
  */
-#define          MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT  0x44
+#define          MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT 0x44
 /* enum: controller internal temperature on slave core (internal ADC): degC */
-#define          MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP  0x45
+#define          MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP 0x45
 /* enum: controller internal temperature sensor voltage on slave core (external
  * ADC): mV
  */
-#define          MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT_EXTADC  0x46
+#define          MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT_EXTADC 0x46
 /* enum: controller internal temperature on slave core (external ADC): degC */
-#define          MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP_EXTADC  0x47
+#define          MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP_EXTADC 0x47
 /* enum: Voltage supplied to the SODIMMs from their power supply: mV */
-#define          MC_CMD_SENSOR_SODIMM_VOUT  0x49
+#define          MC_CMD_SENSOR_SODIMM_VOUT 0x49
 /* enum: Temperature of SODIMM 0 (if installed): degC */
-#define          MC_CMD_SENSOR_SODIMM_0_TEMP  0x4a
+#define          MC_CMD_SENSOR_SODIMM_0_TEMP 0x4a
 /* enum: Temperature of SODIMM 1 (if installed): degC */
-#define          MC_CMD_SENSOR_SODIMM_1_TEMP  0x4b
+#define          MC_CMD_SENSOR_SODIMM_1_TEMP 0x4b
 /* enum: Voltage supplied to the QSFP #0 from their power supply: mV */
-#define          MC_CMD_SENSOR_PHY0_VCC  0x4c
+#define          MC_CMD_SENSOR_PHY0_VCC 0x4c
 /* enum: Voltage supplied to the QSFP #1 from their power supply: mV */
-#define          MC_CMD_SENSOR_PHY1_VCC  0x4d
+#define          MC_CMD_SENSOR_PHY1_VCC 0x4d
 /* enum: Controller die temperature (TDIODE): degC */
-#define          MC_CMD_SENSOR_CONTROLLER_TDIODE_TEMP  0x4e
+#define          MC_CMD_SENSOR_CONTROLLER_TDIODE_TEMP 0x4e
 /* enum: Board temperature (front): degC */
-#define          MC_CMD_SENSOR_BOARD_FRONT_TEMP  0x4f
+#define          MC_CMD_SENSOR_BOARD_FRONT_TEMP 0x4f
 /* enum: Board temperature (back): degC */
-#define          MC_CMD_SENSOR_BOARD_BACK_TEMP  0x50
+#define          MC_CMD_SENSOR_BOARD_BACK_TEMP 0x50
 /* enum: 1.8v power current: mA */
-#define          MC_CMD_SENSOR_IN_I1V8  0x51
+#define          MC_CMD_SENSOR_IN_I1V8 0x51
 /* enum: 2.5v power current: mA */
-#define          MC_CMD_SENSOR_IN_I2V5  0x52
+#define          MC_CMD_SENSOR_IN_I2V5 0x52
 /* enum: 3.3v power current: mA */
-#define          MC_CMD_SENSOR_IN_I3V3  0x53
+#define          MC_CMD_SENSOR_IN_I3V3 0x53
 /* enum: 12v power current: mA */
-#define          MC_CMD_SENSOR_IN_I12V0  0x54
+#define          MC_CMD_SENSOR_IN_I12V0 0x54
 /* enum: 1.3v power: mV */
-#define          MC_CMD_SENSOR_IN_1V3  0x55
+#define          MC_CMD_SENSOR_IN_1V3 0x55
 /* enum: 1.3v power current: mA */
-#define          MC_CMD_SENSOR_IN_I1V3  0x56
+#define          MC_CMD_SENSOR_IN_I1V3 0x56
 /* enum: Not a sensor: reserved for the next page flag */
-#define          MC_CMD_SENSOR_PAGE2_NEXT  0x5f
+#define          MC_CMD_SENSOR_PAGE2_NEXT 0x5f
 /* MC_CMD_SENSOR_INFO_ENTRY_TYPEDEF */
 #define       MC_CMD_SENSOR_ENTRY_OFST 4
 #define       MC_CMD_SENSOR_ENTRY_LEN 8
 #define       MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE_OFST 2
 #define       MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE_LEN 1
 /* enum: Ok. */
-#define          MC_CMD_SENSOR_STATE_OK  0x0
+#define          MC_CMD_SENSOR_STATE_OK 0x0
 /* enum: Breached warning threshold. */
-#define          MC_CMD_SENSOR_STATE_WARNING  0x1
+#define          MC_CMD_SENSOR_STATE_WARNING 0x1
 /* enum: Breached fatal threshold. */
-#define          MC_CMD_SENSOR_STATE_FATAL  0x2
+#define          MC_CMD_SENSOR_STATE_FATAL 0x2
 /* enum: Fault with sensor. */
-#define          MC_CMD_SENSOR_STATE_BROKEN  0x3
+#define          MC_CMD_SENSOR_STATE_BROKEN 0x3
 /* enum: Sensor is working but does not currently have a reading. */
-#define          MC_CMD_SENSOR_STATE_NO_READING  0x4
+#define          MC_CMD_SENSOR_STATE_NO_READING 0x4
 /* enum: Sensor initialisation failed. */
-#define          MC_CMD_SENSOR_STATE_INIT_FAILED  0x5
+#define          MC_CMD_SENSOR_STATE_INIT_FAILED 0x5
 #define       MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE_LBN 16
 #define       MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE_WIDTH 8
 #define       MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_TYPE_OFST 3
 #define       MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_PROTOCOL_OFST 0
 #define       MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_PROTOCOL_LEN 4
 #define          MC_CMD_LIGHTSOUT_OFFLOAD_PROTOCOL_ARP 0x1 /* enum */
-#define          MC_CMD_LIGHTSOUT_OFFLOAD_PROTOCOL_NS  0x2 /* enum */
+#define          MC_CMD_LIGHTSOUT_OFFLOAD_PROTOCOL_NS 0x2 /* enum */
 #define       MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_DATA_OFST 4
 #define       MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_DATA_LEN 4
 #define       MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_DATA_MINNUM 1
 /* enum: Assert using the FAIL_ASSERTION_WITH_USEFUL_VALUES macro. Unless
  * you're testing firmware, this is what you want.
  */
-#define          MC_CMD_TESTASSERT_V2_IN_FAIL_ASSERTION_WITH_USEFUL_VALUES  0x0
+#define          MC_CMD_TESTASSERT_V2_IN_FAIL_ASSERTION_WITH_USEFUL_VALUES 0x0
 /* enum: Assert using assert(0); */
-#define          MC_CMD_TESTASSERT_V2_IN_ASSERT_FALSE  0x1
+#define          MC_CMD_TESTASSERT_V2_IN_ASSERT_FALSE 0x1
 /* enum: Deliberately trigger a watchdog */
-#define          MC_CMD_TESTASSERT_V2_IN_WATCHDOG  0x2
+#define          MC_CMD_TESTASSERT_V2_IN_WATCHDOG 0x2
 /* enum: Deliberately trigger a trap by loading from an invalid address */
-#define          MC_CMD_TESTASSERT_V2_IN_LOAD_TRAP  0x3
+#define          MC_CMD_TESTASSERT_V2_IN_LOAD_TRAP 0x3
 /* enum: Deliberately trigger a trap by storing to an invalid address */
-#define          MC_CMD_TESTASSERT_V2_IN_STORE_TRAP  0x4
+#define          MC_CMD_TESTASSERT_V2_IN_STORE_TRAP 0x4
 /* enum: Jump to an invalid address */
-#define          MC_CMD_TESTASSERT_V2_IN_JUMP_TRAP  0x5
+#define          MC_CMD_TESTASSERT_V2_IN_JUMP_TRAP 0x5
 
 /* MC_CMD_TESTASSERT_V2_OUT msgresponse */
 #define    MC_CMD_TESTASSERT_V2_OUT_LEN 0
 /*            MC_CMD_MUM_IN_CMD_LEN 4 */
 #define       MC_CMD_MUM_IN_LOG_OP_OFST 4
 #define       MC_CMD_MUM_IN_LOG_OP_LEN 4
-#define          MC_CMD_MUM_IN_LOG_OP_UART  0x1 /* enum */
+#define          MC_CMD_MUM_IN_LOG_OP_UART 0x1 /* enum */
 
 /* MC_CMD_MUM_IN_LOG_OP_UART msgrequest */
 #define    MC_CMD_MUM_IN_LOG_OP_UART_LEN 12
 #define       EVB_PORT_ID_PORT_ID_OFST 0
 #define       EVB_PORT_ID_PORT_ID_LEN 4
 /* enum: An invalid port handle. */
-#define          EVB_PORT_ID_NULL  0x0
+#define          EVB_PORT_ID_NULL 0x0
 /* enum: The port assigned to this function. */
-#define          EVB_PORT_ID_ASSIGNED  0x1000000
+#define          EVB_PORT_ID_ASSIGNED 0x1000000
 /* enum: External network port 0 */
-#define          EVB_PORT_ID_MAC0  0x2000000
+#define          EVB_PORT_ID_MAC0 0x2000000
 /* enum: External network port 1 */
-#define          EVB_PORT_ID_MAC1  0x2000001
+#define          EVB_PORT_ID_MAC1 0x2000001
 /* enum: External network port 2 */
-#define          EVB_PORT_ID_MAC2  0x2000002
+#define          EVB_PORT_ID_MAC2 0x2000002
 /* enum: External network port 3 */
-#define          EVB_PORT_ID_MAC3  0x2000003
+#define          EVB_PORT_ID_MAC3 0x2000003
 #define       EVB_PORT_ID_PORT_ID_LBN 0
 #define       EVB_PORT_ID_PORT_ID_WIDTH 32
 
 #define       EVB_VLAN_TAG_MODE_LBN 12
 #define       EVB_VLAN_TAG_MODE_WIDTH 4
 /* enum: Insert the VLAN. */
-#define          EVB_VLAN_TAG_INSERT  0x0
+#define          EVB_VLAN_TAG_INSERT 0x0
 /* enum: Replace the VLAN if already present. */
 #define          EVB_VLAN_TAG_REPLACE 0x1
 
 #define       NVRAM_PARTITION_TYPE_ID_OFST 0
 #define       NVRAM_PARTITION_TYPE_ID_LEN 2
 /* enum: Primary MC firmware partition */
-#define          NVRAM_PARTITION_TYPE_MC_FIRMWARE          0x100
+#define          NVRAM_PARTITION_TYPE_MC_FIRMWARE 0x100
 /* enum: Secondary MC firmware partition */
-#define          NVRAM_PARTITION_TYPE_MC_FIRMWARE_BACKUP   0x200
+#define          NVRAM_PARTITION_TYPE_MC_FIRMWARE_BACKUP 0x200
 /* enum: Expansion ROM partition */
-#define          NVRAM_PARTITION_TYPE_EXPANSION_ROM        0x300
+#define          NVRAM_PARTITION_TYPE_EXPANSION_ROM 0x300
 /* enum: Static configuration TLV partition */
-#define          NVRAM_PARTITION_TYPE_STATIC_CONFIG        0x400
+#define          NVRAM_PARTITION_TYPE_STATIC_CONFIG 0x400
 /* enum: Dynamic configuration TLV partition */
-#define          NVRAM_PARTITION_TYPE_DYNAMIC_CONFIG       0x500
+#define          NVRAM_PARTITION_TYPE_DYNAMIC_CONFIG 0x500
 /* enum: Expansion ROM configuration data for port 0 */
-#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT0  0x600
+#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT0 0x600
 /* enum: Synonym for EXPROM_CONFIG_PORT0 as used in pmap files */
-#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG        0x600
+#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG 0x600
 /* enum: Expansion ROM configuration data for port 1 */
-#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT1  0x601
+#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT1 0x601
 /* enum: Expansion ROM configuration data for port 2 */
-#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT2  0x602
+#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT2 0x602
 /* enum: Expansion ROM configuration data for port 3 */
-#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT3  0x603
+#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT3 0x603
 /* enum: Non-volatile log output partition */
-#define          NVRAM_PARTITION_TYPE_LOG                  0x700
+#define          NVRAM_PARTITION_TYPE_LOG 0x700
 /* enum: Non-volatile log output of second core on dual-core device */
-#define          NVRAM_PARTITION_TYPE_LOG_SLAVE            0x701
+#define          NVRAM_PARTITION_TYPE_LOG_SLAVE 0x701
 /* enum: Device state dump output partition */
-#define          NVRAM_PARTITION_TYPE_DUMP                 0x800
+#define          NVRAM_PARTITION_TYPE_DUMP 0x800
 /* enum: Application license key storage partition */
-#define          NVRAM_PARTITION_TYPE_LICENSE              0x900
+#define          NVRAM_PARTITION_TYPE_LICENSE 0x900
 /* enum: Start of range used for PHY partitions (low 8 bits are the PHY ID) */
-#define          NVRAM_PARTITION_TYPE_PHY_MIN              0xa00
+#define          NVRAM_PARTITION_TYPE_PHY_MIN 0xa00
 /* enum: End of range used for PHY partitions (low 8 bits are the PHY ID) */
-#define          NVRAM_PARTITION_TYPE_PHY_MAX              0xaff
+#define          NVRAM_PARTITION_TYPE_PHY_MAX 0xaff
 /* enum: Primary FPGA partition */
-#define          NVRAM_PARTITION_TYPE_FPGA                 0xb00
+#define          NVRAM_PARTITION_TYPE_FPGA 0xb00
 /* enum: Secondary FPGA partition */
-#define          NVRAM_PARTITION_TYPE_FPGA_BACKUP          0xb01
+#define          NVRAM_PARTITION_TYPE_FPGA_BACKUP 0xb01
 /* enum: FC firmware partition */
-#define          NVRAM_PARTITION_TYPE_FC_FIRMWARE          0xb02
+#define          NVRAM_PARTITION_TYPE_FC_FIRMWARE 0xb02
 /* enum: FC License partition */
-#define          NVRAM_PARTITION_TYPE_FC_LICENSE           0xb03
+#define          NVRAM_PARTITION_TYPE_FC_LICENSE 0xb03
 /* enum: Non-volatile log output partition for FC */
-#define          NVRAM_PARTITION_TYPE_FC_LOG               0xb04
+#define          NVRAM_PARTITION_TYPE_FC_LOG 0xb04
 /* enum: MUM firmware partition */
-#define          NVRAM_PARTITION_TYPE_MUM_FIRMWARE         0xc00
+#define          NVRAM_PARTITION_TYPE_MUM_FIRMWARE 0xc00
 /* enum: SUC firmware partition (this is intentionally an alias of
  * MUM_FIRMWARE)
  */
-#define          NVRAM_PARTITION_TYPE_SUC_FIRMWARE         0xc00
+#define          NVRAM_PARTITION_TYPE_SUC_FIRMWARE 0xc00
 /* enum: MUM Non-volatile log output partition. */
-#define          NVRAM_PARTITION_TYPE_MUM_LOG              0xc01
+#define          NVRAM_PARTITION_TYPE_MUM_LOG 0xc01
 /* enum: MUM Application table partition. */
-#define          NVRAM_PARTITION_TYPE_MUM_APPTABLE         0xc02
+#define          NVRAM_PARTITION_TYPE_MUM_APPTABLE 0xc02
 /* enum: MUM boot rom partition. */
-#define          NVRAM_PARTITION_TYPE_MUM_BOOT_ROM         0xc03
+#define          NVRAM_PARTITION_TYPE_MUM_BOOT_ROM 0xc03
 /* enum: MUM production signatures & calibration rom partition. */
-#define          NVRAM_PARTITION_TYPE_MUM_PROD_ROM         0xc04
+#define          NVRAM_PARTITION_TYPE_MUM_PROD_ROM 0xc04
 /* enum: MUM user signatures & calibration rom partition. */
-#define          NVRAM_PARTITION_TYPE_MUM_USER_ROM         0xc05
+#define          NVRAM_PARTITION_TYPE_MUM_USER_ROM 0xc05
 /* enum: MUM fuses and lockbits partition. */
-#define          NVRAM_PARTITION_TYPE_MUM_FUSELOCK         0xc06
+#define          NVRAM_PARTITION_TYPE_MUM_FUSELOCK 0xc06
 /* enum: UEFI expansion ROM if separate from PXE */
-#define          NVRAM_PARTITION_TYPE_EXPANSION_UEFI       0xd00
+#define          NVRAM_PARTITION_TYPE_EXPANSION_UEFI 0xd00
 /* enum: Used by the expansion ROM for logging */
-#define          NVRAM_PARTITION_TYPE_PXE_LOG              0x1000
+#define          NVRAM_PARTITION_TYPE_PXE_LOG 0x1000
 /* enum: Used for XIP code of shmbooted images */
-#define          NVRAM_PARTITION_TYPE_XIP_SCRATCH          0x1100
+#define          NVRAM_PARTITION_TYPE_XIP_SCRATCH 0x1100
 /* enum: Spare partition 2 */
-#define          NVRAM_PARTITION_TYPE_SPARE_2              0x1200
+#define          NVRAM_PARTITION_TYPE_SPARE_2 0x1200
 /* enum: Manufacturing partition. Used during manufacture to pass information
  * between XJTAG and Manftest.
  */
-#define          NVRAM_PARTITION_TYPE_MANUFACTURING        0x1300
+#define          NVRAM_PARTITION_TYPE_MANUFACTURING 0x1300
 /* enum: Spare partition 4 */
-#define          NVRAM_PARTITION_TYPE_SPARE_4              0x1400
+#define          NVRAM_PARTITION_TYPE_SPARE_4 0x1400
 /* enum: Spare partition 5 */
-#define          NVRAM_PARTITION_TYPE_SPARE_5              0x1500
+#define          NVRAM_PARTITION_TYPE_SPARE_5 0x1500
 /* enum: Partition for reporting MC status. See mc_flash_layout.h
  * medford_mc_status_hdr_t for layout on Medford.
  */
-#define          NVRAM_PARTITION_TYPE_STATUS               0x1600
+#define          NVRAM_PARTITION_TYPE_STATUS 0x1600
 /* enum: Spare partition 13 */
-#define          NVRAM_PARTITION_TYPE_SPARE_13              0x1700
+#define          NVRAM_PARTITION_TYPE_SPARE_13 0x1700
 /* enum: Spare partition 14 */
-#define          NVRAM_PARTITION_TYPE_SPARE_14              0x1800
+#define          NVRAM_PARTITION_TYPE_SPARE_14 0x1800
 /* enum: Spare partition 15 */
-#define          NVRAM_PARTITION_TYPE_SPARE_15              0x1900
+#define          NVRAM_PARTITION_TYPE_SPARE_15 0x1900
 /* enum: Spare partition 16 */
-#define          NVRAM_PARTITION_TYPE_SPARE_16              0x1a00
+#define          NVRAM_PARTITION_TYPE_SPARE_16 0x1a00
 /* enum: Factory defaults for dynamic configuration */
-#define          NVRAM_PARTITION_TYPE_DYNCONFIG_DEFAULTS    0x1b00
+#define          NVRAM_PARTITION_TYPE_DYNCONFIG_DEFAULTS 0x1b00
 /* enum: Factory defaults for expansion ROM configuration */
-#define          NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS    0x1c00
+#define          NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS 0x1c00
 /* enum: Field Replaceable Unit inventory information for use on IPMI
  * platforms. See SF-119124-PS. The STATIC_CONFIG partition may contain a
  * subset of the information stored in this partition.
  */
-#define          NVRAM_PARTITION_TYPE_FRU_INFORMATION       0x1d00
+#define          NVRAM_PARTITION_TYPE_FRU_INFORMATION 0x1d00
 /* enum: Start of reserved value range (firmware may use for any purpose) */
-#define          NVRAM_PARTITION_TYPE_RESERVED_VALUES_MIN  0xff00
+#define          NVRAM_PARTITION_TYPE_RESERVED_VALUES_MIN 0xff00
 /* enum: End of reserved value range (firmware may use for any purpose) */
-#define          NVRAM_PARTITION_TYPE_RESERVED_VALUES_MAX  0xfffd
+#define          NVRAM_PARTITION_TYPE_RESERVED_VALUES_MAX 0xfffd
 /* enum: Recovery partition map (provided if real map is missing or corrupt) */
-#define          NVRAM_PARTITION_TYPE_RECOVERY_MAP         0xfffe
+#define          NVRAM_PARTITION_TYPE_RECOVERY_MAP 0xfffe
 /* enum: Partition map (real map as stored in flash) */
-#define          NVRAM_PARTITION_TYPE_PARTITION_MAP        0xffff
+#define          NVRAM_PARTITION_TYPE_PARTITION_MAP 0xffff
 #define       NVRAM_PARTITION_TYPE_ID_LBN 0
 #define       NVRAM_PARTITION_TYPE_ID_WIDTH 16
 
 #define       LICENSED_APP_ID_ID_OFST 0
 #define       LICENSED_APP_ID_ID_LEN 4
 /* enum: OpenOnload */
-#define          LICENSED_APP_ID_ONLOAD                  0x1
+#define          LICENSED_APP_ID_ONLOAD 0x1
 /* enum: PTP timestamping */
-#define          LICENSED_APP_ID_PTP                     0x2
+#define          LICENSED_APP_ID_PTP 0x2
 /* enum: SolarCapture Pro */
-#define          LICENSED_APP_ID_SOLARCAPTURE_PRO        0x4
+#define          LICENSED_APP_ID_SOLARCAPTURE_PRO 0x4
 /* enum: SolarSecure filter engine */
-#define          LICENSED_APP_ID_SOLARSECURE             0x8
+#define          LICENSED_APP_ID_SOLARSECURE 0x8
 /* enum: Performance monitor */
-#define          LICENSED_APP_ID_PERF_MONITOR            0x10
+#define          LICENSED_APP_ID_PERF_MONITOR 0x10
 /* enum: SolarCapture Live */
-#define          LICENSED_APP_ID_SOLARCAPTURE_LIVE       0x20
+#define          LICENSED_APP_ID_SOLARCAPTURE_LIVE 0x20
 /* enum: Capture SolarSystem */
-#define          LICENSED_APP_ID_CAPTURE_SOLARSYSTEM     0x40
+#define          LICENSED_APP_ID_CAPTURE_SOLARSYSTEM 0x40
 /* enum: Network Access Control */
-#define          LICENSED_APP_ID_NETWORK_ACCESS_CONTROL  0x80
+#define          LICENSED_APP_ID_NETWORK_ACCESS_CONTROL 0x80
 /* enum: TCP Direct */
-#define          LICENSED_APP_ID_TCP_DIRECT              0x100
+#define          LICENSED_APP_ID_TCP_DIRECT 0x100
 /* enum: Low Latency */
-#define          LICENSED_APP_ID_LOW_LATENCY             0x200
+#define          LICENSED_APP_ID_LOW_LATENCY 0x200
 /* enum: SolarCapture Tap */
-#define          LICENSED_APP_ID_SOLARCAPTURE_TAP        0x400
+#define          LICENSED_APP_ID_SOLARCAPTURE_TAP 0x400
 /* enum: Capture SolarSystem 40G */
 #define          LICENSED_APP_ID_CAPTURE_SOLARSYSTEM_40G 0x800
 /* enum: Capture SolarSystem 1G */
-#define          LICENSED_APP_ID_CAPTURE_SOLARSYSTEM_1G  0x1000
+#define          LICENSED_APP_ID_CAPTURE_SOLARSYSTEM_1G 0x1000
 /* enum: ScaleOut Onload */
-#define          LICENSED_APP_ID_SCALEOUT_ONLOAD         0x2000
+#define          LICENSED_APP_ID_SCALEOUT_ONLOAD 0x2000
 /* enum: SCS Network Analytics Dashboard */
-#define          LICENSED_APP_ID_DSHBRD                  0x4000
+#define          LICENSED_APP_ID_DSHBRD 0x4000
 /* enum: SolarCapture Trading Analytics */
-#define          LICENSED_APP_ID_SCATRD                  0x8000
+#define          LICENSED_APP_ID_SCATRD 0x8000
 #define       LICENSED_APP_ID_ID_LBN 0
 #define       LICENSED_APP_ID_ID_WIDTH 32
 
 #define       TX_TIMESTAMP_EVENT_TX_EV_TYPE_OFST 3
 #define       TX_TIMESTAMP_EVENT_TX_EV_TYPE_LEN 1
 /* enum: This is a TX completion event, not a timestamp */
-#define          TX_TIMESTAMP_EVENT_TX_EV_COMPLETION  0x0
+#define          TX_TIMESTAMP_EVENT_TX_EV_COMPLETION 0x0
 /* enum: This is a TX completion event for a CTPIO transmit. The event format
  * is the same as for TX_EV_COMPLETION.
  */
-#define          TX_TIMESTAMP_EVENT_TX_EV_CTPIO_COMPLETION  0x11
+#define          TX_TIMESTAMP_EVENT_TX_EV_CTPIO_COMPLETION 0x11
 /* enum: This is the low part of a TX timestamp for a CTPIO transmission. The
  * event format is the same as for TX_EV_TSTAMP_LO
  */
-#define          TX_TIMESTAMP_EVENT_TX_EV_CTPIO_TS_LO  0x12
+#define          TX_TIMESTAMP_EVENT_TX_EV_CTPIO_TS_LO 0x12
 /* enum: This is the high part of a TX timestamp for a CTPIO transmission. The
  * event format is the same as for TX_EV_TSTAMP_HI
  */
-#define          TX_TIMESTAMP_EVENT_TX_EV_CTPIO_TS_HI  0x13
+#define          TX_TIMESTAMP_EVENT_TX_EV_CTPIO_TS_HI 0x13
 /* enum: This is the low part of a TX timestamp event */
-#define          TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_LO  0x51
+#define          TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_LO 0x51
 /* enum: This is the high part of a TX timestamp event */
-#define          TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_HI  0x52
+#define          TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_HI 0x52
 #define       TX_TIMESTAMP_EVENT_TX_EV_TYPE_LBN 24
 #define       TX_TIMESTAMP_EVENT_TX_EV_TYPE_WIDTH 8
 /* upper 16 bits of timestamp data */
 #define       QUEUE_CRC_MODE_MODE_LBN 0
 #define       QUEUE_CRC_MODE_MODE_WIDTH 4
 /* enum: No CRC. */
-#define          QUEUE_CRC_MODE_NONE  0x0
+#define          QUEUE_CRC_MODE_NONE 0x0
 /* enum: CRC Fiber channel over ethernet. */
-#define          QUEUE_CRC_MODE_FCOE  0x1
+#define          QUEUE_CRC_MODE_FCOE 0x1
 /* enum: CRC (digest) iSCSI header only. */
-#define          QUEUE_CRC_MODE_ISCSI_HDR  0x2
+#define          QUEUE_CRC_MODE_ISCSI_HDR 0x2
 /* enum: CRC (digest) iSCSI header and payload. */
-#define          QUEUE_CRC_MODE_ISCSI  0x3
+#define          QUEUE_CRC_MODE_ISCSI 0x3
 /* enum: CRC Fiber channel over IP over ethernet. */
-#define          QUEUE_CRC_MODE_FCOIPOE  0x4
+#define          QUEUE_CRC_MODE_FCOIPOE 0x4
 /* enum: CRC MPA. */
-#define          QUEUE_CRC_MODE_MPA  0x5
+#define          QUEUE_CRC_MODE_MPA 0x5
 #define       QUEUE_CRC_MODE_SPARE_LBN 4
 #define       QUEUE_CRC_MODE_SPARE_WIDTH 4
 
 /* Size, in entries */
 #define       MC_CMD_INIT_RXQ_EXT_IN_SIZE_OFST 0
 #define       MC_CMD_INIT_RXQ_EXT_IN_SIZE_LEN 4
-/* The EVQ to send events to. This is an index originally specified to INIT_EVQ
+/* The EVQ to send events to. This is an index originally specified to
+ * INIT_EVQ. If DMA_MODE == PACKED_STREAM this must be equal to INSTANCE.
  */
 #define       MC_CMD_INIT_RXQ_EXT_IN_TARGET_EVQ_OFST 4
 #define       MC_CMD_INIT_RXQ_EXT_IN_TARGET_EVQ_LEN 4
-/* The value to put in the event data. Check hardware spec. for valid range. */
+/* The value to put in the event data. Check hardware spec. for valid range.
+ * This field is ignored if DMA_MODE == EQUAL_STRIDE_PACKED_STREAM or DMA_MODE
+ * == PACKED_STREAM.
+ */
 #define       MC_CMD_INIT_RXQ_EXT_IN_LABEL_OFST 8
 #define       MC_CMD_INIT_RXQ_EXT_IN_LABEL_LEN 4
 /* Desired instance. Must be set to a specific instance, which is a function
 #define        MC_CMD_INIT_RXQ_EXT_IN_DMA_MODE_LBN 10
 #define        MC_CMD_INIT_RXQ_EXT_IN_DMA_MODE_WIDTH 4
 /* enum: One packet per descriptor (for normal networking) */
-#define          MC_CMD_INIT_RXQ_EXT_IN_SINGLE_PACKET  0x0
+#define          MC_CMD_INIT_RXQ_EXT_IN_SINGLE_PACKET 0x0
 /* enum: Pack multiple packets into large descriptors (for SolarCapture) */
-#define          MC_CMD_INIT_RXQ_EXT_IN_PACKED_STREAM  0x1
+#define          MC_CMD_INIT_RXQ_EXT_IN_PACKED_STREAM 0x1
+/* enum: Pack multiple packets into large descriptors using the format designed
+ * to maximise packet rate. This mode uses 1 "bucket" per descriptor with
+ * multiple fixed-size packet buffers within each bucket. For a full
+ * description see SF-119419-TC. This mode is only supported by "dpdk" datapath
+ * firmware.
+ */
+#define          MC_CMD_INIT_RXQ_EXT_IN_EQUAL_STRIDE_PACKED_STREAM 0x2
 #define        MC_CMD_INIT_RXQ_EXT_IN_FLAG_SNAPSHOT_MODE_LBN 14
 #define        MC_CMD_INIT_RXQ_EXT_IN_FLAG_SNAPSHOT_MODE_WIDTH 1
 #define        MC_CMD_INIT_RXQ_EXT_IN_PACKED_STREAM_BUFF_SIZE_LBN 15
 #define        MC_CMD_INIT_RXQ_EXT_IN_PACKED_STREAM_BUFF_SIZE_WIDTH 3
-#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_1M  0x0 /* enum */
-#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_512K  0x1 /* enum */
-#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_256K  0x2 /* enum */
-#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_128K  0x3 /* enum */
-#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_64K  0x4 /* enum */
+#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_1M 0x0 /* enum */
+#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_512K 0x1 /* enum */
+#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_256K 0x2 /* enum */
+#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_128K 0x3 /* enum */
+#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_64K 0x4 /* enum */
 #define        MC_CMD_INIT_RXQ_EXT_IN_FLAG_WANT_OUTER_CLASSES_LBN 18
 #define        MC_CMD_INIT_RXQ_EXT_IN_FLAG_WANT_OUTER_CLASSES_WIDTH 1
 #define        MC_CMD_INIT_RXQ_EXT_IN_FLAG_FORCE_EV_MERGING_LBN 19
 #define       MC_CMD_INIT_RXQ_EXT_IN_SNAPSHOT_LENGTH_OFST 540
 #define       MC_CMD_INIT_RXQ_EXT_IN_SNAPSHOT_LENGTH_LEN 4
 
+/* MC_CMD_INIT_RXQ_V3_IN msgrequest */
+#define    MC_CMD_INIT_RXQ_V3_IN_LEN 560
+/* Size, in entries */
+#define       MC_CMD_INIT_RXQ_V3_IN_SIZE_OFST 0
+#define       MC_CMD_INIT_RXQ_V3_IN_SIZE_LEN 4
+/* The EVQ to send events to. This is an index originally specified to
+ * INIT_EVQ. If DMA_MODE == PACKED_STREAM this must be equal to INSTANCE.
+ */
+#define       MC_CMD_INIT_RXQ_V3_IN_TARGET_EVQ_OFST 4
+#define       MC_CMD_INIT_RXQ_V3_IN_TARGET_EVQ_LEN 4
+/* The value to put in the event data. Check hardware spec. for valid range.
+ * This field is ignored if DMA_MODE == EQUAL_STRIDE_PACKED_STREAM or DMA_MODE
+ * == PACKED_STREAM.
+ */
+#define       MC_CMD_INIT_RXQ_V3_IN_LABEL_OFST 8
+#define       MC_CMD_INIT_RXQ_V3_IN_LABEL_LEN 4
+/* Desired instance. Must be set to a specific instance, which is a function
+ * local queue index.
+ */
+#define       MC_CMD_INIT_RXQ_V3_IN_INSTANCE_OFST 12
+#define       MC_CMD_INIT_RXQ_V3_IN_INSTANCE_LEN 4
+/* There will be more flags here. */
+#define       MC_CMD_INIT_RXQ_V3_IN_FLAGS_OFST 16
+#define       MC_CMD_INIT_RXQ_V3_IN_FLAGS_LEN 4
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_BUFF_MODE_LBN 0
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_BUFF_MODE_WIDTH 1
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_HDR_SPLIT_LBN 1
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_HDR_SPLIT_WIDTH 1
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_TIMESTAMP_LBN 2
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_TIMESTAMP_WIDTH 1
+#define        MC_CMD_INIT_RXQ_V3_IN_CRC_MODE_LBN 3
+#define        MC_CMD_INIT_RXQ_V3_IN_CRC_MODE_WIDTH 4
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_CHAIN_LBN 7
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_CHAIN_WIDTH 1
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_PREFIX_LBN 8
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_PREFIX_WIDTH 1
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_DISABLE_SCATTER_LBN 9
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_DISABLE_SCATTER_WIDTH 1
+#define        MC_CMD_INIT_RXQ_V3_IN_DMA_MODE_LBN 10
+#define        MC_CMD_INIT_RXQ_V3_IN_DMA_MODE_WIDTH 4
+/* enum: One packet per descriptor (for normal networking) */
+#define          MC_CMD_INIT_RXQ_V3_IN_SINGLE_PACKET 0x0
+/* enum: Pack multiple packets into large descriptors (for SolarCapture) */
+#define          MC_CMD_INIT_RXQ_V3_IN_PACKED_STREAM 0x1
+/* enum: Pack multiple packets into large descriptors using the format designed
+ * to maximise packet rate. This mode uses 1 "bucket" per descriptor with
+ * multiple fixed-size packet buffers within each bucket. For a full
+ * description see SF-119419-TC. This mode is only supported by "dpdk" datapath
+ * firmware.
+ */
+#define          MC_CMD_INIT_RXQ_V3_IN_EQUAL_STRIDE_PACKED_STREAM 0x2
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_SNAPSHOT_MODE_LBN 14
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_SNAPSHOT_MODE_WIDTH 1
+#define        MC_CMD_INIT_RXQ_V3_IN_PACKED_STREAM_BUFF_SIZE_LBN 15
+#define        MC_CMD_INIT_RXQ_V3_IN_PACKED_STREAM_BUFF_SIZE_WIDTH 3
+#define          MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_1M 0x0 /* enum */
+#define          MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_512K 0x1 /* enum */
+#define          MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_256K 0x2 /* enum */
+#define          MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_128K 0x3 /* enum */
+#define          MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_64K 0x4 /* enum */
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_WANT_OUTER_CLASSES_LBN 18
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_WANT_OUTER_CLASSES_WIDTH 1
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_FORCE_EV_MERGING_LBN 19
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_FORCE_EV_MERGING_WIDTH 1
+/* Owner ID to use if in buffer mode (zero if physical) */
+#define       MC_CMD_INIT_RXQ_V3_IN_OWNER_ID_OFST 20
+#define       MC_CMD_INIT_RXQ_V3_IN_OWNER_ID_LEN 4
+/* The port ID associated with the v-adaptor which should contain this DMAQ. */
+#define       MC_CMD_INIT_RXQ_V3_IN_PORT_ID_OFST 24
+#define       MC_CMD_INIT_RXQ_V3_IN_PORT_ID_LEN 4
+/* 64-bit address of 4k of 4k-aligned host memory buffer */
+#define       MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_OFST 28
+#define       MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_LEN 8
+#define       MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_LO_OFST 28
+#define       MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_HI_OFST 32
+#define       MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_NUM 64
+/* Maximum length of packet to receive, if SNAPSHOT_MODE flag is set */
+#define       MC_CMD_INIT_RXQ_V3_IN_SNAPSHOT_LENGTH_OFST 540
+#define       MC_CMD_INIT_RXQ_V3_IN_SNAPSHOT_LENGTH_LEN 4
+/* The number of packet buffers that will be contained within each
+ * EQUAL_STRIDE_PACKED_STREAM format bucket supplied by the driver. This field
+ * is ignored unless DMA_MODE == EQUAL_STRIDE_PACKED_STREAM.
+ */
+#define       MC_CMD_INIT_RXQ_V3_IN_ES_PACKET_BUFFERS_PER_BUCKET_OFST 544
+#define       MC_CMD_INIT_RXQ_V3_IN_ES_PACKET_BUFFERS_PER_BUCKET_LEN 4
+/* The length in bytes of the area in each packet buffer that can be written to
+ * by the adapter. This is used to store the packet prefix and the packet
+ * payload. This length does not include any end padding added by the driver.
+ * This field is ignored unless DMA_MODE == EQUAL_STRIDE_PACKED_STREAM.
+ */
+#define       MC_CMD_INIT_RXQ_V3_IN_ES_MAX_DMA_LEN_OFST 548
+#define       MC_CMD_INIT_RXQ_V3_IN_ES_MAX_DMA_LEN_LEN 4
+/* The length in bytes of a single packet buffer within a
+ * EQUAL_STRIDE_PACKED_STREAM format bucket. This field is ignored unless
+ * DMA_MODE == EQUAL_STRIDE_PACKED_STREAM.
+ */
+#define       MC_CMD_INIT_RXQ_V3_IN_ES_PACKET_STRIDE_OFST 552
+#define       MC_CMD_INIT_RXQ_V3_IN_ES_PACKET_STRIDE_LEN 4
+/* The maximum time in nanoseconds that the datapath will be backpressured if
+ * there are no RX descriptors available. If the timeout is reached and there
+ * are still no descriptors then the packet will be dropped. A timeout of 0
+ * means the datapath will never be blocked. This field is ignored unless
+ * DMA_MODE == EQUAL_STRIDE_PACKED_STREAM.
+ */
+#define       MC_CMD_INIT_RXQ_V3_IN_ES_HEAD_OF_LINE_BLOCK_TIMEOUT_OFST 556
+#define       MC_CMD_INIT_RXQ_V3_IN_ES_HEAD_OF_LINE_BLOCK_TIMEOUT_LEN 4
+
 /* MC_CMD_INIT_RXQ_OUT msgresponse */
 #define    MC_CMD_INIT_RXQ_OUT_LEN 0
 
 /* MC_CMD_INIT_RXQ_EXT_OUT msgresponse */
 #define    MC_CMD_INIT_RXQ_EXT_OUT_LEN 0
 
+/* MC_CMD_INIT_RXQ_V3_OUT msgresponse */
+#define    MC_CMD_INIT_RXQ_V3_OUT_LEN 0
+
 
 /***********************************/
 /* MC_CMD_INIT_TXQ
 #define        MC_CMD_PROXY_CMD_IN_TARGET_PF_WIDTH 16
 #define        MC_CMD_PROXY_CMD_IN_TARGET_VF_LBN 16
 #define        MC_CMD_PROXY_CMD_IN_TARGET_VF_WIDTH 16
-#define          MC_CMD_PROXY_CMD_IN_VF_NULL  0xffff /* enum */
+#define          MC_CMD_PROXY_CMD_IN_VF_NULL 0xffff /* enum */
 
 /* MC_CMD_PROXY_CMD_OUT msgresponse */
 #define    MC_CMD_PROXY_CMD_OUT_LEN 0
 #define       MC_PROXY_STATUS_BUFFER_HANDLE_OFST 0
 #define       MC_PROXY_STATUS_BUFFER_HANDLE_LEN 4
 /* enum: An invalid handle. */
-#define          MC_PROXY_STATUS_BUFFER_HANDLE_INVALID  0x0
+#define          MC_PROXY_STATUS_BUFFER_HANDLE_INVALID 0x0
 #define       MC_PROXY_STATUS_BUFFER_HANDLE_LBN 0
 #define       MC_PROXY_STATUS_BUFFER_HANDLE_WIDTH 32
 /* The requesting physical function number */
 #define       MC_CMD_FILTER_OP_IN_OP_OFST 0
 #define       MC_CMD_FILTER_OP_IN_OP_LEN 4
 /* enum: single-recipient filter insert */
-#define          MC_CMD_FILTER_OP_IN_OP_INSERT  0x0
+#define          MC_CMD_FILTER_OP_IN_OP_INSERT 0x0
 /* enum: single-recipient filter remove */
-#define          MC_CMD_FILTER_OP_IN_OP_REMOVE  0x1
+#define          MC_CMD_FILTER_OP_IN_OP_REMOVE 0x1
 /* enum: multi-recipient filter subscribe */
-#define          MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE  0x2
+#define          MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE 0x2
 /* enum: multi-recipient filter unsubscribe */
-#define          MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE  0x3
+#define          MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE 0x3
 /* enum: replace one recipient with another (warning - the filter handle may
  * change)
  */
-#define          MC_CMD_FILTER_OP_IN_OP_REPLACE  0x4
+#define          MC_CMD_FILTER_OP_IN_OP_REPLACE 0x4
 /* filter handle (for remove / unsubscribe operations) */
 #define       MC_CMD_FILTER_OP_IN_HANDLE_OFST 4
 #define       MC_CMD_FILTER_OP_IN_HANDLE_LEN 8
 #define       MC_CMD_FILTER_OP_IN_RX_DEST_OFST 20
 #define       MC_CMD_FILTER_OP_IN_RX_DEST_LEN 4
 /* enum: drop packets */
-#define          MC_CMD_FILTER_OP_IN_RX_DEST_DROP  0x0
+#define          MC_CMD_FILTER_OP_IN_RX_DEST_DROP 0x0
 /* enum: receive to host */
-#define          MC_CMD_FILTER_OP_IN_RX_DEST_HOST  0x1
+#define          MC_CMD_FILTER_OP_IN_RX_DEST_HOST 0x1
 /* enum: receive to MC */
-#define          MC_CMD_FILTER_OP_IN_RX_DEST_MC  0x2
+#define          MC_CMD_FILTER_OP_IN_RX_DEST_MC 0x2
 /* enum: loop back to TXDP 0 */
-#define          MC_CMD_FILTER_OP_IN_RX_DEST_TX0  0x3
+#define          MC_CMD_FILTER_OP_IN_RX_DEST_TX0 0x3
 /* enum: loop back to TXDP 1 */
-#define          MC_CMD_FILTER_OP_IN_RX_DEST_TX1  0x4
+#define          MC_CMD_FILTER_OP_IN_RX_DEST_TX1 0x4
 /* receive queue handle (for multiple queue modes, this is the base queue) */
 #define       MC_CMD_FILTER_OP_IN_RX_QUEUE_OFST 24
 #define       MC_CMD_FILTER_OP_IN_RX_QUEUE_LEN 4
 #define       MC_CMD_FILTER_OP_IN_RX_MODE_OFST 28
 #define       MC_CMD_FILTER_OP_IN_RX_MODE_LEN 4
 /* enum: receive to just the specified queue */
-#define          MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE  0x0
+#define          MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE 0x0
 /* enum: receive to multiple queues using RSS context */
-#define          MC_CMD_FILTER_OP_IN_RX_MODE_RSS  0x1
+#define          MC_CMD_FILTER_OP_IN_RX_MODE_RSS 0x1
 /* enum: receive to multiple queues using .1p mapping */
-#define          MC_CMD_FILTER_OP_IN_RX_MODE_DOT1P_MAPPING  0x2
+#define          MC_CMD_FILTER_OP_IN_RX_MODE_DOT1P_MAPPING 0x2
 /* enum: install a filter entry that will never match; for test purposes only
  */
-#define          MC_CMD_FILTER_OP_IN_RX_MODE_TEST_NEVER_MATCH  0x80000000
+#define          MC_CMD_FILTER_OP_IN_RX_MODE_TEST_NEVER_MATCH 0x80000000
 /* RSS context (for RX_MODE_RSS) or .1p mapping handle (for
  * RX_MODE_DOT1P_MAPPING), as returned by MC_CMD_RSS_CONTEXT_ALLOC or
  * MC_CMD_DOT1P_MAPPING_ALLOC.
 #define       MC_CMD_FILTER_OP_IN_TX_DEST_OFST 40
 #define       MC_CMD_FILTER_OP_IN_TX_DEST_LEN 4
 /* enum: request default behaviour (based on filter type) */
-#define          MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT  0xffffffff
+#define          MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT 0xffffffff
 #define        MC_CMD_FILTER_OP_IN_TX_DEST_MAC_LBN 0
 #define        MC_CMD_FILTER_OP_IN_TX_DEST_MAC_WIDTH 1
 #define        MC_CMD_FILTER_OP_IN_TX_DEST_PM_LBN 1
 #define       MC_CMD_FILTER_OP_EXT_IN_RX_DEST_OFST 20
 #define       MC_CMD_FILTER_OP_EXT_IN_RX_DEST_LEN 4
 /* enum: drop packets */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_DROP  0x0
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_DROP 0x0
 /* enum: receive to host */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_HOST  0x1
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_HOST 0x1
 /* enum: receive to MC */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_MC  0x2
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_MC 0x2
 /* enum: loop back to TXDP 0 */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_TX0  0x3
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_TX0 0x3
 /* enum: loop back to TXDP 1 */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_TX1  0x4
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_TX1 0x4
 /* receive queue handle (for multiple queue modes, this is the base queue) */
 #define       MC_CMD_FILTER_OP_EXT_IN_RX_QUEUE_OFST 24
 #define       MC_CMD_FILTER_OP_EXT_IN_RX_QUEUE_LEN 4
 #define       MC_CMD_FILTER_OP_EXT_IN_RX_MODE_OFST 28
 #define       MC_CMD_FILTER_OP_EXT_IN_RX_MODE_LEN 4
 /* enum: receive to just the specified queue */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_MODE_SIMPLE  0x0
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_MODE_SIMPLE 0x0
 /* enum: receive to multiple queues using RSS context */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_MODE_RSS  0x1
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_MODE_RSS 0x1
 /* enum: receive to multiple queues using .1p mapping */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_MODE_DOT1P_MAPPING  0x2
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_MODE_DOT1P_MAPPING 0x2
 /* enum: install a filter entry that will never match; for test purposes only
  */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_MODE_TEST_NEVER_MATCH  0x80000000
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_MODE_TEST_NEVER_MATCH 0x80000000
 /* RSS context (for RX_MODE_RSS) or .1p mapping handle (for
  * RX_MODE_DOT1P_MAPPING), as returned by MC_CMD_RSS_CONTEXT_ALLOC or
  * MC_CMD_DOT1P_MAPPING_ALLOC.
 #define       MC_CMD_FILTER_OP_EXT_IN_TX_DEST_OFST 40
 #define       MC_CMD_FILTER_OP_EXT_IN_TX_DEST_LEN 4
 /* enum: request default behaviour (based on filter type) */
-#define          MC_CMD_FILTER_OP_EXT_IN_TX_DEST_DEFAULT  0xffffffff
+#define          MC_CMD_FILTER_OP_EXT_IN_TX_DEST_DEFAULT 0xffffffff
 #define        MC_CMD_FILTER_OP_EXT_IN_TX_DEST_MAC_LBN 0
 #define        MC_CMD_FILTER_OP_EXT_IN_TX_DEST_MAC_WIDTH 1
 #define        MC_CMD_FILTER_OP_EXT_IN_TX_DEST_PM_LBN 1
 #define        MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_LBN 24
 #define        MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_WIDTH 8
 /* enum: Match VXLAN traffic with this VNI */
-#define          MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_VXLAN  0x0
+#define          MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_VXLAN 0x0
 /* enum: Match Geneve traffic with this VNI */
-#define          MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_GENEVE  0x1
+#define          MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_GENEVE 0x1
 /* enum: Reserved for experimental development use */
-#define          MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_EXPERIMENTAL  0xfe
+#define          MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_EXPERIMENTAL 0xfe
 #define        MC_CMD_FILTER_OP_EXT_IN_VSID_VALUE_LBN 0
 #define        MC_CMD_FILTER_OP_EXT_IN_VSID_VALUE_WIDTH 24
 #define        MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_LBN 24
 #define        MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_WIDTH 8
 /* enum: Match NVGRE traffic with this VSID */
-#define          MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_NVGRE  0x0
+#define          MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_NVGRE 0x0
 /* source IP address to match (as bytes in network order; set last 12 bytes to
  * 0 for IPv4 address)
  */
 #define       MC_CMD_FILTER_OP_EXT_IN_IFRM_DST_IP_OFST 156
 #define       MC_CMD_FILTER_OP_EXT_IN_IFRM_DST_IP_LEN 16
 
+/* MC_CMD_FILTER_OP_V3_IN msgrequest: FILTER_OP extension to support additional
+ * filter actions for Intel's DPDK (Data Plane Development Kit, dpdk.org) via
+ * its rte_flow API. This extension is only useful with the sfc_efx driver
+ * included as part of DPDK, used in conjunction with the dpdk datapath
+ * firmware variant.
+ */
+#define    MC_CMD_FILTER_OP_V3_IN_LEN 180
+/* identifies the type of operation requested */
+#define       MC_CMD_FILTER_OP_V3_IN_OP_OFST 0
+#define       MC_CMD_FILTER_OP_V3_IN_OP_LEN 4
+/*            Enum values, see field(s): */
+/*               MC_CMD_FILTER_OP_IN/OP */
+/* filter handle (for remove / unsubscribe operations) */
+#define       MC_CMD_FILTER_OP_V3_IN_HANDLE_OFST 4
+#define       MC_CMD_FILTER_OP_V3_IN_HANDLE_LEN 8
+#define       MC_CMD_FILTER_OP_V3_IN_HANDLE_LO_OFST 4
+#define       MC_CMD_FILTER_OP_V3_IN_HANDLE_HI_OFST 8
+/* The port ID associated with the v-adaptor which should contain this filter.
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_PORT_ID_OFST 12
+#define       MC_CMD_FILTER_OP_V3_IN_PORT_ID_LEN 4
+/* fields to include in match criteria */
+#define       MC_CMD_FILTER_OP_V3_IN_MATCH_FIELDS_OFST 16
+#define       MC_CMD_FILTER_OP_V3_IN_MATCH_FIELDS_LEN 4
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_IP_LBN 0
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_IP_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_DST_IP_LBN 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_DST_IP_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_MAC_LBN 2
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_MAC_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_PORT_LBN 3
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_PORT_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_DST_MAC_LBN 4
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_DST_MAC_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_DST_PORT_LBN 5
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_DST_PORT_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_ETHER_TYPE_LBN 6
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_ETHER_TYPE_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_INNER_VLAN_LBN 7
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_INNER_VLAN_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_OUTER_VLAN_LBN 8
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_OUTER_VLAN_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IP_PROTO_LBN 9
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IP_PROTO_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_FWDEF0_LBN 10
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_FWDEF0_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_VNI_OR_VSID_LBN 11
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_VNI_OR_VSID_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_IP_LBN 12
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_IP_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_IP_LBN 13
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_IP_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_MAC_LBN 14
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_MAC_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_PORT_LBN 15
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_PORT_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_MAC_LBN 16
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_MAC_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_PORT_LBN 17
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_PORT_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_ETHER_TYPE_LBN 18
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_ETHER_TYPE_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_INNER_VLAN_LBN 19
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_INNER_VLAN_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_OUTER_VLAN_LBN 20
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_OUTER_VLAN_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_IP_PROTO_LBN 21
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_IP_PROTO_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_FWDEF0_LBN 22
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_FWDEF0_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_FWDEF1_LBN 23
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_FWDEF1_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN 24
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN 25
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_UNKNOWN_MCAST_DST_LBN 30
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_UNKNOWN_MCAST_DST_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_UNKNOWN_UCAST_DST_LBN 31
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_UNKNOWN_UCAST_DST_WIDTH 1
+/* receive destination */
+#define       MC_CMD_FILTER_OP_V3_IN_RX_DEST_OFST 20
+#define       MC_CMD_FILTER_OP_V3_IN_RX_DEST_LEN 4
+/* enum: drop packets */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_DEST_DROP 0x0
+/* enum: receive to host */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_DEST_HOST 0x1
+/* enum: receive to MC */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_DEST_MC 0x2
+/* enum: loop back to TXDP 0 */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_DEST_TX0 0x3
+/* enum: loop back to TXDP 1 */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_DEST_TX1 0x4
+/* receive queue handle (for multiple queue modes, this is the base queue) */
+#define       MC_CMD_FILTER_OP_V3_IN_RX_QUEUE_OFST 24
+#define       MC_CMD_FILTER_OP_V3_IN_RX_QUEUE_LEN 4
+/* receive mode */
+#define       MC_CMD_FILTER_OP_V3_IN_RX_MODE_OFST 28
+#define       MC_CMD_FILTER_OP_V3_IN_RX_MODE_LEN 4
+/* enum: receive to just the specified queue */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_MODE_SIMPLE 0x0
+/* enum: receive to multiple queues using RSS context */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_MODE_RSS 0x1
+/* enum: receive to multiple queues using .1p mapping */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_MODE_DOT1P_MAPPING 0x2
+/* enum: install a filter entry that will never match; for test purposes only
+ */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_MODE_TEST_NEVER_MATCH 0x80000000
+/* RSS context (for RX_MODE_RSS) or .1p mapping handle (for
+ * RX_MODE_DOT1P_MAPPING), as returned by MC_CMD_RSS_CONTEXT_ALLOC or
+ * MC_CMD_DOT1P_MAPPING_ALLOC.
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_RX_CONTEXT_OFST 32
+#define       MC_CMD_FILTER_OP_V3_IN_RX_CONTEXT_LEN 4
+/* transmit domain (reserved; set to 0) */
+#define       MC_CMD_FILTER_OP_V3_IN_TX_DOMAIN_OFST 36
+#define       MC_CMD_FILTER_OP_V3_IN_TX_DOMAIN_LEN 4
+/* transmit destination (either set the MAC and/or PM bits for explicit
+ * control, or set this field to TX_DEST_DEFAULT for sensible default
+ * behaviour)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_TX_DEST_OFST 40
+#define       MC_CMD_FILTER_OP_V3_IN_TX_DEST_LEN 4
+/* enum: request default behaviour (based on filter type) */
+#define          MC_CMD_FILTER_OP_V3_IN_TX_DEST_DEFAULT 0xffffffff
+#define        MC_CMD_FILTER_OP_V3_IN_TX_DEST_MAC_LBN 0
+#define        MC_CMD_FILTER_OP_V3_IN_TX_DEST_MAC_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_TX_DEST_PM_LBN 1
+#define        MC_CMD_FILTER_OP_V3_IN_TX_DEST_PM_WIDTH 1
+/* source MAC address to match (as bytes in network order) */
+#define       MC_CMD_FILTER_OP_V3_IN_SRC_MAC_OFST 44
+#define       MC_CMD_FILTER_OP_V3_IN_SRC_MAC_LEN 6
+/* source port to match (as bytes in network order) */
+#define       MC_CMD_FILTER_OP_V3_IN_SRC_PORT_OFST 50
+#define       MC_CMD_FILTER_OP_V3_IN_SRC_PORT_LEN 2
+/* destination MAC address to match (as bytes in network order) */
+#define       MC_CMD_FILTER_OP_V3_IN_DST_MAC_OFST 52
+#define       MC_CMD_FILTER_OP_V3_IN_DST_MAC_LEN 6
+/* destination port to match (as bytes in network order) */
+#define       MC_CMD_FILTER_OP_V3_IN_DST_PORT_OFST 58
+#define       MC_CMD_FILTER_OP_V3_IN_DST_PORT_LEN 2
+/* Ethernet type to match (as bytes in network order) */
+#define       MC_CMD_FILTER_OP_V3_IN_ETHER_TYPE_OFST 60
+#define       MC_CMD_FILTER_OP_V3_IN_ETHER_TYPE_LEN 2
+/* Inner VLAN tag to match (as bytes in network order) */
+#define       MC_CMD_FILTER_OP_V3_IN_INNER_VLAN_OFST 62
+#define       MC_CMD_FILTER_OP_V3_IN_INNER_VLAN_LEN 2
+/* Outer VLAN tag to match (as bytes in network order) */
+#define       MC_CMD_FILTER_OP_V3_IN_OUTER_VLAN_OFST 64
+#define       MC_CMD_FILTER_OP_V3_IN_OUTER_VLAN_LEN 2
+/* IP protocol to match (in low byte; set high byte to 0) */
+#define       MC_CMD_FILTER_OP_V3_IN_IP_PROTO_OFST 66
+#define       MC_CMD_FILTER_OP_V3_IN_IP_PROTO_LEN 2
+/* Firmware defined register 0 to match (reserved; set to 0) */
+#define       MC_CMD_FILTER_OP_V3_IN_FWDEF0_OFST 68
+#define       MC_CMD_FILTER_OP_V3_IN_FWDEF0_LEN 4
+/* VNI (for VXLAN/Geneve, when IP protocol is UDP) or VSID (for NVGRE, when IP
+ * protocol is GRE) to match (as bytes in network order; set last byte to 0 for
+ * VXLAN/NVGRE, or 1 for Geneve)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_VNI_OR_VSID_OFST 72
+#define       MC_CMD_FILTER_OP_V3_IN_VNI_OR_VSID_LEN 4
+#define        MC_CMD_FILTER_OP_V3_IN_VNI_VALUE_LBN 0
+#define        MC_CMD_FILTER_OP_V3_IN_VNI_VALUE_WIDTH 24
+#define        MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_LBN 24
+#define        MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_WIDTH 8
+/* enum: Match VXLAN traffic with this VNI */
+#define          MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_VXLAN 0x0
+/* enum: Match Geneve traffic with this VNI */
+#define          MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_GENEVE 0x1
+/* enum: Reserved for experimental development use */
+#define          MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_EXPERIMENTAL 0xfe
+#define        MC_CMD_FILTER_OP_V3_IN_VSID_VALUE_LBN 0
+#define        MC_CMD_FILTER_OP_V3_IN_VSID_VALUE_WIDTH 24
+#define        MC_CMD_FILTER_OP_V3_IN_VSID_TYPE_LBN 24
+#define        MC_CMD_FILTER_OP_V3_IN_VSID_TYPE_WIDTH 8
+/* enum: Match NVGRE traffic with this VSID */
+#define          MC_CMD_FILTER_OP_V3_IN_VSID_TYPE_NVGRE 0x0
+/* source IP address to match (as bytes in network order; set last 12 bytes to
+ * 0 for IPv4 address)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_SRC_IP_OFST 76
+#define       MC_CMD_FILTER_OP_V3_IN_SRC_IP_LEN 16
+/* destination IP address to match (as bytes in network order; set last 12
+ * bytes to 0 for IPv4 address)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_DST_IP_OFST 92
+#define       MC_CMD_FILTER_OP_V3_IN_DST_IP_LEN 16
+/* VXLAN/NVGRE inner frame source MAC address to match (as bytes in network
+ * order)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_MAC_OFST 108
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_MAC_LEN 6
+/* VXLAN/NVGRE inner frame source port to match (as bytes in network order) */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_PORT_OFST 114
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_PORT_LEN 2
+/* VXLAN/NVGRE inner frame destination MAC address to match (as bytes in
+ * network order)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_DST_MAC_OFST 116
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_DST_MAC_LEN 6
+/* VXLAN/NVGRE inner frame destination port to match (as bytes in network
+ * order)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_DST_PORT_OFST 122
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_DST_PORT_LEN 2
+/* VXLAN/NVGRE inner frame Ethernet type to match (as bytes in network order)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_ETHER_TYPE_OFST 124
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_ETHER_TYPE_LEN 2
+/* VXLAN/NVGRE inner frame Inner VLAN tag to match (as bytes in network order)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_INNER_VLAN_OFST 126
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_INNER_VLAN_LEN 2
+/* VXLAN/NVGRE inner frame Outer VLAN tag to match (as bytes in network order)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_OUTER_VLAN_OFST 128
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_OUTER_VLAN_LEN 2
+/* VXLAN/NVGRE inner frame IP protocol to match (in low byte; set high byte to
+ * 0)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_IP_PROTO_OFST 130
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_IP_PROTO_LEN 2
+/* VXLAN/NVGRE inner frame Firmware defined register 0 to match (reserved; set
+ * to 0)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_FWDEF0_OFST 132
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_FWDEF0_LEN 4
+/* VXLAN/NVGRE inner frame Firmware defined register 1 to match (reserved; set
+ * to 0)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_FWDEF1_OFST 136
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_FWDEF1_LEN 4
+/* VXLAN/NVGRE inner frame source IP address to match (as bytes in network
+ * order; set last 12 bytes to 0 for IPv4 address)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_IP_OFST 140
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_IP_LEN 16
+/* VXLAN/NVGRE inner frame destination IP address to match (as bytes in network
+ * order; set last 12 bytes to 0 for IPv4 address)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_DST_IP_OFST 156
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_DST_IP_LEN 16
+/* Set an action for all packets matching this filter. The DPDK driver and dpdk
+ * f/w variant use their own specific delivery structures, which are documented
+ * in the DPDK Firmware Driver Interface (SF-119419-TC). Requesting anything
+ * other than MATCH_ACTION_NONE when the NIC is running another f/w variant
+ * will cause the filter insertion to fail with ENOTSUP.
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_OFST 172
+#define       MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_LEN 4
+/* enum: do nothing extra */
+#define          MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_NONE 0x0
+/* enum: Set the match flag in the packet prefix for packets matching the
+ * filter (only with dpdk firmware, otherwise fails with ENOTSUP). Used to
+ * support the DPDK rte_flow "FLAG" action.
+ */
+#define          MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_FLAG 0x1
+/* enum: Insert MATCH_MARK_VALUE into the packet prefix for packets matching
+ * the filter (only with dpdk firmware, otherwise fails with ENOTSUP). Used to
+ * support the DPDK rte_flow "MARK" action.
+ */
+#define          MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_MARK 0x2
+/* the mark value for MATCH_ACTION_MARK */
+#define       MC_CMD_FILTER_OP_V3_IN_MATCH_MARK_VALUE_OFST 176
+#define       MC_CMD_FILTER_OP_V3_IN_MATCH_MARK_VALUE_LEN 4
+
 /* MC_CMD_FILTER_OP_OUT msgresponse */
 #define    MC_CMD_FILTER_OP_OUT_LEN 12
 /* identifies the type of operation requested */
 #define       MC_CMD_FILTER_OP_OUT_HANDLE_LO_OFST 4
 #define       MC_CMD_FILTER_OP_OUT_HANDLE_HI_OFST 8
 /* enum: guaranteed invalid filter handle (low 32 bits) */
-#define          MC_CMD_FILTER_OP_OUT_HANDLE_LO_INVALID  0xffffffff
+#define          MC_CMD_FILTER_OP_OUT_HANDLE_LO_INVALID 0xffffffff
 /* enum: guaranteed invalid filter handle (high 32 bits) */
-#define          MC_CMD_FILTER_OP_OUT_HANDLE_HI_INVALID  0xffffffff
+#define          MC_CMD_FILTER_OP_OUT_HANDLE_HI_INVALID 0xffffffff
 
 /* MC_CMD_FILTER_OP_EXT_OUT msgresponse */
 #define    MC_CMD_FILTER_OP_EXT_OUT_LEN 12
 #define       MC_CMD_GET_PARSER_DISP_INFO_IN_OP_OFST 0
 #define       MC_CMD_GET_PARSER_DISP_INFO_IN_OP_LEN 4
 /* enum: read the list of supported RX filter matches */
-#define          MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_RX_MATCHES  0x1
+#define          MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_RX_MATCHES 0x1
 /* enum: read flags indicating restrictions on filter insertion for the calling
  * client
  */
-#define          MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_RESTRICTIONS  0x2
+#define          MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_RESTRICTIONS 0x2
 /* enum: read properties relating to security rules (Medford-only; for use by
  * SolarSecure apps, not directly by drivers. See SF-114946-SW.)
  */
-#define          MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SECURITY_RULE_INFO  0x3
+#define          MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SECURITY_RULE_INFO 0x3
 /* enum: read the list of supported RX filter matches for VXLAN/NVGRE
  * encapsulated frames, which follow a different match sequence to normal
  * frames (Medford only)
  */
-#define          MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_ENCAP_RX_MATCHES  0x4
+#define          MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_ENCAP_RX_MATCHES 0x4
 
 /* MC_CMD_GET_PARSER_DISP_INFO_OUT msgresponse */
 #define    MC_CMD_GET_PARSER_DISP_INFO_OUT_LENMIN 8
  * Direct read/write of parser-dispatcher state (DICPUs and LUE) for debugging.
  * Please note that this interface is only of use to debug tools which have
  * knowledge of firmware and hardware data structures; nothing here is intended
- * for use by normal driver code.
+ * for use by normal driver code. Note that although this command is in the
+ * Admin privilege group, in tamperproof adapters, only read operations are
+ * permitted.
  */
 #define MC_CMD_PARSER_DISP_RW 0xe5
 
 #define       MC_CMD_PARSER_DISP_RW_IN_TARGET_OFST 0
 #define       MC_CMD_PARSER_DISP_RW_IN_TARGET_LEN 4
 /* enum: RX dispatcher CPU */
-#define          MC_CMD_PARSER_DISP_RW_IN_RX_DICPU  0x0
+#define          MC_CMD_PARSER_DISP_RW_IN_RX_DICPU 0x0
 /* enum: TX dispatcher CPU */
-#define          MC_CMD_PARSER_DISP_RW_IN_TX_DICPU  0x1
+#define          MC_CMD_PARSER_DISP_RW_IN_TX_DICPU 0x1
 /* enum: Lookup engine (with original metadata format). Deprecated; used only
  * by cmdclient as a fallback for very old Huntington firmware, and not
  * supported in firmware beyond v6.4.0.1005. Use LUE_VERSIONED_METADATA
  * instead.
  */
-#define          MC_CMD_PARSER_DISP_RW_IN_LUE  0x2
+#define          MC_CMD_PARSER_DISP_RW_IN_LUE 0x2
 /* enum: Lookup engine (with requested metadata format) */
-#define          MC_CMD_PARSER_DISP_RW_IN_LUE_VERSIONED_METADATA  0x3
+#define          MC_CMD_PARSER_DISP_RW_IN_LUE_VERSIONED_METADATA 0x3
 /* enum: RX0 dispatcher CPU (alias for RX_DICPU; Medford has 2 RX DICPUs) */
-#define          MC_CMD_PARSER_DISP_RW_IN_RX0_DICPU  0x0
+#define          MC_CMD_PARSER_DISP_RW_IN_RX0_DICPU 0x0
 /* enum: RX1 dispatcher CPU (only valid for Medford) */
-#define          MC_CMD_PARSER_DISP_RW_IN_RX1_DICPU  0x4
+#define          MC_CMD_PARSER_DISP_RW_IN_RX1_DICPU 0x4
 /* enum: Miscellaneous other state (only valid for Medford) */
-#define          MC_CMD_PARSER_DISP_RW_IN_MISC_STATE  0x5
+#define          MC_CMD_PARSER_DISP_RW_IN_MISC_STATE 0x5
 /* identifies the type of operation requested */
 #define       MC_CMD_PARSER_DISP_RW_IN_OP_OFST 4
 #define       MC_CMD_PARSER_DISP_RW_IN_OP_LEN 4
 /* enum: Read a word of DICPU DMEM or a LUE entry */
-#define          MC_CMD_PARSER_DISP_RW_IN_READ  0x0
-/* enum: Write a word of DICPU DMEM or a LUE entry. */
-#define          MC_CMD_PARSER_DISP_RW_IN_WRITE  0x1
-/* enum: Read-modify-write a word of DICPU DMEM (not valid for LUE). */
-#define          MC_CMD_PARSER_DISP_RW_IN_RMW  0x2
+#define          MC_CMD_PARSER_DISP_RW_IN_READ 0x0
+/* enum: Write a word of DICPU DMEM or a LUE entry. Not permitted on
+ * tamperproof adapters.
+ */
+#define          MC_CMD_PARSER_DISP_RW_IN_WRITE 0x1
+/* enum: Read-modify-write a word of DICPU DMEM (not valid for LUE). Not
+ * permitted on tamperproof adapters.
+ */
+#define          MC_CMD_PARSER_DISP_RW_IN_RMW 0x2
 /* data memory address (DICPU targets) or LUE index (LUE targets) */
 #define       MC_CMD_PARSER_DISP_RW_IN_ADDRESS_OFST 8
 #define       MC_CMD_PARSER_DISP_RW_IN_ADDRESS_LEN 4
 #define       MC_CMD_PARSER_DISP_RW_IN_SELECTOR_OFST 8
 #define       MC_CMD_PARSER_DISP_RW_IN_SELECTOR_LEN 4
 /* enum: Port to datapath mapping */
-#define          MC_CMD_PARSER_DISP_RW_IN_PORT_DP_MAPPING  0x1
+#define          MC_CMD_PARSER_DISP_RW_IN_PORT_DP_MAPPING 0x1
 /* value to write (for DMEM writes) */
 #define       MC_CMD_PARSER_DISP_RW_IN_DMEM_WRITE_VALUE_OFST 12
 #define       MC_CMD_PARSER_DISP_RW_IN_DMEM_WRITE_VALUE_LEN 4
 #define       MC_CMD_PARSER_DISP_RW_OUT_PORT_DP_MAPPING_OFST 0
 #define       MC_CMD_PARSER_DISP_RW_OUT_PORT_DP_MAPPING_LEN 4
 #define       MC_CMD_PARSER_DISP_RW_OUT_PORT_DP_MAPPING_NUM 4
-#define          MC_CMD_PARSER_DISP_RW_OUT_DP0  0x1 /* enum */
-#define          MC_CMD_PARSER_DISP_RW_OUT_DP1  0x2 /* enum */
+#define          MC_CMD_PARSER_DISP_RW_OUT_DP0 0x1 /* enum */
+#define          MC_CMD_PARSER_DISP_RW_OUT_DP1 0x2 /* enum */
 
 
 /***********************************/
 #define       MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_OFST 0
 #define       MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_LEN 4
 /* enum: MISC. */
-#define          MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_MISC  0x0
+#define          MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_MISC 0x0
 /* enum: IDO. */
-#define          MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_IDO  0x1
+#define          MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_IDO 0x1
 /* enum: RO. */
-#define          MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_RO  0x2
+#define          MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_RO 0x2
 /* enum: TPH Type. */
-#define          MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_TPH_TYPE  0x3
+#define          MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_TPH_TYPE 0x3
 
 /* MC_CMD_GET_TLP_PROCESSING_GLOBALS_OUT msgresponse */
 #define    MC_CMD_GET_TLP_PROCESSING_GLOBALS_OUT_LEN 8
  */
 #define       MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_OFST 0
 #define       MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_LEN 4
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_IDLE     0x0 /* enum */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_RESET    0x1 /* enum */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_IMEMS    0x2 /* enum */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_VECTORS  0x3 /* enum */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_READY    0x4 /* enum */
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_IDLE 0x0 /* enum */
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_RESET 0x1 /* enum */
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_IMEMS 0x2 /* enum */
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_VECTORS 0x3 /* enum */
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_READY 0x4 /* enum */
 /* Target for download. (These match the blob numbers defined in
  * mc_flash_layout.h.)
  */
 #define       MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_OFST 4
 #define       MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_LEN 4
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_TEXT  0x0
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_TEXT 0x0
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_TEXT  0x1
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_TEXT 0x1
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDP_TEXT  0x2
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDP_TEXT 0x2
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDP_TEXT  0x3
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDP_TEXT 0x3
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_LUT  0x4
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_LUT 0x4
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_LUT_CFG  0x5
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_LUT_CFG 0x5
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_LUT  0x6
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_LUT 0x6
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_LUT_CFG  0x7
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_LUT_CFG 0x7
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_PGM  0x8
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_PGM 0x8
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_SL_PGM  0x9
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_SL_PGM 0x9
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_PGM  0xa
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_PGM 0xa
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_SL_PGM  0xb
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_SL_PGM 0xb
 /* enum: Valid in phase 3 (PHASE_VECTORS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_VTBL0  0xc
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_VTBL0 0xc
 /* enum: Valid in phase 3 (PHASE_VECTORS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_VTBL0  0xd
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_VTBL0 0xd
 /* enum: Valid in phase 3 (PHASE_VECTORS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_VTBL1  0xe
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_VTBL1 0xe
 /* enum: Valid in phase 3 (PHASE_VECTORS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_VTBL1  0xf
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_VTBL1 0xf
 /* enum: Valid in phases 1 (PHASE_RESET) and 4 (PHASE_READY) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_ALL  0xffffffff
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_ALL 0xffffffff
 /* Chunk ID, or CHUNK_ID_LAST or CHUNK_ID_ABORT */
 #define       MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_OFST 8
 #define       MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_LEN 4
 /* enum: Last chunk, containing checksum rather than data */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_LAST  0xffffffff
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_LAST 0xffffffff
 /* enum: Abort download of this item */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_ABORT  0xfffffffe
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_ABORT 0xfffffffe
 /* Length of this chunk in bytes */
 #define       MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_LEN_OFST 12
 #define       MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_LEN_LEN 4
 #define       MC_CMD_SATELLITE_DOWNLOAD_OUT_INFO_OFST 4
 #define       MC_CMD_SATELLITE_DOWNLOAD_OUT_INFO_LEN 4
 /* enum: Code download OK, completed. */
-#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_COMPLETE  0x0
+#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_COMPLETE 0x0
 /* enum: Code download aborted as requested. */
-#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_ABORTED  0x1
+#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_ABORTED 0x1
 /* enum: Code download OK so far, send next chunk. */
-#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_NEXT_CHUNK  0x2
+#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_NEXT_CHUNK 0x2
 /* enum: Download phases out of sequence */
-#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_PHASE  0x100
+#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_PHASE 0x100
 /* enum: Bad target for this phase */
-#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_TARGET  0x101
+#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_TARGET 0x101
 /* enum: Chunk ID out of sequence */
-#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHUNK_ID  0x200
+#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHUNK_ID 0x200
 /* enum: Chunk length zero or too large */
-#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHUNK_LEN  0x201
+#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHUNK_LEN 0x201
 /* enum: Checksum was incorrect */
-#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHECKSUM  0x300
+#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHECKSUM 0x300
 
 
 /***********************************/
 #define       MC_CMD_GET_CAPABILITIES_OUT_RX_DPCPU_FW_ID_OFST 4
 #define       MC_CMD_GET_CAPABILITIES_OUT_RX_DPCPU_FW_ID_LEN 2
 /* enum: Standard RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP  0x0
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP 0x0
 /* enum: Low latency RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_LOW_LATENCY  0x1
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_LOW_LATENCY 0x1
 /* enum: Packed stream RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_PACKED_STREAM  0x2
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_PACKED_STREAM 0x2
 /* enum: Rules engine RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_RULES_ENGINE  0x5
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_RULES_ENGINE 0x5
+/* enum: DPDK RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_DPDK 0x6
 /* enum: BIST RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_BIST  0x10a
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_BIST 0x10a
 /* enum: RXDP Test firmware image 1 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH  0x101
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
 /* enum: RXDP Test firmware image 2 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD  0x102
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
 /* enum: RXDP Test firmware image 3 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST  0x103
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
 /* enum: RXDP Test firmware image 4 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE  0x104
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
 /* enum: RXDP Test firmware image 5 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_BACKPRESSURE  0x105
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_BACKPRESSURE 0x105
 /* enum: RXDP Test firmware image 6 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_PACKET_EDITS  0x106
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
 /* enum: RXDP Test firmware image 7 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_RX_HDR_SPLIT  0x107
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
 /* enum: RXDP Test firmware image 8 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_DISABLE_DL  0x108
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
 /* enum: RXDP Test firmware image 9 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_DOORBELL_DELAY  0x10b
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
 /* enum: RXDP Test firmware image 10 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_SLOW  0x10c
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_SLOW 0x10c
 /* TxDPCPU firmware id. */
 #define       MC_CMD_GET_CAPABILITIES_OUT_TX_DPCPU_FW_ID_OFST 6
 #define       MC_CMD_GET_CAPABILITIES_OUT_TX_DPCPU_FW_ID_LEN 2
 /* enum: Standard TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP  0x0
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP 0x0
 /* enum: Low latency TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_LOW_LATENCY  0x1
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_LOW_LATENCY 0x1
 /* enum: High packet rate TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_HIGH_PACKET_RATE  0x3
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_HIGH_PACKET_RATE 0x3
 /* enum: Rules engine TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_RULES_ENGINE  0x5
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_RULES_ENGINE 0x5
+/* enum: DPDK TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_DPDK 0x6
 /* enum: BIST TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_BIST  0x12d
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_BIST 0x12d
 /* enum: TXDP Test firmware image 1 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_TSO_EDIT  0x101
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
 /* enum: TXDP Test firmware image 2 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_PACKET_EDITS  0x102
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
 /* enum: TXDP CSR bus test firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_CSR  0x103
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_CSR 0x103
 #define       MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_VERSION_OFST 8
 #define       MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_VERSION_LEN 2
 #define        MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_VERSION_REV_LBN 0
 /* enum: reserved value - do not use (may indicate alternative interpretation
  * of REV field in future)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RESERVED  0x0
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RESERVED 0x0
 /* enum: Trivial RX PD firmware for early Huntington development (Huntington
  * development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_FIRST_PKT  0x1
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
 /* enum: RX PD firmware with approximately Siena-compatible behaviour
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_SIENA_COMPAT  0x2
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
 /* enum: Full featured RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_FULL_FEATURED  0x3
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
 /* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_VSWITCH  0x3
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_VSWITCH 0x3
 /* enum: siena_compat variant RX PD firmware using PM rather than MAC
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
 /* enum: Low latency RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LOW_LATENCY  0x5
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
 /* enum: Packed stream RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_PACKED_STREAM  0x6
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
 /* enum: RX PD firmware handling layer 2 only for high packet rate performance
  * tests (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LAYER2_PERF  0x7
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
 /* enum: Rules engine RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RULES_ENGINE  0x8
-/* enum: reserved value - do not use (bug69716) */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RESERVED_9  0x9
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_DPDK 0xa
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
 /* enum: RX PD firmware parsing but not filtering network overlay tunnel
  * encapsulations (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY  0xf
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
 #define       MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_VERSION_OFST 10
 #define       MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_VERSION_LEN 2
 #define        MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_VERSION_REV_LBN 0
 /* enum: reserved value - do not use (may indicate alternative interpretation
  * of REV field in future)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RESERVED  0x0
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RESERVED 0x0
 /* enum: Trivial TX PD firmware for early Huntington development (Huntington
  * development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_FIRST_PKT  0x1
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
 /* enum: TX PD firmware with approximately Siena-compatible behaviour
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_SIENA_COMPAT  0x2
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
 /* enum: Full featured TX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_FULL_FEATURED  0x3
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
 /* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_VSWITCH  0x3
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_VSWITCH 0x3
 /* enum: siena_compat variant TX PD firmware using PM rather than MAC
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LOW_LATENCY  0x5 /* enum */
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
 /* enum: TX PD firmware handling layer 2 only for high packet rate performance
  * tests (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LAYER2_PERF  0x7
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
 /* enum: Rules engine TX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RULES_ENGINE  0x8
-/* enum: reserved value - do not use (bug69716) */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RESERVED_9  0x9
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_DPDK 0xa
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
 /* Hardware capabilities of NIC */
 #define       MC_CMD_GET_CAPABILITIES_OUT_HW_CAPABILITIES_OFST 12
 #define       MC_CMD_GET_CAPABILITIES_OUT_HW_CAPABILITIES_LEN 4
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_RX_DPCPU_FW_ID_OFST 4
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_RX_DPCPU_FW_ID_LEN 2
 /* enum: Standard RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP  0x0
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP 0x0
 /* enum: Low latency RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_LOW_LATENCY  0x1
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_LOW_LATENCY 0x1
 /* enum: Packed stream RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_PACKED_STREAM  0x2
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_PACKED_STREAM 0x2
 /* enum: Rules engine RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_RULES_ENGINE  0x5
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_RULES_ENGINE 0x5
+/* enum: DPDK RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_DPDK 0x6
 /* enum: BIST RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_BIST  0x10a
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_BIST 0x10a
 /* enum: RXDP Test firmware image 1 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH  0x101
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
 /* enum: RXDP Test firmware image 2 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD  0x102
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
 /* enum: RXDP Test firmware image 3 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST  0x103
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
 /* enum: RXDP Test firmware image 4 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE  0x104
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
 /* enum: RXDP Test firmware image 5 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_BACKPRESSURE  0x105
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_BACKPRESSURE 0x105
 /* enum: RXDP Test firmware image 6 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_PACKET_EDITS  0x106
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
 /* enum: RXDP Test firmware image 7 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_RX_HDR_SPLIT  0x107
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
 /* enum: RXDP Test firmware image 8 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_DISABLE_DL  0x108
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
 /* enum: RXDP Test firmware image 9 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_DOORBELL_DELAY  0x10b
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
 /* enum: RXDP Test firmware image 10 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_SLOW  0x10c
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_SLOW 0x10c
 /* TxDPCPU firmware id. */
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_TX_DPCPU_FW_ID_OFST 6
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_TX_DPCPU_FW_ID_LEN 2
 /* enum: Standard TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP  0x0
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP 0x0
 /* enum: Low latency TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_LOW_LATENCY  0x1
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_LOW_LATENCY 0x1
 /* enum: High packet rate TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_HIGH_PACKET_RATE  0x3
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_HIGH_PACKET_RATE 0x3
 /* enum: Rules engine TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_RULES_ENGINE  0x5
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_RULES_ENGINE 0x5
+/* enum: DPDK TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_DPDK 0x6
 /* enum: BIST TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_BIST  0x12d
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_BIST 0x12d
 /* enum: TXDP Test firmware image 1 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_TSO_EDIT  0x101
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
 /* enum: TXDP Test firmware image 2 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_PACKET_EDITS  0x102
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
 /* enum: TXDP CSR bus test firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_CSR  0x103
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_CSR 0x103
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_VERSION_OFST 8
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_VERSION_LEN 2
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_VERSION_REV_LBN 0
 /* enum: reserved value - do not use (may indicate alternative interpretation
  * of REV field in future)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RESERVED  0x0
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RESERVED 0x0
 /* enum: Trivial RX PD firmware for early Huntington development (Huntington
  * development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_FIRST_PKT  0x1
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
 /* enum: RX PD firmware with approximately Siena-compatible behaviour
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_SIENA_COMPAT  0x2
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
 /* enum: Full featured RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_FULL_FEATURED  0x3
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
 /* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_VSWITCH  0x3
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_VSWITCH 0x3
 /* enum: siena_compat variant RX PD firmware using PM rather than MAC
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
 /* enum: Low latency RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LOW_LATENCY  0x5
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
 /* enum: Packed stream RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_PACKED_STREAM  0x6
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
 /* enum: RX PD firmware handling layer 2 only for high packet rate performance
  * tests (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LAYER2_PERF  0x7
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
 /* enum: Rules engine RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RULES_ENGINE  0x8
-/* enum: reserved value - do not use (bug69716) */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RESERVED_9  0x9
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_DPDK 0xa
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
 /* enum: RX PD firmware parsing but not filtering network overlay tunnel
  * encapsulations (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY  0xf
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_VERSION_OFST 10
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_VERSION_LEN 2
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_VERSION_REV_LBN 0
 /* enum: reserved value - do not use (may indicate alternative interpretation
  * of REV field in future)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RESERVED  0x0
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RESERVED 0x0
 /* enum: Trivial TX PD firmware for early Huntington development (Huntington
  * development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_FIRST_PKT  0x1
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
 /* enum: TX PD firmware with approximately Siena-compatible behaviour
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_SIENA_COMPAT  0x2
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
 /* enum: Full featured TX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_FULL_FEATURED  0x3
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
 /* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_VSWITCH  0x3
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_VSWITCH 0x3
 /* enum: siena_compat variant TX PD firmware using PM rather than MAC
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LOW_LATENCY  0x5 /* enum */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
 /* enum: TX PD firmware handling layer 2 only for high packet rate performance
  * tests (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LAYER2_PERF  0x7
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
 /* enum: Rules engine TX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RULES_ENGINE  0x8
-/* enum: reserved value - do not use (bug69716) */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RESERVED_9  0x9
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_DPDK 0xa
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
 /* Hardware capabilities of NIC */
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_HW_CAPABILITIES_OFST 12
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_HW_CAPABILITIES_LEN 4
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_TSA_BOUND_WIDTH 1
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_SF_ADAPTER_AUTHENTICATION_LBN 18
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_SF_ADAPTER_AUTHENTICATION_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_FILTER_ACTION_FLAG_LBN 19
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_FILTER_ACTION_FLAG_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_FILTER_ACTION_MARK_LBN 20
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_FILTER_ACTION_MARK_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_EQUAL_STRIDE_PACKED_STREAM_LBN 21
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_EQUAL_STRIDE_PACKED_STREAM_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_L3XUDP_SUPPORT_LBN 22
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_L3XUDP_SUPPORT_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_FW_SUBVARIANT_NO_TX_CSUM_LBN 23
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_FW_SUBVARIANT_NO_TX_CSUM_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_VI_SPREADING_LBN 24
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_VI_SPREADING_WIDTH 1
 /* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
  * on older firmware (check the length).
  */
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_PFS_TO_PORTS_ASSIGNMENT_LEN 1
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM 16
 /* enum: The caller is not permitted to access information on this PF. */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_ACCESS_NOT_PERMITTED  0xff
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_ACCESS_NOT_PERMITTED 0xff
 /* enum: PF does not exist. */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_PRESENT  0xfe
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_PRESENT 0xfe
 /* enum: PF does exist but is not assigned to any external port. */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_ASSIGNED  0xfd
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_ASSIGNED 0xfd
 /* enum: This value indicates that PF is assigned, but it cannot be expressed
  * in this field. It is intended for a possible future situation where a more
  * complex scheme of PFs to ports mapping is being used. The future driver
  * should look for a new field supporting the new scheme. The current/old
  * driver should treat this value as PF_NOT_ASSIGNED.
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_INCOMPATIBLE_ASSIGNMENT  0xfc
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
 /* One byte per PF containing the number of its VFs, indexed by PF number. A
  * special value indicates that a PF is not present.
  */
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_NUM_VFS_PER_PF_LEN 1
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_NUM_VFS_PER_PF_NUM 16
 /* enum: The caller is not permitted to access information on this PF. */
-/*               MC_CMD_GET_CAPABILITIES_V2_OUT_ACCESS_NOT_PERMITTED  0xff */
+/*               MC_CMD_GET_CAPABILITIES_V2_OUT_ACCESS_NOT_PERMITTED 0xff */
 /* enum: PF does not exist. */
-/*               MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_PRESENT  0xfe */
+/*               MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_PRESENT 0xfe */
 /* Number of VIs available for each external port */
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_NUM_VIS_PER_PORT_OFST 58
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_NUM_VIS_PER_PORT_LEN 2
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_OFST 4
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_LEN 2
 /* enum: Standard RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP  0x0
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP 0x0
 /* enum: Low latency RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_LOW_LATENCY  0x1
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_LOW_LATENCY 0x1
 /* enum: Packed stream RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_PACKED_STREAM  0x2
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_PACKED_STREAM 0x2
 /* enum: Rules engine RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_RULES_ENGINE  0x5
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_RULES_ENGINE 0x5
+/* enum: DPDK RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_DPDK 0x6
 /* enum: BIST RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_BIST  0x10a
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_BIST 0x10a
 /* enum: RXDP Test firmware image 1 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH  0x101
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
 /* enum: RXDP Test firmware image 2 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD  0x102
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
 /* enum: RXDP Test firmware image 3 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST  0x103
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
 /* enum: RXDP Test firmware image 4 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE  0x104
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
 /* enum: RXDP Test firmware image 5 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_BACKPRESSURE  0x105
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_BACKPRESSURE 0x105
 /* enum: RXDP Test firmware image 6 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_PACKET_EDITS  0x106
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
 /* enum: RXDP Test firmware image 7 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_RX_HDR_SPLIT  0x107
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
 /* enum: RXDP Test firmware image 8 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DISABLE_DL  0x108
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
 /* enum: RXDP Test firmware image 9 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DOORBELL_DELAY  0x10b
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
 /* enum: RXDP Test firmware image 10 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_SLOW  0x10c
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_SLOW 0x10c
 /* TxDPCPU firmware id. */
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_OFST 6
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_LEN 2
 /* enum: Standard TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP  0x0
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP 0x0
 /* enum: Low latency TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_LOW_LATENCY  0x1
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_LOW_LATENCY 0x1
 /* enum: High packet rate TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_HIGH_PACKET_RATE  0x3
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_HIGH_PACKET_RATE 0x3
 /* enum: Rules engine TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_RULES_ENGINE  0x5
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_RULES_ENGINE 0x5
+/* enum: DPDK TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_DPDK 0x6
 /* enum: BIST TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_BIST  0x12d
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_BIST 0x12d
 /* enum: TXDP Test firmware image 1 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_TSO_EDIT  0x101
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
 /* enum: TXDP Test firmware image 2 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_PACKET_EDITS  0x102
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
 /* enum: TXDP CSR bus test firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_CSR  0x103
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_CSR 0x103
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_OFST 8
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_LEN 2
 #define        MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_REV_LBN 0
 /* enum: reserved value - do not use (may indicate alternative interpretation
  * of REV field in future)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RESERVED  0x0
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RESERVED 0x0
 /* enum: Trivial RX PD firmware for early Huntington development (Huntington
  * development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FIRST_PKT  0x1
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
 /* enum: RX PD firmware with approximately Siena-compatible behaviour
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT  0x2
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
 /* enum: Full featured RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FULL_FEATURED  0x3
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
 /* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_VSWITCH  0x3
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_VSWITCH 0x3
 /* enum: siena_compat variant RX PD firmware using PM rather than MAC
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
 /* enum: Low latency RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LOW_LATENCY  0x5
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
 /* enum: Packed stream RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_PACKED_STREAM  0x6
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
 /* enum: RX PD firmware handling layer 2 only for high packet rate performance
  * tests (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LAYER2_PERF  0x7
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
 /* enum: Rules engine RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RULES_ENGINE  0x8
-/* enum: reserved value - do not use (bug69716) */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RESERVED_9  0x9
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_DPDK 0xa
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
 /* enum: RX PD firmware parsing but not filtering network overlay tunnel
  * encapsulations (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY  0xf
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_OFST 10
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_LEN 2
 #define        MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_REV_LBN 0
 /* enum: reserved value - do not use (may indicate alternative interpretation
  * of REV field in future)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RESERVED  0x0
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RESERVED 0x0
 /* enum: Trivial TX PD firmware for early Huntington development (Huntington
  * development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FIRST_PKT  0x1
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
 /* enum: TX PD firmware with approximately Siena-compatible behaviour
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT  0x2
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
 /* enum: Full featured TX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FULL_FEATURED  0x3
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
 /* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_VSWITCH  0x3
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_VSWITCH 0x3
 /* enum: siena_compat variant TX PD firmware using PM rather than MAC
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LOW_LATENCY  0x5 /* enum */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
 /* enum: TX PD firmware handling layer 2 only for high packet rate performance
  * tests (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LAYER2_PERF  0x7
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
 /* enum: Rules engine TX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RULES_ENGINE  0x8
-/* enum: reserved value - do not use (bug69716) */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RESERVED_9  0x9
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_DPDK 0xa
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
 /* Hardware capabilities of NIC */
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_HW_CAPABILITIES_OFST 12
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_HW_CAPABILITIES_LEN 4
 #define        MC_CMD_GET_CAPABILITIES_V3_OUT_TSA_BOUND_WIDTH 1
 #define        MC_CMD_GET_CAPABILITIES_V3_OUT_SF_ADAPTER_AUTHENTICATION_LBN 18
 #define        MC_CMD_GET_CAPABILITIES_V3_OUT_SF_ADAPTER_AUTHENTICATION_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_FILTER_ACTION_FLAG_LBN 19
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_FILTER_ACTION_FLAG_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_FILTER_ACTION_MARK_LBN 20
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_FILTER_ACTION_MARK_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EQUAL_STRIDE_PACKED_STREAM_LBN 21
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EQUAL_STRIDE_PACKED_STREAM_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_L3XUDP_SUPPORT_LBN 22
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_L3XUDP_SUPPORT_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_FW_SUBVARIANT_NO_TX_CSUM_LBN 23
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_FW_SUBVARIANT_NO_TX_CSUM_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VI_SPREADING_LBN 24
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VI_SPREADING_WIDTH 1
 /* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
  * on older firmware (check the length).
  */
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_LEN 1
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM 16
 /* enum: The caller is not permitted to access information on this PF. */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED  0xff
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED 0xff
 /* enum: PF does not exist. */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT  0xfe
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT 0xfe
 /* enum: PF does exist but is not assigned to any external port. */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_ASSIGNED  0xfd
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_ASSIGNED 0xfd
 /* enum: This value indicates that PF is assigned, but it cannot be expressed
  * in this field. It is intended for a possible future situation where a more
  * complex scheme of PFs to ports mapping is being used. The future driver
  * should look for a new field supporting the new scheme. The current/old
  * driver should treat this value as PF_NOT_ASSIGNED.
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_INCOMPATIBLE_ASSIGNMENT  0xfc
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
 /* One byte per PF containing the number of its VFs, indexed by PF number. A
  * special value indicates that a PF is not present.
  */
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_LEN 1
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_NUM 16
 /* enum: The caller is not permitted to access information on this PF. */
-/*               MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED  0xff */
+/*               MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED 0xff */
 /* enum: PF does not exist. */
-/*               MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT  0xfe */
+/*               MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT 0xfe */
 /* Number of VIs available for each external port */
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_OFST 58
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_LEN 2
 /* enum: Each VI occupies 8k as on Huntington and Medford. PIO is at offset 4k.
  * CTPIO is not mapped.
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K   0x0
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K 0x0
 /* enum: Each VI occupies 16k. PIO is at offset 4k. CTPIO is at offset 12k. */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K  0x1
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K 0x1
 /* enum: Each VI occupies 64k. PIO is at offset 4k. CTPIO is at offset 12k. */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K  0x2
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K 0x2
 /* Number of vFIFOs per adapter that can be used for VFIFO Stuffing
  * (SF-115995-SW) in the present configuration of firmware and port mode.
  */
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_RX_DPCPU_FW_ID_OFST 4
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_RX_DPCPU_FW_ID_LEN 2
 /* enum: Standard RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP  0x0
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP 0x0
 /* enum: Low latency RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_LOW_LATENCY  0x1
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_LOW_LATENCY 0x1
 /* enum: Packed stream RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_PACKED_STREAM  0x2
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_PACKED_STREAM 0x2
 /* enum: Rules engine RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_RULES_ENGINE  0x5
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_RULES_ENGINE 0x5
+/* enum: DPDK RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_DPDK 0x6
 /* enum: BIST RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_BIST  0x10a
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_BIST 0x10a
 /* enum: RXDP Test firmware image 1 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH  0x101
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
 /* enum: RXDP Test firmware image 2 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD  0x102
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
 /* enum: RXDP Test firmware image 3 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST  0x103
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
 /* enum: RXDP Test firmware image 4 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE  0x104
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
 /* enum: RXDP Test firmware image 5 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_BACKPRESSURE  0x105
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_BACKPRESSURE 0x105
 /* enum: RXDP Test firmware image 6 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_PACKET_EDITS  0x106
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
 /* enum: RXDP Test firmware image 7 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_RX_HDR_SPLIT  0x107
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
 /* enum: RXDP Test firmware image 8 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_DISABLE_DL  0x108
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
 /* enum: RXDP Test firmware image 9 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_DOORBELL_DELAY  0x10b
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
 /* enum: RXDP Test firmware image 10 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_SLOW  0x10c
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_SLOW 0x10c
 /* TxDPCPU firmware id. */
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_TX_DPCPU_FW_ID_OFST 6
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_TX_DPCPU_FW_ID_LEN 2
 /* enum: Standard TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP  0x0
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP 0x0
 /* enum: Low latency TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_LOW_LATENCY  0x1
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_LOW_LATENCY 0x1
 /* enum: High packet rate TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_HIGH_PACKET_RATE  0x3
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_HIGH_PACKET_RATE 0x3
 /* enum: Rules engine TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_RULES_ENGINE  0x5
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_RULES_ENGINE 0x5
+/* enum: DPDK TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_DPDK 0x6
 /* enum: BIST TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_BIST  0x12d
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_BIST 0x12d
 /* enum: TXDP Test firmware image 1 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_TSO_EDIT  0x101
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
 /* enum: TXDP Test firmware image 2 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_PACKET_EDITS  0x102
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
 /* enum: TXDP CSR bus test firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_CSR  0x103
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_CSR 0x103
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_VERSION_OFST 8
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_VERSION_LEN 2
 #define        MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_VERSION_REV_LBN 0
 /* enum: reserved value - do not use (may indicate alternative interpretation
  * of REV field in future)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RESERVED  0x0
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RESERVED 0x0
 /* enum: Trivial RX PD firmware for early Huntington development (Huntington
  * development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_FIRST_PKT  0x1
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
 /* enum: RX PD firmware with approximately Siena-compatible behaviour
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_SIENA_COMPAT  0x2
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
 /* enum: Full featured RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_FULL_FEATURED  0x3
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
 /* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_VSWITCH  0x3
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_VSWITCH 0x3
 /* enum: siena_compat variant RX PD firmware using PM rather than MAC
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
 /* enum: Low latency RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_LOW_LATENCY  0x5
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
 /* enum: Packed stream RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_PACKED_STREAM  0x6
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
 /* enum: RX PD firmware handling layer 2 only for high packet rate performance
  * tests (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_LAYER2_PERF  0x7
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
 /* enum: Rules engine RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RULES_ENGINE  0x8
-/* enum: reserved value - do not use (bug69716) */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RESERVED_9  0x9
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_DPDK 0xa
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
 /* enum: RX PD firmware parsing but not filtering network overlay tunnel
  * encapsulations (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY  0xf
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_VERSION_OFST 10
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_VERSION_LEN 2
 #define        MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_VERSION_REV_LBN 0
 /* enum: reserved value - do not use (may indicate alternative interpretation
  * of REV field in future)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RESERVED  0x0
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RESERVED 0x0
 /* enum: Trivial TX PD firmware for early Huntington development (Huntington
  * development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_FIRST_PKT  0x1
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
 /* enum: TX PD firmware with approximately Siena-compatible behaviour
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_SIENA_COMPAT  0x2
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
 /* enum: Full featured TX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_FULL_FEATURED  0x3
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
 /* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_VSWITCH  0x3
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_VSWITCH 0x3
 /* enum: siena_compat variant TX PD firmware using PM rather than MAC
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_LOW_LATENCY  0x5 /* enum */
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
 /* enum: TX PD firmware handling layer 2 only for high packet rate performance
  * tests (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_LAYER2_PERF  0x7
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
 /* enum: Rules engine TX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RULES_ENGINE  0x8
-/* enum: reserved value - do not use (bug69716) */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RESERVED_9  0x9
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_DPDK 0xa
 /* enum: TX PD firmware for GUE parsing prototype (Medford development only) */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
 /* Hardware capabilities of NIC */
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_HW_CAPABILITIES_OFST 12
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_HW_CAPABILITIES_LEN 4
 #define        MC_CMD_GET_CAPABILITIES_V4_OUT_TSA_BOUND_WIDTH 1
 #define        MC_CMD_GET_CAPABILITIES_V4_OUT_SF_ADAPTER_AUTHENTICATION_LBN 18
 #define        MC_CMD_GET_CAPABILITIES_V4_OUT_SF_ADAPTER_AUTHENTICATION_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_FILTER_ACTION_FLAG_LBN 19
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_FILTER_ACTION_FLAG_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_FILTER_ACTION_MARK_LBN 20
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_FILTER_ACTION_MARK_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_EQUAL_STRIDE_PACKED_STREAM_LBN 21
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_EQUAL_STRIDE_PACKED_STREAM_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_L3XUDP_SUPPORT_LBN 22
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_L3XUDP_SUPPORT_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_FW_SUBVARIANT_NO_TX_CSUM_LBN 23
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_FW_SUBVARIANT_NO_TX_CSUM_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_VI_SPREADING_LBN 24
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_VI_SPREADING_WIDTH 1
 /* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
  * on older firmware (check the length).
  */
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_PFS_TO_PORTS_ASSIGNMENT_LEN 1
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM 16
 /* enum: The caller is not permitted to access information on this PF. */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_ACCESS_NOT_PERMITTED  0xff
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_ACCESS_NOT_PERMITTED 0xff
 /* enum: PF does not exist. */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_PRESENT  0xfe
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_PRESENT 0xfe
 /* enum: PF does exist but is not assigned to any external port. */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_ASSIGNED  0xfd
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_ASSIGNED 0xfd
 /* enum: This value indicates that PF is assigned, but it cannot be expressed
  * in this field. It is intended for a possible future situation where a more
  * complex scheme of PFs to ports mapping is being used. The future driver
  * should look for a new field supporting the new scheme. The current/old
  * driver should treat this value as PF_NOT_ASSIGNED.
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_INCOMPATIBLE_ASSIGNMENT  0xfc
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
 /* One byte per PF containing the number of its VFs, indexed by PF number. A
  * special value indicates that a PF is not present.
  */
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_NUM_VFS_PER_PF_LEN 1
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_NUM_VFS_PER_PF_NUM 16
 /* enum: The caller is not permitted to access information on this PF. */
-/*               MC_CMD_GET_CAPABILITIES_V4_OUT_ACCESS_NOT_PERMITTED  0xff */
+/*               MC_CMD_GET_CAPABILITIES_V4_OUT_ACCESS_NOT_PERMITTED 0xff */
 /* enum: PF does not exist. */
-/*               MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_PRESENT  0xfe */
+/*               MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_PRESENT 0xfe */
 /* Number of VIs available for each external port */
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_NUM_VIS_PER_PORT_OFST 58
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_NUM_VIS_PER_PORT_LEN 2
 /* enum: Each VI occupies 8k as on Huntington and Medford. PIO is at offset 4k.
  * CTPIO is not mapped.
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_8K   0x0
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_8K 0x0
 /* enum: Each VI occupies 16k. PIO is at offset 4k. CTPIO is at offset 12k. */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_16K  0x1
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_16K 0x1
 /* enum: Each VI occupies 64k. PIO is at offset 4k. CTPIO is at offset 12k. */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_64K  0x2
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_64K 0x2
 /* Number of vFIFOs per adapter that can be used for VFIFO Stuffing
  * (SF-115995-SW) in the present configuration of firmware and port mode.
  */
 #define       MC_CMD_V2_EXTN_IN_ACTUAL_LEN_LBN 16
 #define       MC_CMD_V2_EXTN_IN_ACTUAL_LEN_WIDTH 10
 #define       MC_CMD_V2_EXTN_IN_UNUSED2_LBN 26
-#define       MC_CMD_V2_EXTN_IN_UNUSED2_WIDTH 6
+#define       MC_CMD_V2_EXTN_IN_UNUSED2_WIDTH 2
+/* Type of command/response */
+#define       MC_CMD_V2_EXTN_IN_MESSAGE_TYPE_LBN 28
+#define       MC_CMD_V2_EXTN_IN_MESSAGE_TYPE_WIDTH 4
+/* enum: MCDI command directed to or response originating from the MC. */
+#define          MC_CMD_V2_EXTN_IN_MCDI_MESSAGE_TYPE_MC 0x0
+/* enum: MCDI command directed to a TSA controller. MCDI responses of this type
+ * are not defined.
+ */
+#define          MC_CMD_V2_EXTN_IN_MCDI_MESSAGE_TYPE_TSA 0x1
 
 
 /***********************************/
 #define       MC_CMD_VSWITCH_ALLOC_IN_TYPE_OFST 4
 #define       MC_CMD_VSWITCH_ALLOC_IN_TYPE_LEN 4
 /* enum: VLAN */
-#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VLAN  0x1
+#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VLAN 0x1
 /* enum: VEB */
-#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEB  0x2
+#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEB 0x2
 /* enum: VEPA (obsolete) */
-#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEPA  0x3
+#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEPA 0x3
 /* enum: MUX */
-#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_MUX  0x4
+#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_MUX 0x4
 /* enum: Snapper specific; semantics TBD */
-#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_TEST  0x5
+#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_TEST 0x5
 /* Flags controlling v-port creation */
 #define       MC_CMD_VSWITCH_ALLOC_IN_FLAGS_OFST 8
 #define       MC_CMD_VSWITCH_ALLOC_IN_FLAGS_LEN 4
 #define       MC_CMD_VPORT_ALLOC_IN_TYPE_OFST 4
 #define       MC_CMD_VPORT_ALLOC_IN_TYPE_LEN 4
 /* enum: VLAN (obsolete) */
-#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VLAN  0x1
+#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VLAN 0x1
 /* enum: VEB (obsolete) */
-#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VEB  0x2
+#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VEB 0x2
 /* enum: VEPA (obsolete) */
-#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VEPA  0x3
+#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VEPA 0x3
 /* enum: A normal v-port receives packets which match a specified MAC and/or
  * VLAN.
  */
-#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_NORMAL  0x4
+#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_NORMAL 0x4
 /* enum: An expansion v-port receives packets which don't match any other
  * v-port.
  */
-#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_EXPANSION  0x5
+#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_EXPANSION 0x5
 /* enum: A test v-port receives packets which match any filters installed by
  * its downstream components.
  */
-#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_TEST  0x6
+#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_TEST 0x6
 /* Flags controlling v-port creation */
 #define       MC_CMD_VPORT_ALLOC_IN_FLAGS_OFST 8
 #define       MC_CMD_VPORT_ALLOC_IN_FLAGS_LEN 4
 #define       MC_CMD_VADAPTOR_ALLOC_IN_MACADDR_OFST 24
 #define       MC_CMD_VADAPTOR_ALLOC_IN_MACADDR_LEN 6
 /* enum: Derive the MAC address from the upstream port */
-#define          MC_CMD_VADAPTOR_ALLOC_IN_AUTO_MAC  0x0
+#define          MC_CMD_VADAPTOR_ALLOC_IN_AUTO_MAC 0x0
 
 /* MC_CMD_VADAPTOR_ALLOC_OUT msgresponse */
 #define    MC_CMD_VADAPTOR_ALLOC_OUT_LEN 0
 /* enum: Allocate a context for exclusive use. The key and indirection table
  * must be explicitly configured.
  */
-#define          MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE  0x0
+#define          MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE 0x0
 /* enum: Allocate a context for shared use; this will spread across a range of
  * queues, but the key and indirection table are pre-configured and may not be
  * changed. For this mode, NUM_QUEUES must be 2, 4, 8, 16, 32 or 64.
  */
-#define          MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_SHARED  0x1
+#define          MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_SHARED 0x1
 /* Number of queues spanned by this context, in the range 1-64; valid offsets
  * in the indirection table will be in the range 0 to NUM_QUEUES-1.
  */
 #define       MC_CMD_RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID_OFST 0
 #define       MC_CMD_RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID_LEN 4
 /* enum: guaranteed invalid RSS context handle value */
-#define          MC_CMD_RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID_INVALID  0xffffffff
+#define          MC_CMD_RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID_INVALID 0xffffffff
 
 
 /***********************************/
 #define       MC_CMD_DOT1P_MAPPING_ALLOC_OUT_DOT1P_MAPPING_ID_OFST 0
 #define       MC_CMD_DOT1P_MAPPING_ALLOC_OUT_DOT1P_MAPPING_ID_LEN 4
 /* enum: guaranteed invalid .1p mapping handle value */
-#define          MC_CMD_DOT1P_MAPPING_ALLOC_OUT_DOT1P_MAPPING_ID_INVALID  0xffffffff
+#define          MC_CMD_DOT1P_MAPPING_ALLOC_OUT_DOT1P_MAPPING_ID_INVALID 0xffffffff
 
 
 /***********************************/
 #define        MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_LEN_LBN 1
 #define        MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_LEN_WIDTH 2
 /* enum: pad to 64 bytes */
-#define          MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_64  0x0
+#define          MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_64 0x0
 /* enum: pad to 128 bytes (Medford only) */
-#define          MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_128  0x1
+#define          MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_128 0x1
 /* enum: pad to 256 bytes (Medford only) */
-#define          MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_256   0x2
+#define          MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_256 0x2
 
 /* MC_CMD_SET_RXDP_CONFIG_OUT msgresponse */
 #define    MC_CMD_SET_RXDP_CONFIG_OUT_LEN 0
 #define       MC_CMD_SET_CLOCK_IN_SYS_FREQ_OFST 0
 #define       MC_CMD_SET_CLOCK_IN_SYS_FREQ_LEN 4
 /* enum: Leave the system clock domain frequency unchanged */
-#define          MC_CMD_SET_CLOCK_IN_SYS_DOMAIN_DONT_CHANGE  0x0
+#define          MC_CMD_SET_CLOCK_IN_SYS_DOMAIN_DONT_CHANGE 0x0
 /* Requested frequency in MHz for inter-core clock domain */
 #define       MC_CMD_SET_CLOCK_IN_ICORE_FREQ_OFST 4
 #define       MC_CMD_SET_CLOCK_IN_ICORE_FREQ_LEN 4
 /* enum: Leave the inter-core clock domain frequency unchanged */
-#define          MC_CMD_SET_CLOCK_IN_ICORE_DOMAIN_DONT_CHANGE  0x0
+#define          MC_CMD_SET_CLOCK_IN_ICORE_DOMAIN_DONT_CHANGE 0x0
 /* Requested frequency in MHz for DPCPU clock domain */
 #define       MC_CMD_SET_CLOCK_IN_DPCPU_FREQ_OFST 8
 #define       MC_CMD_SET_CLOCK_IN_DPCPU_FREQ_LEN 4
 /* enum: Leave the DPCPU clock domain frequency unchanged */
-#define          MC_CMD_SET_CLOCK_IN_DPCPU_DOMAIN_DONT_CHANGE  0x0
+#define          MC_CMD_SET_CLOCK_IN_DPCPU_DOMAIN_DONT_CHANGE 0x0
 /* Requested frequency in MHz for PCS clock domain */
 #define       MC_CMD_SET_CLOCK_IN_PCS_FREQ_OFST 12
 #define       MC_CMD_SET_CLOCK_IN_PCS_FREQ_LEN 4
 /* enum: Leave the PCS clock domain frequency unchanged */
-#define          MC_CMD_SET_CLOCK_IN_PCS_DOMAIN_DONT_CHANGE  0x0
+#define          MC_CMD_SET_CLOCK_IN_PCS_DOMAIN_DONT_CHANGE 0x0
 /* Requested frequency in MHz for MC clock domain */
 #define       MC_CMD_SET_CLOCK_IN_MC_FREQ_OFST 16
 #define       MC_CMD_SET_CLOCK_IN_MC_FREQ_LEN 4
 /* enum: Leave the MC clock domain frequency unchanged */
-#define          MC_CMD_SET_CLOCK_IN_MC_DOMAIN_DONT_CHANGE  0x0
+#define          MC_CMD_SET_CLOCK_IN_MC_DOMAIN_DONT_CHANGE 0x0
 /* Requested frequency in MHz for rmon clock domain */
 #define       MC_CMD_SET_CLOCK_IN_RMON_FREQ_OFST 20
 #define       MC_CMD_SET_CLOCK_IN_RMON_FREQ_LEN 4
 /* enum: Leave the rmon clock domain frequency unchanged */
-#define          MC_CMD_SET_CLOCK_IN_RMON_DOMAIN_DONT_CHANGE  0x0
+#define          MC_CMD_SET_CLOCK_IN_RMON_DOMAIN_DONT_CHANGE 0x0
 /* Requested frequency in MHz for vswitch clock domain */
 #define       MC_CMD_SET_CLOCK_IN_VSWITCH_FREQ_OFST 24
 #define       MC_CMD_SET_CLOCK_IN_VSWITCH_FREQ_LEN 4
 /* enum: Leave the vswitch clock domain frequency unchanged */
-#define          MC_CMD_SET_CLOCK_IN_VSWITCH_DOMAIN_DONT_CHANGE  0x0
+#define          MC_CMD_SET_CLOCK_IN_VSWITCH_DOMAIN_DONT_CHANGE 0x0
 
 /* MC_CMD_SET_CLOCK_OUT msgresponse */
 #define    MC_CMD_SET_CLOCK_OUT_LEN 28
 #define       MC_CMD_SET_CLOCK_OUT_SYS_FREQ_OFST 0
 #define       MC_CMD_SET_CLOCK_OUT_SYS_FREQ_LEN 4
 /* enum: The system clock domain doesn't exist */
-#define          MC_CMD_SET_CLOCK_OUT_SYS_DOMAIN_UNSUPPORTED  0x0
+#define          MC_CMD_SET_CLOCK_OUT_SYS_DOMAIN_UNSUPPORTED 0x0
 /* Resulting inter-core frequency in MHz */
 #define       MC_CMD_SET_CLOCK_OUT_ICORE_FREQ_OFST 4
 #define       MC_CMD_SET_CLOCK_OUT_ICORE_FREQ_LEN 4
 /* enum: The inter-core clock domain doesn't exist / isn't used */
-#define          MC_CMD_SET_CLOCK_OUT_ICORE_DOMAIN_UNSUPPORTED  0x0
+#define          MC_CMD_SET_CLOCK_OUT_ICORE_DOMAIN_UNSUPPORTED 0x0
 /* Resulting DPCPU frequency in MHz */
 #define       MC_CMD_SET_CLOCK_OUT_DPCPU_FREQ_OFST 8
 #define       MC_CMD_SET_CLOCK_OUT_DPCPU_FREQ_LEN 4
 /* enum: The dpcpu clock domain doesn't exist */
-#define          MC_CMD_SET_CLOCK_OUT_DPCPU_DOMAIN_UNSUPPORTED  0x0
+#define          MC_CMD_SET_CLOCK_OUT_DPCPU_DOMAIN_UNSUPPORTED 0x0
 /* Resulting PCS frequency in MHz */
 #define       MC_CMD_SET_CLOCK_OUT_PCS_FREQ_OFST 12
 #define       MC_CMD_SET_CLOCK_OUT_PCS_FREQ_LEN 4
 /* enum: The PCS clock domain doesn't exist / isn't controlled */
-#define          MC_CMD_SET_CLOCK_OUT_PCS_DOMAIN_UNSUPPORTED  0x0
+#define          MC_CMD_SET_CLOCK_OUT_PCS_DOMAIN_UNSUPPORTED 0x0
 /* Resulting MC frequency in MHz */
 #define       MC_CMD_SET_CLOCK_OUT_MC_FREQ_OFST 16
 #define       MC_CMD_SET_CLOCK_OUT_MC_FREQ_LEN 4
 /* enum: The MC clock domain doesn't exist / isn't controlled */
-#define          MC_CMD_SET_CLOCK_OUT_MC_DOMAIN_UNSUPPORTED  0x0
+#define          MC_CMD_SET_CLOCK_OUT_MC_DOMAIN_UNSUPPORTED 0x0
 /* Resulting rmon frequency in MHz */
 #define       MC_CMD_SET_CLOCK_OUT_RMON_FREQ_OFST 20
 #define       MC_CMD_SET_CLOCK_OUT_RMON_FREQ_LEN 4
 /* enum: The rmon clock domain doesn't exist / isn't controlled */
-#define          MC_CMD_SET_CLOCK_OUT_RMON_DOMAIN_UNSUPPORTED  0x0
+#define          MC_CMD_SET_CLOCK_OUT_RMON_DOMAIN_UNSUPPORTED 0x0
 /* Resulting vswitch frequency in MHz */
 #define       MC_CMD_SET_CLOCK_OUT_VSWITCH_FREQ_OFST 24
 #define       MC_CMD_SET_CLOCK_OUT_VSWITCH_FREQ_LEN 4
 /* enum: The vswitch clock domain doesn't exist / isn't controlled */
-#define          MC_CMD_SET_CLOCK_OUT_VSWITCH_DOMAIN_UNSUPPORTED  0x0
+#define          MC_CMD_SET_CLOCK_OUT_VSWITCH_DOMAIN_UNSUPPORTED 0x0
 
 
 /***********************************/
 #define       MC_CMD_DPCPU_RPC_IN_CPU_OFST 0
 #define       MC_CMD_DPCPU_RPC_IN_CPU_LEN 4
 /* enum: RxDPCPU0 */
-#define          MC_CMD_DPCPU_RPC_IN_DPCPU_RX0  0x0
+#define          MC_CMD_DPCPU_RPC_IN_DPCPU_RX0 0x0
 /* enum: TxDPCPU0 */
-#define          MC_CMD_DPCPU_RPC_IN_DPCPU_TX0  0x1
+#define          MC_CMD_DPCPU_RPC_IN_DPCPU_TX0 0x1
 /* enum: TxDPCPU1 */
-#define          MC_CMD_DPCPU_RPC_IN_DPCPU_TX1  0x2
+#define          MC_CMD_DPCPU_RPC_IN_DPCPU_TX1 0x2
 /* enum: RxDPCPU1 (Medford only) */
-#define          MC_CMD_DPCPU_RPC_IN_DPCPU_RX1   0x3
+#define          MC_CMD_DPCPU_RPC_IN_DPCPU_RX1 0x3
 /* enum: RxDPCPU (will be for the calling function; for now, just an alias of
  * DPCPU_RX0)
  */
-#define          MC_CMD_DPCPU_RPC_IN_DPCPU_RX   0x80
+#define          MC_CMD_DPCPU_RPC_IN_DPCPU_RX 0x80
 /* enum: TxDPCPU (will be for the calling function; for now, just an alias of
  * DPCPU_TX0)
  */
-#define          MC_CMD_DPCPU_RPC_IN_DPCPU_TX   0x81
+#define          MC_CMD_DPCPU_RPC_IN_DPCPU_TX 0x81
 /* First 8 bits [39:32] of DATA are consumed by MC-DPCPU protocol and must be
  * initialised to zero
  */
 #define       MC_CMD_DPCPU_RPC_IN_DATA_LEN 32
 #define        MC_CMD_DPCPU_RPC_IN_HDR_CMD_CMDNUM_LBN 8
 #define        MC_CMD_DPCPU_RPC_IN_HDR_CMD_CMDNUM_WIDTH 8
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_READ  0x6 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_WRITE  0x7 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_SELF_TEST  0xc /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_CSR_ACCESS  0xe /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_READ  0x46 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_WRITE  0x47 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_SELF_TEST  0x4a /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_CSR_ACCESS  0x4c /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_SET_MC_REPLAY_CNTXT  0x4d /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_READ 0x6 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_WRITE 0x7 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_SELF_TEST 0xc /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_CSR_ACCESS 0xe /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_READ 0x46 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_WRITE 0x47 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_SELF_TEST 0x4a /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_CSR_ACCESS 0x4c /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_SET_MC_REPLAY_CNTXT 0x4d /* enum */
 #define        MC_CMD_DPCPU_RPC_IN_HDR_CMD_REQ_OBJID_LBN 16
 #define        MC_CMD_DPCPU_RPC_IN_HDR_CMD_REQ_OBJID_WIDTH 16
 #define        MC_CMD_DPCPU_RPC_IN_HDR_CMD_REQ_ADDR_LBN 16
 #define        MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_INFO_WIDTH 240
 #define        MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_LBN 16
 #define        MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_WIDTH 16
-#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_STOP_RETURN_RESULT  0x0 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_READ  0x1 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_WRITE  0x2 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_WRITE_READ  0x3 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_PIPELINED_READ  0x4 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_STOP_RETURN_RESULT 0x0 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_READ 0x1 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_WRITE 0x2 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_WRITE_READ 0x3 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_PIPELINED_READ 0x4 /* enum */
 #define        MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_START_DELAY_LBN 48
 #define        MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_START_DELAY_WIDTH 16
 #define        MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_RPT_COUNT_LBN 64
 #define        MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_GAP_DELAY_WIDTH 16
 #define        MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_LBN 16
 #define        MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_WIDTH 16
-#define          MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_CUT_THROUGH  0x1 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_STORE_FORWARD  0x2 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_STORE_FORWARD_FIRST  0x3 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_CUT_THROUGH 0x1 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_STORE_FORWARD 0x2 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_STORE_FORWARD_FIRST 0x3 /* enum */
 #define        MC_CMD_DPCPU_RPC_IN_MC_REPLAY_CNTXT_LBN 64
 #define        MC_CMD_DPCPU_RPC_IN_MC_REPLAY_CNTXT_WIDTH 16
 #define       MC_CMD_DPCPU_RPC_IN_WDATA_OFST 12
 #define       MC_CMD_SHMBOOT_OP_IN_SHMBOOT_OP_OFST 0
 #define       MC_CMD_SHMBOOT_OP_IN_SHMBOOT_OP_LEN 4
 /* enum: Copy slave_data section to the slave core. (Greenport only) */
-#define          MC_CMD_SHMBOOT_OP_IN_PUSH_SLAVE_DATA  0x0
+#define          MC_CMD_SHMBOOT_OP_IN_PUSH_SLAVE_DATA 0x0
 
 /* MC_CMD_SHMBOOT_OP_OUT msgresponse */
 #define    MC_CMD_SHMBOOT_OP_OUT_LEN 0
 #define       MC_CMD_DUMP_DO_IN_PADDING_LEN 4
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_OFST 4
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_LEN 4
-#define          MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM  0x0 /* enum */
-#define          MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_DEFAULT  0x1 /* enum */
+#define          MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM 0x0 /* enum */
+#define          MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_DEFAULT 0x1 /* enum */
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_TYPE_OFST 8
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_TYPE_LEN 4
-#define          MC_CMD_DUMP_DO_IN_DUMP_LOCATION_NVRAM  0x1 /* enum */
-#define          MC_CMD_DUMP_DO_IN_DUMP_LOCATION_HOST_MEMORY  0x2 /* enum */
-#define          MC_CMD_DUMP_DO_IN_DUMP_LOCATION_HOST_MEMORY_MLI  0x3 /* enum */
-#define          MC_CMD_DUMP_DO_IN_DUMP_LOCATION_UART  0x4 /* enum */
+#define          MC_CMD_DUMP_DO_IN_DUMP_LOCATION_NVRAM 0x1 /* enum */
+#define          MC_CMD_DUMP_DO_IN_DUMP_LOCATION_HOST_MEMORY 0x2 /* enum */
+#define          MC_CMD_DUMP_DO_IN_DUMP_LOCATION_HOST_MEMORY_MLI 0x3 /* enum */
+#define          MC_CMD_DUMP_DO_IN_DUMP_LOCATION_UART 0x4 /* enum */
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_NVRAM_PARTITION_TYPE_ID_OFST 12
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_NVRAM_PARTITION_TYPE_ID_LEN 4
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_NVRAM_OFFSET_OFST 16
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_ADDR_HI_LEN 4
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_ROOT_ADDR_LO_OFST 12
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_ROOT_ADDR_LO_LEN 4
-#define          MC_CMD_DUMP_DO_IN_HOST_MEMORY_MLI_PAGE_SIZE  0x1000 /* enum */
+#define          MC_CMD_DUMP_DO_IN_HOST_MEMORY_MLI_PAGE_SIZE 0x1000 /* enum */
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_ROOT_ADDR_HI_OFST 16
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_ROOT_ADDR_HI_LEN 4
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_DEPTH_OFST 20
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_DEPTH_LEN 4
-#define          MC_CMD_DUMP_DO_IN_HOST_MEMORY_MLI_MAX_DEPTH  0x2 /* enum */
+#define          MC_CMD_DUMP_DO_IN_HOST_MEMORY_MLI_MAX_DEPTH 0x2 /* enum */
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_UART_PORT_OFST 12
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_UART_PORT_LEN 4
 /* enum: The uart port this command was received over (if using a uart
  * transport)
  */
-#define          MC_CMD_DUMP_DO_IN_UART_PORT_SRC  0xff
+#define          MC_CMD_DUMP_DO_IN_UART_PORT_SRC 0xff
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_SIZE_OFST 24
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_SIZE_LEN 4
 #define       MC_CMD_DUMP_DO_IN_DUMPFILE_DST_OFST 28
 #define       MC_CMD_DUMP_DO_IN_DUMPFILE_DST_LEN 4
-#define          MC_CMD_DUMP_DO_IN_DUMPFILE_DST_CUSTOM  0x0 /* enum */
-#define          MC_CMD_DUMP_DO_IN_DUMPFILE_DST_NVRAM_DUMP_PARTITION  0x1 /* enum */
+#define          MC_CMD_DUMP_DO_IN_DUMPFILE_DST_CUSTOM 0x0 /* enum */
+#define          MC_CMD_DUMP_DO_IN_DUMPFILE_DST_NVRAM_DUMP_PARTITION 0x1 /* enum */
 #define       MC_CMD_DUMP_DO_IN_DUMPFILE_DST_CUSTOM_TYPE_OFST 32
 #define       MC_CMD_DUMP_DO_IN_DUMPFILE_DST_CUSTOM_TYPE_LEN 4
 /*            Enum values, see field(s): */
 #define    MC_CMD_SET_PSU_IN_LEN 12
 #define       MC_CMD_SET_PSU_IN_PARAM_OFST 0
 #define       MC_CMD_SET_PSU_IN_PARAM_LEN 4
-#define          MC_CMD_SET_PSU_IN_PARAM_SUPPLY_VOLTAGE  0x0 /* enum */
+#define          MC_CMD_SET_PSU_IN_PARAM_SUPPLY_VOLTAGE 0x0 /* enum */
 #define       MC_CMD_SET_PSU_IN_RAIL_OFST 4
 #define       MC_CMD_SET_PSU_IN_RAIL_LEN 4
-#define          MC_CMD_SET_PSU_IN_RAIL_0V9  0x0 /* enum */
-#define          MC_CMD_SET_PSU_IN_RAIL_1V2  0x1 /* enum */
+#define          MC_CMD_SET_PSU_IN_RAIL_0V9 0x0 /* enum */
+#define          MC_CMD_SET_PSU_IN_RAIL_1V2 0x1 /* enum */
 /* desired value, eg voltage in mV */
 #define       MC_CMD_SET_PSU_IN_VALUE_OFST 8
 #define       MC_CMD_SET_PSU_IN_VALUE_LEN 4
 #define       MC_CMD_KR_TUNE_IN_KR_TUNE_OP_OFST 0
 #define       MC_CMD_KR_TUNE_IN_KR_TUNE_OP_LEN 1
 /* enum: Get current RXEQ settings */
-#define          MC_CMD_KR_TUNE_IN_RXEQ_GET  0x0
+#define          MC_CMD_KR_TUNE_IN_RXEQ_GET 0x0
 /* enum: Override RXEQ settings */
-#define          MC_CMD_KR_TUNE_IN_RXEQ_SET  0x1
+#define          MC_CMD_KR_TUNE_IN_RXEQ_SET 0x1
 /* enum: Get current TX Driver settings */
-#define          MC_CMD_KR_TUNE_IN_TXEQ_GET  0x2
+#define          MC_CMD_KR_TUNE_IN_TXEQ_GET 0x2
 /* enum: Override TX Driver settings */
-#define          MC_CMD_KR_TUNE_IN_TXEQ_SET  0x3
+#define          MC_CMD_KR_TUNE_IN_TXEQ_SET 0x3
 /* enum: Force KR Serdes reset / recalibration */
-#define          MC_CMD_KR_TUNE_IN_RECAL  0x4
+#define          MC_CMD_KR_TUNE_IN_RECAL 0x4
 /* enum: Start KR Serdes Eye diagram plot on a given lane. Lane must have valid
  * signal.
  */
-#define          MC_CMD_KR_TUNE_IN_START_EYE_PLOT  0x5
+#define          MC_CMD_KR_TUNE_IN_START_EYE_PLOT 0x5
 /* enum: Poll KR Serdes Eye diagram plot. Returns one row of BER data. The
  * caller should call this command repeatedly after starting eye plot, until no
  * more data is returned.
  */
-#define          MC_CMD_KR_TUNE_IN_POLL_EYE_PLOT  0x6
+#define          MC_CMD_KR_TUNE_IN_POLL_EYE_PLOT 0x6
 /* enum: Read Figure Of Merit (eye quality, higher is better). */
-#define          MC_CMD_KR_TUNE_IN_READ_FOM  0x7
+#define          MC_CMD_KR_TUNE_IN_READ_FOM 0x7
+/* enum: Start/stop link training frames */
+#define          MC_CMD_KR_TUNE_IN_LINK_TRAIN_RUN 0x8
+/* enum: Issue KR link training command (control training coefficients) */
+#define          MC_CMD_KR_TUNE_IN_LINK_TRAIN_CMD 0x9
 /* Align the arguments to 32 bits */
 #define       MC_CMD_KR_TUNE_IN_KR_TUNE_RSVD_OFST 1
 #define       MC_CMD_KR_TUNE_IN_KR_TUNE_RSVD_LEN 3
 #define        MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_ID_LBN 0
 #define        MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_ID_WIDTH 8
 /* enum: Attenuation (0-15, Huntington) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_ATT  0x0
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_ATT 0x0
 /* enum: CTLE Boost (0-15, Huntington) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_BOOST  0x1
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_BOOST 0x1
 /* enum: Edge DFE Tap1 (Huntington - 0 - max negative, 64 - zero, 127 - max
  * positive, Medford - 0-31)
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP1  0x2
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP1 0x2
 /* enum: Edge DFE Tap2 (Huntington - 0 - max negative, 32 - zero, 63 - max
  * positive, Medford - 0-31)
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP2  0x3
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP2 0x3
 /* enum: Edge DFE Tap3 (Huntington - 0 - max negative, 32 - zero, 63 - max
  * positive, Medford - 0-16)
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP3  0x4
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP3 0x4
 /* enum: Edge DFE Tap4 (Huntington - 0 - max negative, 32 - zero, 63 - max
  * positive, Medford - 0-16)
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP4  0x5
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP4 0x5
 /* enum: Edge DFE Tap5 (Huntington - 0 - max negative, 32 - zero, 63 - max
  * positive, Medford - 0-16)
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP5  0x6
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP5 0x6
 /* enum: Edge DFE DLEV (0-128 for Medford) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_DLEV  0x7
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_DLEV 0x7
 /* enum: Variable Gain Amplifier (0-15, Medford) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_VGA  0x8
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_VGA 0x8
 /* enum: CTLE EQ Capacitor (0-15, Medford) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_EQC  0x9
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_EQC 0x9
 /* enum: CTLE EQ Resistor (0-7, Medford) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_EQRES  0xa
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_EQRES 0xa
 /* enum: CTLE gain (0-31, Medford2) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_GAIN  0xb
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_GAIN 0xb
 /* enum: CTLE pole (0-31, Medford2) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_POLE  0xc
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_POLE 0xc
 /* enum: CTLE peaking (0-31, Medford2) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_PEAK  0xd
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_PEAK 0xd
 /* enum: DFE Tap1 - even path (Medford2 - 6 bit signed (-29 - +29)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP1_EVEN  0xe
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP1_EVEN 0xe
 /* enum: DFE Tap1 - odd path (Medford2 - 6 bit signed (-29 - +29)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP1_ODD  0xf
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP1_ODD 0xf
 /* enum: DFE Tap2 (Medford2 - 6 bit signed (-20 - +20)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP2  0x10
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP2 0x10
 /* enum: DFE Tap3 (Medford2 - 6 bit signed (-20 - +20)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP3  0x11
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP3 0x11
 /* enum: DFE Tap4 (Medford2 - 6 bit signed (-20 - +20)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP4  0x12
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP4 0x12
 /* enum: DFE Tap5 (Medford2 - 6 bit signed (-24 - +24)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP5  0x13
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP5 0x13
 /* enum: DFE Tap6 (Medford2 - 6 bit signed (-24 - +24)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP6  0x14
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP6 0x14
 /* enum: DFE Tap7 (Medford2 - 6 bit signed (-24 - +24)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP7  0x15
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP7 0x15
 /* enum: DFE Tap8 (Medford2 - 6 bit signed (-24 - +24)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP8  0x16
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP8 0x16
 /* enum: DFE Tap9 (Medford2 - 6 bit signed (-24 - +24)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP9  0x17
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP9 0x17
 /* enum: DFE Tap10 (Medford2 - 6 bit signed (-24 - +24)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP10  0x18
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP10 0x18
 /* enum: DFE Tap11 (Medford2 - 6 bit signed (-24 - +24)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP11  0x19
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP11 0x19
 /* enum: DFE Tap12 (Medford2 - 6 bit signed (-24 - +24)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP12  0x1a
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP12 0x1a
 /* enum: I/Q clk offset (Medford2 - 4 bit signed (-5 - +5))) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_IQ_OFF  0x1b
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_IQ_OFF 0x1b
 /* enum: Negative h1 polarity data sampler offset calibration code, even path
  * (Medford2 - 6 bit signed (-29 - +29)))
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1N_OFF_EVEN  0x1c
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1N_OFF_EVEN 0x1c
 /* enum: Negative h1 polarity data sampler offset calibration code, odd path
  * (Medford2 - 6 bit signed (-29 - +29)))
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1N_OFF_ODD  0x1d
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1N_OFF_ODD 0x1d
 /* enum: Positive h1 polarity data sampler offset calibration code, even path
  * (Medford2 - 6 bit signed (-29 - +29)))
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1P_OFF_EVEN  0x1e
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1P_OFF_EVEN 0x1e
 /* enum: Positive h1 polarity data sampler offset calibration code, odd path
  * (Medford2 - 6 bit signed (-29 - +29)))
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1P_OFF_ODD  0x1f
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1P_OFF_ODD 0x1f
 /* enum: CDR calibration loop code (Medford2) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CDR_PVT  0x20
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CDR_PVT 0x20
 /* enum: CDR integral loop code (Medford2) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CDR_INTEG  0x21
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CDR_INTEG 0x21
 #define        MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_LANE_LBN 8
 #define        MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_LANE_WIDTH 3
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_0  0x0 /* enum */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_1  0x1 /* enum */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_2  0x2 /* enum */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_3  0x3 /* enum */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_ALL  0x4 /* enum */
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_0 0x0 /* enum */
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_1 0x1 /* enum */
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_2 0x2 /* enum */
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_3 0x3 /* enum */
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_ALL 0x4 /* enum */
 #define        MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_AUTOCAL_LBN 11
 #define        MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_AUTOCAL_WIDTH 1
 #define        MC_CMD_KR_TUNE_RXEQ_GET_OUT_RESERVED_LBN 12
 #define        MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_ID_LBN 0
 #define        MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_ID_WIDTH 8
 /* enum: TX Amplitude (Huntington, Medford, Medford2) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_LEV  0x0
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_LEV 0x0
 /* enum: De-Emphasis Tap1 Magnitude (0-7) (Huntington) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_MODE  0x1
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_MODE 0x1
 /* enum: De-Emphasis Tap1 Fine */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_DTLEV  0x2
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_DTLEV 0x2
 /* enum: De-Emphasis Tap2 Magnitude (0-6) (Huntington) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_D2  0x3
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_D2 0x3
 /* enum: De-Emphasis Tap2 Fine (Huntington) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_D2TLEV  0x4
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_D2TLEV 0x4
 /* enum: Pre-Emphasis Magnitude (Huntington) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_E  0x5
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_E 0x5
 /* enum: Pre-Emphasis Fine (Huntington) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_ETLEV  0x6
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_ETLEV 0x6
 /* enum: TX Slew Rate Coarse control (Huntington) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_PREDRV_DLY  0x7
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_PREDRV_DLY 0x7
 /* enum: TX Slew Rate Fine control (Huntington) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_SR_SET  0x8
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_SR_SET 0x8
 /* enum: TX Termination Impedance control (Huntington) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_RT_SET  0x9
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_RT_SET 0x9
 /* enum: TX Amplitude Fine control (Medford) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_LEV_FINE  0xa
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_LEV_FINE 0xa
 /* enum: Pre-shoot Tap (Medford, Medford2) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TAP_ADV  0xb
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TAP_ADV 0xb
 /* enum: De-emphasis Tap (Medford, Medford2) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TAP_DLY  0xc
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TAP_DLY 0xc
 #define        MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_LANE_LBN 8
 #define        MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_LANE_WIDTH 3
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_0  0x0 /* enum */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_1  0x1 /* enum */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_2  0x2 /* enum */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_3  0x3 /* enum */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_ALL  0x4 /* enum */
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_0 0x0 /* enum */
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_1 0x1 /* enum */
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_2 0x2 /* enum */
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_3 0x3 /* enum */
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_ALL 0x4 /* enum */
 #define        MC_CMD_KR_TUNE_TXEQ_GET_OUT_RESERVED_LBN 11
 #define        MC_CMD_KR_TUNE_TXEQ_GET_OUT_RESERVED_WIDTH 5
 #define        MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_INITIAL_LBN 16
 /* Align the arguments to 32 bits */
 #define       MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_KR_TUNE_RSVD_OFST 1
 #define       MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_KR_TUNE_RSVD_LEN 3
-/* Port-relative lane to scan eye on */
 #define       MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_OFST 4
 #define       MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_LEN 4
+#define        MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_NUM_LBN 0
+#define        MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_NUM_WIDTH 8
+#define        MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_ABS_REL_LBN 31
+#define        MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_ABS_REL_WIDTH 1
 /* Scan duration / cycle count */
 #define       MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_BER_OFST 8
 #define       MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_BER_LEN 4
 #define       MC_CMD_KR_TUNE_READ_FOM_IN_KR_TUNE_RSVD_LEN 3
 #define       MC_CMD_KR_TUNE_READ_FOM_IN_LANE_OFST 4
 #define       MC_CMD_KR_TUNE_READ_FOM_IN_LANE_LEN 4
+#define        MC_CMD_KR_TUNE_READ_FOM_IN_LANE_NUM_LBN 0
+#define        MC_CMD_KR_TUNE_READ_FOM_IN_LANE_NUM_WIDTH 8
+#define        MC_CMD_KR_TUNE_READ_FOM_IN_LANE_ABS_REL_LBN 31
+#define        MC_CMD_KR_TUNE_READ_FOM_IN_LANE_ABS_REL_WIDTH 1
 
 /* MC_CMD_KR_TUNE_READ_FOM_OUT msgresponse */
 #define    MC_CMD_KR_TUNE_READ_FOM_OUT_LEN 4
 #define       MC_CMD_KR_TUNE_READ_FOM_OUT_FOM_OFST 0
 #define       MC_CMD_KR_TUNE_READ_FOM_OUT_FOM_LEN 4
 
+/* MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN msgrequest */
+#define    MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_LEN 8
+/* Requested operation */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_KR_TUNE_OP_OFST 0
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_KR_TUNE_OP_LEN 1
+/* Align the arguments to 32 bits */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_KR_TUNE_RSVD_OFST 1
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_KR_TUNE_RSVD_LEN 3
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_RUN_OFST 4
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_RUN_LEN 4
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_STOP 0x0 /* enum */
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_START 0x1 /* enum */
+
+/* MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN msgrequest */
+#define    MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_LEN 28
+/* Requested operation */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_OP_OFST 0
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_OP_LEN 1
+/* Align the arguments to 32 bits */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_RSVD_OFST 1
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_RSVD_LEN 3
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_LANE_OFST 4
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_LANE_LEN 4
+/* Set INITIALIZE state */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_INITIALIZE_OFST 8
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_INITIALIZE_LEN 4
+/* Set PRESET state */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_PRESET_OFST 12
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_PRESET_LEN 4
+/* C(-1) request */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CM1_OFST 16
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CM1_LEN 4
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_HOLD 0x0 /* enum */
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_INCREMENT 0x1 /* enum */
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_DECREMENT 0x2 /* enum */
+/* C(0) request */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_C0_OFST 20
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_C0_LEN 4
+/*            Enum values, see field(s): */
+/*               MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN/CM1 */
+/* C(+1) request */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CP1_OFST 24
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CP1_LEN 4
+/*            Enum values, see field(s): */
+/*               MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN/CM1 */
+
+/* MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT msgresponse */
+#define    MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_LEN 24
+/* C(-1) status */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CM1_STATUS_OFST 0
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CM1_STATUS_LEN 4
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_STATUS_NOT_UPDATED 0x0 /* enum */
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_STATUS_UPDATED 0x1 /* enum */
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_STATUS_MINIMUM 0x2 /* enum */
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_STATUS_MAXIMUM 0x3 /* enum */
+/* C(0) status */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_C0_STATUS_OFST 4
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_C0_STATUS_LEN 4
+/*            Enum values, see field(s): */
+/*               MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT/CM1_STATUS */
+/* C(+1) status */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CP1_STATUS_OFST 8
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CP1_STATUS_LEN 4
+/*            Enum values, see field(s): */
+/*               MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT/CM1_STATUS */
+/* C(-1) value */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CM1_VALUE_OFST 12
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CM1_VALUE_LEN 4
+/* C(0) value */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_C0_VALUE_OFST 16
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_C0_VALUE_LEN 4
+/* C(+1) value */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CP1_VALUE_OFST 20
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CP1_VALUE_LEN 4
+
 
 /***********************************/
 /* MC_CMD_PCIE_TUNE
 #define       MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_OP_OFST 0
 #define       MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_OP_LEN 1
 /* enum: Get current RXEQ settings */
-#define          MC_CMD_PCIE_TUNE_IN_RXEQ_GET  0x0
+#define          MC_CMD_PCIE_TUNE_IN_RXEQ_GET 0x0
 /* enum: Override RXEQ settings */
-#define          MC_CMD_PCIE_TUNE_IN_RXEQ_SET  0x1
+#define          MC_CMD_PCIE_TUNE_IN_RXEQ_SET 0x1
 /* enum: Get current TX Driver settings */
-#define          MC_CMD_PCIE_TUNE_IN_TXEQ_GET  0x2
+#define          MC_CMD_PCIE_TUNE_IN_TXEQ_GET 0x2
 /* enum: Override TX Driver settings */
-#define          MC_CMD_PCIE_TUNE_IN_TXEQ_SET  0x3
+#define          MC_CMD_PCIE_TUNE_IN_TXEQ_SET 0x3
 /* enum: Start PCIe Serdes Eye diagram plot on a given lane. */
-#define          MC_CMD_PCIE_TUNE_IN_START_EYE_PLOT  0x5
+#define          MC_CMD_PCIE_TUNE_IN_START_EYE_PLOT 0x5
 /* enum: Poll PCIe Serdes Eye diagram plot. Returns one row of BER data. The
  * caller should call this command repeatedly after starting eye plot, until no
  * more data is returned.
  */
-#define          MC_CMD_PCIE_TUNE_IN_POLL_EYE_PLOT  0x6
+#define          MC_CMD_PCIE_TUNE_IN_POLL_EYE_PLOT 0x6
 /* enum: Enable the SERDES BIST and set it to generate a 200MHz square wave */
-#define          MC_CMD_PCIE_TUNE_IN_BIST_SQUARE_WAVE  0x7
+#define          MC_CMD_PCIE_TUNE_IN_BIST_SQUARE_WAVE 0x7
 /* Align the arguments to 32 bits */
 #define       MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_RSVD_OFST 1
 #define       MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_RSVD_LEN 3
 #define        MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_ID_LBN 0
 #define        MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_ID_WIDTH 8
 /* enum: Attenuation (0-15) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_ATT  0x0
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_ATT 0x0
 /* enum: CTLE Boost (0-15) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_BOOST  0x1
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_BOOST 0x1
 /* enum: DFE Tap1 (0 - max negative, 64 - zero, 127 - max positive) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP1  0x2
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP1 0x2
 /* enum: DFE Tap2 (0 - max negative, 32 - zero, 63 - max positive) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP2  0x3
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP2 0x3
 /* enum: DFE Tap3 (0 - max negative, 32 - zero, 63 - max positive) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP3  0x4
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP3 0x4
 /* enum: DFE Tap4 (0 - max negative, 32 - zero, 63 - max positive) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP4  0x5
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP4 0x5
 /* enum: DFE Tap5 (0 - max negative, 32 - zero, 63 - max positive) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP5  0x6
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP5 0x6
 /* enum: DFE DLev */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_DLEV  0x7
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_DLEV 0x7
 /* enum: Figure of Merit */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_FOM  0x8
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_FOM 0x8
 /* enum: CTLE EQ Capacitor (HF Gain) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_CTLE_EQC  0x9
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_CTLE_EQC 0x9
 /* enum: CTLE EQ Resistor (DC Gain) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_CTLE_EQRES  0xa
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_CTLE_EQRES 0xa
 #define        MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_LANE_LBN 8
 #define        MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_LANE_WIDTH 5
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_0  0x0 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_1  0x1 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_2  0x2 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_3  0x3 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_4  0x4 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_5  0x5 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_6  0x6 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_7  0x7 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_8  0x8 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_9  0x9 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_10  0xa /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_11  0xb /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_12  0xc /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_13  0xd /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_14  0xe /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_15  0xf /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_ALL  0x10 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_0 0x0 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_1 0x1 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_2 0x2 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_3 0x3 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_4 0x4 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_5 0x5 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_6 0x6 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_7 0x7 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_8 0x8 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_9 0x9 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_10 0xa /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_11 0xb /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_12 0xc /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_13 0xd /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_14 0xe /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_15 0xf /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_ALL 0x10 /* enum */
 #define        MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_AUTOCAL_LBN 13
 #define        MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_AUTOCAL_WIDTH 1
 #define        MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_RESERVED_LBN 14
 #define        MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_PARAM_ID_LBN 0
 #define        MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_PARAM_ID_WIDTH 8
 /* enum: TxMargin (PIPE) */
-#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_TXMARGIN  0x0
+#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_TXMARGIN 0x0
 /* enum: TxSwing (PIPE) */
-#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_TXSWING  0x1
+#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_TXSWING 0x1
 /* enum: De-emphasis coefficient C(-1) (PIPE) */
-#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_CM1  0x2
+#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_CM1 0x2
 /* enum: De-emphasis coefficient C(0) (PIPE) */
-#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_C0  0x3
+#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_C0 0x3
 /* enum: De-emphasis coefficient C(+1) (PIPE) */
-#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_CP1  0x4
+#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_CP1 0x4
 #define        MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_PARAM_LANE_LBN 8
 #define        MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_PARAM_LANE_WIDTH 4
 /*             Enum values, see field(s): */
 /* enum: re-read and apply licenses after a license key partition update; note
  * that this operation returns a zero-length response
  */
-#define          MC_CMD_LICENSING_IN_OP_UPDATE_LICENSE  0x0
+#define          MC_CMD_LICENSING_IN_OP_UPDATE_LICENSE 0x0
 /* enum: report counts of installed licenses */
-#define          MC_CMD_LICENSING_IN_OP_GET_KEY_STATS  0x1
+#define          MC_CMD_LICENSING_IN_OP_GET_KEY_STATS 0x1
 
 /* MC_CMD_LICENSING_OUT msgresponse */
 #define    MC_CMD_LICENSING_OUT_LEN 28
 #define       MC_CMD_LICENSING_OUT_LICENSING_SELF_TEST_OFST 24
 #define       MC_CMD_LICENSING_OUT_LICENSING_SELF_TEST_LEN 4
 /* enum: licensing subsystem self-test failed */
-#define          MC_CMD_LICENSING_OUT_SELF_TEST_FAIL  0x0
+#define          MC_CMD_LICENSING_OUT_SELF_TEST_FAIL 0x0
 /* enum: licensing subsystem self-test passed */
-#define          MC_CMD_LICENSING_OUT_SELF_TEST_PASS  0x1
+#define          MC_CMD_LICENSING_OUT_SELF_TEST_PASS 0x1
 
 
 /***********************************/
 /* enum: re-read and apply licenses after a license key partition update; note
  * that this operation returns a zero-length response
  */
-#define          MC_CMD_LICENSING_V3_IN_OP_UPDATE_LICENSE  0x0
+#define          MC_CMD_LICENSING_V3_IN_OP_UPDATE_LICENSE 0x0
 /* enum: report counts of installed licenses Returns EAGAIN if license
  * processing (updating) has been started but not yet completed.
  */
-#define          MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE  0x1
+#define          MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE 0x1
 
 /* MC_CMD_LICENSING_V3_OUT msgresponse */
 #define    MC_CMD_LICENSING_V3_OUT_LEN 88
 #define       MC_CMD_LICENSING_V3_OUT_LICENSING_SELF_TEST_OFST 20
 #define       MC_CMD_LICENSING_V3_OUT_LICENSING_SELF_TEST_LEN 4
 /* enum: licensing subsystem self-test failed */
-#define          MC_CMD_LICENSING_V3_OUT_SELF_TEST_FAIL  0x0
+#define          MC_CMD_LICENSING_V3_OUT_SELF_TEST_FAIL 0x0
 /* enum: licensing subsystem self-test passed */
-#define          MC_CMD_LICENSING_V3_OUT_SELF_TEST_PASS  0x1
+#define          MC_CMD_LICENSING_V3_OUT_SELF_TEST_PASS 0x1
 /* bitmask of licensed applications */
 #define       MC_CMD_LICENSING_V3_OUT_LICENSED_APPS_OFST 24
 #define       MC_CMD_LICENSING_V3_OUT_LICENSED_APPS_LEN 8
 #define       MC_CMD_GET_LICENSED_APP_STATE_OUT_STATE_OFST 0
 #define       MC_CMD_GET_LICENSED_APP_STATE_OUT_STATE_LEN 4
 /* enum: no (or invalid) license is present for the application */
-#define          MC_CMD_GET_LICENSED_APP_STATE_OUT_NOT_LICENSED  0x0
+#define          MC_CMD_GET_LICENSED_APP_STATE_OUT_NOT_LICENSED 0x0
 /* enum: a valid license is present for the application */
-#define          MC_CMD_GET_LICENSED_APP_STATE_OUT_LICENSED  0x1
+#define          MC_CMD_GET_LICENSED_APP_STATE_OUT_LICENSED 0x1
 
 
 /***********************************/
 #define       MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_STATE_OFST 0
 #define       MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_STATE_LEN 4
 /* enum: no (or invalid) license is present for the application */
-#define          MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_NOT_LICENSED  0x0
+#define          MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_NOT_LICENSED 0x0
 /* enum: a valid license is present for the application */
-#define          MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_LICENSED  0x1
+#define          MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_LICENSED 0x1
 
 
 /***********************************/
 #define       MC_CMD_LICENSED_APP_OP_IN_OP_OFST 4
 #define       MC_CMD_LICENSED_APP_OP_IN_OP_LEN 4
 /* enum: validate application */
-#define          MC_CMD_LICENSED_APP_OP_IN_OP_VALIDATE  0x0
+#define          MC_CMD_LICENSED_APP_OP_IN_OP_VALIDATE 0x0
 /* enum: mask application */
-#define          MC_CMD_LICENSED_APP_OP_IN_OP_MASK  0x1
+#define          MC_CMD_LICENSED_APP_OP_IN_OP_MASK 0x1
 /* arguments specific to this particular operation */
 #define       MC_CMD_LICENSED_APP_OP_IN_ARGS_OFST 8
 #define       MC_CMD_LICENSED_APP_OP_IN_ARGS_LEN 4
 #define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNITS_OFST 100
 #define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNITS_LEN 4
 /* enum: expiry units are accounting units */
-#define          MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_ACC  0x0
+#define          MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_ACC 0x0
 /* enum: expiry units are calendar days */
-#define          MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_DAYS  0x1
+#define          MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_DAYS 0x1
 /* base MAC address of the NIC stored in NVRAM (note that this is a constant
  * value for a given NIC regardless which function is calling, effectively this
  * is PF0 base MAC address)
 #define       MC_CMD_LICENSED_V3_MASK_FEATURES_IN_FLAG_OFST 8
 #define       MC_CMD_LICENSED_V3_MASK_FEATURES_IN_FLAG_LEN 4
 /* enum: turn the features off */
-#define          MC_CMD_LICENSED_V3_MASK_FEATURES_IN_OFF  0x0
+#define          MC_CMD_LICENSED_V3_MASK_FEATURES_IN_OFF 0x0
 /* enum: turn the features back on */
-#define          MC_CMD_LICENSED_V3_MASK_FEATURES_IN_ON  0x1
+#define          MC_CMD_LICENSED_V3_MASK_FEATURES_IN_ON 0x1
 
 /* MC_CMD_LICENSED_V3_MASK_FEATURES_OUT msgresponse */
 #define    MC_CMD_LICENSED_V3_MASK_FEATURES_OUT_LEN 0
  * This is an asynchronous operation owing to the time taken to validate an
  * ECDSA license
  */
-#define          MC_CMD_LICENSING_V3_TEMPORARY_SET  0x0
+#define          MC_CMD_LICENSING_V3_TEMPORARY_SET 0x0
 /* enum: clear the license immediately rather than waiting for the next power
  * cycle
  */
-#define          MC_CMD_LICENSING_V3_TEMPORARY_CLEAR  0x1
+#define          MC_CMD_LICENSING_V3_TEMPORARY_CLEAR 0x1
 /* enum: get the status of the asynchronous MC_CMD_LICENSING_V3_TEMPORARY_SET
  * operation
  */
-#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS  0x2
+#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS 0x2
 
 /* MC_CMD_LICENSING_V3_TEMPORARY_IN_SET msgrequest */
 #define    MC_CMD_LICENSING_V3_TEMPORARY_IN_SET_LEN 164
 #define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_STATUS_OFST 0
 #define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_STATUS_LEN 4
 /* enum: finished validating and installing license */
-#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_OK  0x0
+#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_OK 0x0
 /* enum: license validation and installation in progress */
-#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_IN_PROGRESS  0x1
+#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_IN_PROGRESS 0x1
 /* enum: licensing error. More specific error messages are not provided to
  * avoid exposing details of the licensing system to the client
  */
-#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_ERROR  0x2
+#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_ERROR 0x2
 /* bitmask of licensed features */
 #define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_OFST 4
 #define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_LEN 8
 #define       MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_OFST 8
 #define       MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_LEN 4
 /* enum: receive to just the specified queue */
-#define          MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_SIMPLE  0x0
+#define          MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_SIMPLE 0x0
 /* enum: receive to multiple queues using RSS context */
-#define          MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_RSS  0x1
+#define          MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_RSS 0x1
 /* RSS context (for RX_MODE_RSS) as returned by MC_CMD_RSS_CONTEXT_ALLOC. Note
  * that these handles should be considered opaque to the host, although a value
  * of 0xFFFFFFFF is guaranteed never to be a valid handle.
  */
 #define MC_CMD_GET_PORT_SNIFF_CONFIG 0xf8
 
-#define MC_CMD_0xf8_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0xf8_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_GET_PORT_SNIFF_CONFIG_IN msgrequest */
 #define    MC_CMD_GET_PORT_SNIFF_CONFIG_IN_LEN 0
 #define       MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_OFST 8
 #define       MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_LEN 4
 /* enum: receiving to just the specified queue */
-#define          MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_SIMPLE  0x0
+#define          MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_SIMPLE 0x0
 /* enum: receiving to multiple queues using RSS context */
-#define          MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_RSS  0x1
+#define          MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_RSS 0x1
 /* RSS context (for RX_MODE_RSS) */
 #define       MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_CONTEXT_OFST 12
 #define       MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_CONTEXT_LEN 4
 /* enum: Per-TXQ enable for multicast UDP destination lookup for possible
  * internal loopback. (ENTITY is a queue handle, VALUE is a single boolean.)
  */
-#define          MC_CMD_SET_PARSER_DISP_CONFIG_IN_TXQ_MCAST_UDP_DST_LOOKUP_EN  0x0
+#define          MC_CMD_SET_PARSER_DISP_CONFIG_IN_TXQ_MCAST_UDP_DST_LOOKUP_EN 0x0
 /* enum: Per-v-adaptor enable for suppression of self-transmissions on the
  * internal loopback path. (ENTITY is an EVB_PORT_ID, VALUE is a single
  * boolean.)
  */
-#define          MC_CMD_SET_PARSER_DISP_CONFIG_IN_VADAPTOR_SUPPRESS_SELF_TX  0x1
+#define          MC_CMD_SET_PARSER_DISP_CONFIG_IN_VADAPTOR_SUPPRESS_SELF_TX 0x1
 /* handle for the entity to update: queue handle, EVB port ID, etc. depending
  * on the type of configuration setting being changed
  */
 #define       MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_OFST 8
 #define       MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_LEN 4
 /* enum: receive to just the specified queue */
-#define          MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_SIMPLE  0x0
+#define          MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_SIMPLE 0x0
 /* enum: receive to multiple queues using RSS context */
-#define          MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_RSS  0x1
+#define          MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_RSS 0x1
 /* RSS context (for RX_MODE_RSS) as returned by MC_CMD_RSS_CONTEXT_ALLOC. Note
  * that these handles should be considered opaque to the host, although a value
  * of 0xFFFFFFFF is guaranteed never to be a valid handle.
  */
 #define MC_CMD_GET_TX_PORT_SNIFF_CONFIG 0xfc
 
-#define MC_CMD_0xfc_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0xfc_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_GET_TX_PORT_SNIFF_CONFIG_IN msgrequest */
 #define    MC_CMD_GET_TX_PORT_SNIFF_CONFIG_IN_LEN 0
 #define       MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_OFST 8
 #define       MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_LEN 4
 /* enum: receiving to just the specified queue */
-#define          MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_SIMPLE  0x0
+#define          MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_SIMPLE 0x0
 /* enum: receiving to multiple queues using RSS context */
-#define          MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_RSS  0x1
+#define          MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_RSS 0x1
 /* RSS context (for RX_MODE_RSS) */
 #define       MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_CONTEXT_OFST 12
 #define       MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_CONTEXT_LEN 4
 #define    MC_CMD_READ_ATB_IN_LEN 16
 #define       MC_CMD_READ_ATB_IN_SIGNAL_BUS_OFST 0
 #define       MC_CMD_READ_ATB_IN_SIGNAL_BUS_LEN 4
-#define          MC_CMD_READ_ATB_IN_BUS_CCOM  0x0 /* enum */
-#define          MC_CMD_READ_ATB_IN_BUS_CKR  0x1 /* enum */
-#define          MC_CMD_READ_ATB_IN_BUS_CPCIE  0x8 /* enum */
+#define          MC_CMD_READ_ATB_IN_BUS_CCOM 0x0 /* enum */
+#define          MC_CMD_READ_ATB_IN_BUS_CKR 0x1 /* enum */
+#define          MC_CMD_READ_ATB_IN_BUS_CPCIE 0x8 /* enum */
 #define       MC_CMD_READ_ATB_IN_SIGNAL_EN_BITNO_OFST 4
 #define       MC_CMD_READ_ATB_IN_SIGNAL_EN_BITNO_LEN 4
 #define       MC_CMD_READ_ATB_IN_SIGNAL_SEL_OFST 8
 #define        MC_CMD_PRIVILEGE_MASK_IN_FUNCTION_PF_WIDTH 16
 #define        MC_CMD_PRIVILEGE_MASK_IN_FUNCTION_VF_LBN 16
 #define        MC_CMD_PRIVILEGE_MASK_IN_FUNCTION_VF_WIDTH 16
-#define          MC_CMD_PRIVILEGE_MASK_IN_VF_NULL  0xffff /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_VF_NULL 0xffff /* enum */
 /* New privilege mask to be set. The mask will only be changed if the MSB is
  * set to 1.
  */
 #define       MC_CMD_PRIVILEGE_MASK_IN_NEW_MASK_OFST 4
 #define       MC_CMD_PRIVILEGE_MASK_IN_NEW_MASK_LEN 4
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_ADMIN             0x1 /* enum */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_LINK              0x2 /* enum */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_ONLOAD            0x4 /* enum */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_PTP               0x8 /* enum */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_INSECURE_FILTERS  0x10 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_ADMIN 0x1 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_LINK 0x2 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_ONLOAD 0x4 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_PTP 0x8 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_INSECURE_FILTERS 0x10 /* enum */
 /* enum: Deprecated. Equivalent to MAC_SPOOFING_TX combined with CHANGE_MAC. */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING      0x20
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_UNICAST           0x40 /* enum */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_MULTICAST         0x80 /* enum */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_BROADCAST         0x100 /* enum */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_ALL_MULTICAST     0x200 /* enum */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_PROMISCUOUS       0x400 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING 0x20
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_UNICAST 0x40 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_MULTICAST 0x80 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_BROADCAST 0x100 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_ALL_MULTICAST 0x200 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_PROMISCUOUS 0x400 /* enum */
 /* enum: Allows to set the TX packets' source MAC address to any arbitrary MAC
  * adress.
  */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING_TX   0x800
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING_TX 0x800
 /* enum: Privilege that allows a Function to change the MAC address configured
  * in its associated vAdapter/vPort.
  */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_CHANGE_MAC        0x1000
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_CHANGE_MAC 0x1000
 /* enum: Privilege that allows a Function to install filters that specify VLANs
  * that are not in the permit list for the associated vPort. This privilege is
  * primarily to support ESX where vPorts are created that restrict traffic to
  * only a set of permitted VLANs. See the vPort flag FLAG_VLAN_RESTRICT.
  */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_UNRESTRICTED_VLAN  0x2000
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_UNRESTRICTED_VLAN 0x2000
 /* enum: Privilege for insecure commands. Commands that belong to this group
  * are not permitted on secure adapters regardless of the privilege mask.
  */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_INSECURE          0x4000
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_INSECURE 0x4000
 /* enum: Set this bit to indicate that a new privilege mask is to be set,
  * otherwise the command will only read the existing mask.
  */
-#define          MC_CMD_PRIVILEGE_MASK_IN_DO_CHANGE             0x80000000
+#define          MC_CMD_PRIVILEGE_MASK_IN_DO_CHANGE 0x80000000
 
 /* MC_CMD_PRIVILEGE_MASK_OUT msgresponse */
 #define    MC_CMD_PRIVILEGE_MASK_OUT_LEN 4
 /* New link state mode to be set */
 #define       MC_CMD_LINK_STATE_MODE_IN_NEW_MODE_OFST 4
 #define       MC_CMD_LINK_STATE_MODE_IN_NEW_MODE_LEN 4
-#define          MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_AUTO       0x0 /* enum */
-#define          MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_UP         0x1 /* enum */
-#define          MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_DOWN       0x2 /* enum */
+#define          MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_AUTO 0x0 /* enum */
+#define          MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_UP 0x1 /* enum */
+#define          MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_DOWN 0x2 /* enum */
 /* enum: Use this value to just read the existing setting without modifying it.
  */
-#define          MC_CMD_LINK_STATE_MODE_IN_DO_NOT_CHANGE         0xffffffff
+#define          MC_CMD_LINK_STATE_MODE_IN_DO_NOT_CHANGE 0xffffffff
 
 /* MC_CMD_LINK_STATE_MODE_OUT msgresponse */
 #define    MC_CMD_LINK_STATE_MODE_OUT_LEN 4
 /* The groups of functions to have their privilege masks modified. */
 #define       MC_CMD_PRIVILEGE_MODIFY_IN_FN_GROUP_OFST 0
 #define       MC_CMD_PRIVILEGE_MODIFY_IN_FN_GROUP_LEN 4
-#define          MC_CMD_PRIVILEGE_MODIFY_IN_NONE       0x0 /* enum */
-#define          MC_CMD_PRIVILEGE_MODIFY_IN_ALL        0x1 /* enum */
-#define          MC_CMD_PRIVILEGE_MODIFY_IN_PFS_ONLY   0x2 /* enum */
-#define          MC_CMD_PRIVILEGE_MODIFY_IN_VFS_ONLY   0x3 /* enum */
-#define          MC_CMD_PRIVILEGE_MODIFY_IN_VFS_OF_PF  0x4 /* enum */
-#define          MC_CMD_PRIVILEGE_MODIFY_IN_ONE        0x5 /* enum */
+#define          MC_CMD_PRIVILEGE_MODIFY_IN_NONE 0x0 /* enum */
+#define          MC_CMD_PRIVILEGE_MODIFY_IN_ALL 0x1 /* enum */
+#define          MC_CMD_PRIVILEGE_MODIFY_IN_PFS_ONLY 0x2 /* enum */
+#define          MC_CMD_PRIVILEGE_MODIFY_IN_VFS_ONLY 0x3 /* enum */
+#define          MC_CMD_PRIVILEGE_MODIFY_IN_VFS_OF_PF 0x4 /* enum */
+#define          MC_CMD_PRIVILEGE_MODIFY_IN_ONE 0x5 /* enum */
 /* For VFS_OF_PF specify the PF, for ONE specify the target function */
 #define       MC_CMD_PRIVILEGE_MODIFY_IN_FUNCTION_OFST 4
 #define       MC_CMD_PRIVILEGE_MODIFY_IN_FUNCTION_LEN 4
 /* Sector type */
 #define       MC_CMD_XPM_READ_SECTOR_OUT_TYPE_OFST 0
 #define       MC_CMD_XPM_READ_SECTOR_OUT_TYPE_LEN 4
-#define          MC_CMD_XPM_READ_SECTOR_OUT_BLANK            0x0 /* enum */
-#define          MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_KEY_128   0x1 /* enum */
-#define          MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_KEY_256   0x2 /* enum */
-#define          MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_DATA      0x3 /* enum */
-#define          MC_CMD_XPM_READ_SECTOR_OUT_INVALID          0xff /* enum */
+#define          MC_CMD_XPM_READ_SECTOR_OUT_BLANK 0x0 /* enum */
+#define          MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_KEY_128 0x1 /* enum */
+#define          MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_KEY_256 0x2 /* enum */
+#define          MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_DATA 0x3 /* enum */
+#define          MC_CMD_XPM_READ_SECTOR_OUT_INVALID 0xff /* enum */
 /* Sector data */
 #define       MC_CMD_XPM_READ_SECTOR_OUT_DATA_OFST 4
 #define       MC_CMD_XPM_READ_SECTOR_OUT_DATA_LEN 1
 #define       TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_OFST 0
 #define       TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_LEN 2
 /* enum: the IANA allocated UDP port for VXLAN */
-#define          TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_VXLAN_UDP_PORT  0x12b5
+#define          TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_VXLAN_UDP_PORT 0x12b5
 /* enum: the IANA allocated UDP port for Geneve */
-#define          TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_GENEVE_UDP_PORT  0x17c1
+#define          TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_GENEVE_UDP_PORT 0x17c1
 #define       TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_LBN 0
 #define       TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_WIDTH 16
 /* tunnel encapsulation protocol (only those named below are supported) */
 #define       TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_OFST 2
 #define       TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_LEN 2
 /* enum: This port will be used for VXLAN on both IPv4 and IPv6 */
-#define          TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN  0x0
+#define          TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN 0x0
 /* enum: This port will be used for Geneve on both IPv4 and IPv6 */
-#define          TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE  0x1
+#define          TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE 0x1
 #define       TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_LBN 16
 #define       TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_WIDTH 16
 
 /* Timer mode. Meanings as per EVQ_TMR_REG.TC_TIMER_VAL */
 #define       MC_CMD_SET_EVQ_TMR_IN_TMR_MODE_OFST 12
 #define       MC_CMD_SET_EVQ_TMR_IN_TMR_MODE_LEN 4
-#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_DIS  0x0 /* enum */
-#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_IMMED_START  0x1 /* enum */
-#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_TRIG_START  0x2 /* enum */
-#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_INT_HLDOFF  0x3 /* enum */
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_DIS 0x0 /* enum */
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_IMMED_START 0x1 /* enum */
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_TRIG_START 0x2 /* enum */
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_INT_HLDOFF 0x3 /* enum */
 
 /* MC_CMD_SET_EVQ_TMR_OUT msgresponse */
 #define    MC_CMD_SET_EVQ_TMR_OUT_LEN 8
  */
 #define MC_CMD_ALLOCATE_TX_VFIFO_CP 0x11d
 
-#define MC_CMD_0x11d_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x11d_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_ALLOCATE_TX_VFIFO_CP_IN msgrequest */
 #define    MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_LEN 20
 /* Will the common pool be used as TX_vFIFO_ULL (1) */
 #define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_MODE_OFST 4
 #define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_MODE_LEN 4
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_ENABLED       0x1 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_ENABLED 0x1 /* enum */
 /* enum: Using this interface without TX_vFIFO_ULL is not supported for now */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_DISABLED      0x0
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_DISABLED 0x0
 /* Number of buffers to reserve for the common pool */
 #define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_SIZE_OFST 8
 #define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_SIZE_LEN 4
 #define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_INGRESS_OFST 12
 #define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_INGRESS_LEN 4
 /* enum: Extracts information from function */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE          -0x1
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE -0x1
 /* Network port or RX Engine to which the common pool connects. */
 #define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_EGRESS_OFST 16
 #define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_EGRESS_LEN 4
 /* enum: Extracts information from function */
-/*               MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE          -0x1 */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT0          0x0 /* enum */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT1          0x1 /* enum */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT2          0x2 /* enum */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT3          0x3 /* enum */
+/*               MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE -0x1 */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT0 0x0 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT1 0x1 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT2 0x2 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT3 0x3 /* enum */
 /* enum: To enable Switch loopback with Rx engine 0 */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE0     0x4
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE0 0x4
 /* enum: To enable Switch loopback with Rx engine 1 */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE1     0x5
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE1 0x5
 
 /* MC_CMD_ALLOCATE_TX_VFIFO_CP_OUT msgresponse */
 #define    MC_CMD_ALLOCATE_TX_VFIFO_CP_OUT_LEN 4
  */
 #define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO 0x11e
 
-#define MC_CMD_0x11e_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x11e_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN msgrequest */
 #define    MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LEN 20
 #define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_EGRESS_OFST 4
 #define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_EGRESS_LEN 4
 /* enum: Extracts information from common pool */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_USE_CP_VALUE   -0x1
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT0          0x0 /* enum */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT1          0x1 /* enum */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT2          0x2 /* enum */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT3          0x3 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_USE_CP_VALUE -0x1
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT0 0x0 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT1 0x1 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT2 0x2 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT3 0x3 /* enum */
 /* enum: To enable Switch loopback with Rx engine 0 */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE0     0x4
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE0 0x4
 /* enum: To enable Switch loopback with Rx engine 1 */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE1     0x5
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE1 0x5
 /* Minimum number of buffers that the pool must have */
 #define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_SIZE_OFST 8
 #define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_SIZE_LEN 4
 /* enum: Do not check the space available */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_NO_MINIMUM     0x0
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_NO_MINIMUM 0x0
 /* Will the vFIFO be used as TX_vFIFO_ULL */
 #define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_MODE_OFST 12
 #define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_MODE_LEN 4
 #define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PRIORITY_OFST 16
 #define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PRIORITY_LEN 4
 /* enum: Search for the lowest unused priority */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LOWEST_AVAILABLE  -0x1
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LOWEST_AVAILABLE -0x1
 
 /* MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT msgresponse */
 #define    MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT_LEN 8
  */
 #define MC_CMD_TEARDOWN_TX_VFIFO_VF 0x11f
 
-#define MC_CMD_0x11f_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x11f_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_TEARDOWN_TX_VFIFO_VF_IN msgrequest */
 #define    MC_CMD_TEARDOWN_TX_VFIFO_VF_IN_LEN 4
  */
 #define MC_CMD_DEALLOCATE_TX_VFIFO_CP 0x121
 
-#define MC_CMD_0x121_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x121_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_DEALLOCATE_TX_VFIFO_CP_IN msgrequest */
 #define    MC_CMD_DEALLOCATE_TX_VFIFO_CP_IN_LEN 4
  */
 #define MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS 0x124
 
-#define MC_CMD_0x124_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x124_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_IN msgrequest */
 #define    MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_IN_LEN 0
index ce8aabf9091e47acc1f19e74f42b4d99b2b871d6..9382bb0b4d5ad67df6e5c79566e872604dd46996 100644 (file)
@@ -352,6 +352,64 @@ static void efx_mcdi_phy_decode_link(struct efx_nic *efx,
        link_state->speed = speed;
 }
 
+/* Convert an ethtool FEC mode bitmask into MCDI PHY capability bits.
+ * The ethtool semantics are not well defined, particularly for combinations
+ * of bits, so we define our own as follows:
+ * OFF overrides any other bits, and means "disable all FEC" (with the
+ * exception of 25G KR4/CR4, where it is not possible to reject it if AN
+ * partner requests it).
+ * AUTO on its own means use cable requirements and link partner autoneg with
+ * fw-default preferences for the cable type.
+ * AUTO and either RS or BASER means use the specified FEC type if cable and
+ * link partner support it, otherwise autoneg/fw-default.
+ * RS or BASER alone means use the specified FEC type if cable and link partner
+ * support it and either requests it, otherwise no FEC.
+ * Both RS and BASER (whether AUTO or not) means use FEC if cable and link
+ * partner support it, preferring RS to BASER.
+ */
+static u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap)
+{
+       u32 ret = 0;
+
+       if (ethtool_cap & ETHTOOL_FEC_OFF)
+               return 0;
+
+       if (ethtool_cap & ETHTOOL_FEC_AUTO)
+               ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
+                      (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
+                      (1 << MC_CMD_PHY_CAP_RS_FEC_LBN);
+       if (ethtool_cap & ETHTOOL_FEC_RS)
+               ret |= (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) |
+                      (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN);
+       if (ethtool_cap & ETHTOOL_FEC_BASER)
+               ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
+                      (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
+                      (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN) |
+                      (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN);
+       return ret;
+}
+
+/* Invert ethtool_fec_caps_to_mcdi.  That function can never produce the two
+ * combinations where exactly one of BASER/RS is set and neither is requested;
+ * the implementation below maps both of those to AUTO.  This should never
+ * matter, and it's not clear what a better mapping would be anyway.
+ */
+static u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g)
+{
+       bool rs = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN),
+            rs_req = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN),
+            baser = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN)
+                           : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN),
+            baser_req = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN)
+                               : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN);
+
+       if (!baser && !rs)
+               return ETHTOOL_FEC_OFF;
+       return (rs_req ? ETHTOOL_FEC_RS : 0) |
+              (baser_req ? ETHTOOL_FEC_BASER : 0) |
+              (baser == baser_req && rs == rs_req ? 0 : ETHTOOL_FEC_AUTO);
+}
+
 static int efx_mcdi_phy_probe(struct efx_nic *efx)
 {
        struct efx_mcdi_phy_data *phy_data;
@@ -438,6 +496,13 @@ static int efx_mcdi_phy_probe(struct efx_nic *efx)
                MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS),
                MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL));
 
+       /* Record the initial FEC configuration (or nearest approximation
+        * representable in the ethtool configuration space)
+        */
+       efx->fec_config = mcdi_fec_caps_to_ethtool(caps,
+                                                  efx->link_state.speed == 25000 ||
+                                                  efx->link_state.speed == 50000);
+
        /* Default to Autonegotiated flow control if the PHY supports it */
        efx->wanted_fc = EFX_FC_RX | EFX_FC_TX;
        if (phy_data->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN))
@@ -458,6 +523,8 @@ int efx_mcdi_port_reconfigure(struct efx_nic *efx)
                    ethtool_linkset_to_mcdi_cap(efx->link_advertising) :
                    phy_cfg->forced_cap);
 
+       caps |= ethtool_fec_caps_to_mcdi(efx->fec_config);
+
        return efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx),
                                 efx->loopback_mode, 0);
 }
@@ -584,6 +651,8 @@ efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx,
                }
        }
 
+       caps |= ethtool_fec_caps_to_mcdi(efx->fec_config);
+
        rc = efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx),
                               efx->loopback_mode, 0);
        if (rc)
@@ -599,6 +668,85 @@ efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx,
        return 0;
 }
 
+static int efx_mcdi_phy_get_fecparam(struct efx_nic *efx,
+                                    struct ethtool_fecparam *fec)
+{
+       MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_V2_LEN);
+       u32 caps, active, speed; /* MCDI format */
+       bool is_25g = false;
+       size_t outlen;
+       int rc;
+
+       BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0);
+       rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0,
+                         outbuf, sizeof(outbuf), &outlen);
+       if (rc)
+               return rc;
+       if (outlen < MC_CMD_GET_LINK_OUT_V2_LEN)
+               return -EOPNOTSUPP;
+
+       /* behaviour for 25G/50G links depends on 25G BASER bit */
+       speed = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_LINK_SPEED);
+       is_25g = speed == 25000 || speed == 50000;
+
+       caps = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_CAP);
+       fec->fec = mcdi_fec_caps_to_ethtool(caps, is_25g);
+       /* BASER is never supported on 100G */
+       if (speed == 100000)
+               fec->fec &= ~ETHTOOL_FEC_BASER;
+
+       active = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_FEC_TYPE);
+       switch (active) {
+       case MC_CMD_FEC_NONE:
+               fec->active_fec = ETHTOOL_FEC_OFF;
+               break;
+       case MC_CMD_FEC_BASER:
+               fec->active_fec = ETHTOOL_FEC_BASER;
+               break;
+       case MC_CMD_FEC_RS:
+               fec->active_fec = ETHTOOL_FEC_RS;
+               break;
+       default:
+               netif_warn(efx, hw, efx->net_dev,
+                          "Firmware reports unrecognised FEC_TYPE %u\n",
+                          active);
+               /* We don't know what firmware has picked.  AUTO is as good a
+                * "can't happen" value as any other.
+                */
+               fec->active_fec = ETHTOOL_FEC_AUTO;
+               break;
+       }
+
+       return 0;
+}
+
+static int efx_mcdi_phy_set_fecparam(struct efx_nic *efx,
+                                    const struct ethtool_fecparam *fec)
+{
+       struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+       u32 caps;
+       int rc;
+
+       /* Work out what efx_mcdi_phy_set_link_ksettings() would produce from
+        * saved advertising bits
+        */
+       if (test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, efx->link_advertising))
+               caps = (ethtool_linkset_to_mcdi_cap(efx->link_advertising) |
+                       1 << MC_CMD_PHY_CAP_AN_LBN);
+       else
+               caps = phy_cfg->forced_cap;
+
+       caps |= ethtool_fec_caps_to_mcdi(fec->fec);
+       rc = efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx),
+                              efx->loopback_mode, 0);
+       if (rc)
+               return rc;
+
+       /* Record the new FEC setting for subsequent set_link calls */
+       efx->fec_config = fec->fec;
+       return 0;
+}
+
 static int efx_mcdi_phy_test_alive(struct efx_nic *efx)
 {
        MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_STATE_OUT_LEN);
@@ -977,6 +1125,8 @@ static const struct efx_phy_operations efx_mcdi_phy_ops = {
        .remove         = efx_mcdi_phy_remove,
        .get_link_ksettings = efx_mcdi_phy_get_link_ksettings,
        .set_link_ksettings = efx_mcdi_phy_set_link_ksettings,
+       .get_fecparam   = efx_mcdi_phy_get_fecparam,
+       .set_fecparam   = efx_mcdi_phy_set_fecparam,
        .test_alive     = efx_mcdi_phy_test_alive,
        .run_tests      = efx_mcdi_phy_run_tests,
        .test_name      = efx_mcdi_phy_test_name,
index d20a8660ee486e1fef50074bd34935d0e2a47259..2453f3849e72985c5d2157cfc33bdb5131b00df9 100644 (file)
@@ -627,6 +627,8 @@ static inline bool efx_link_state_equal(const struct efx_link_state *left,
  *     Serialised by the mac_lock.
  * @get_link_ksettings: Get ethtool settings. Serialised by the mac_lock.
  * @set_link_ksettings: Set ethtool settings. Serialised by the mac_lock.
+ * @get_fecparam: Get Forward Error Correction settings. Serialised by mac_lock.
+ * @set_fecparam: Set Forward Error Correction settings. Serialised by mac_lock.
  * @set_npage_adv: Set abilities advertised in (Extended) Next Page
  *     (only needed where AN bit is set in mmds)
  * @test_alive: Test that PHY is 'alive' (online)
@@ -645,6 +647,9 @@ struct efx_phy_operations {
                                   struct ethtool_link_ksettings *cmd);
        int (*set_link_ksettings)(struct efx_nic *efx,
                                  const struct ethtool_link_ksettings *cmd);
+       int (*get_fecparam)(struct efx_nic *efx, struct ethtool_fecparam *fec);
+       int (*set_fecparam)(struct efx_nic *efx,
+                           const struct ethtool_fecparam *fec);
        void (*set_npage_adv) (struct efx_nic *efx, u32);
        int (*test_alive) (struct efx_nic *efx);
        const char *(*test_name) (struct efx_nic *efx, unsigned int index);
@@ -704,6 +709,28 @@ union efx_multicast_hash {
 
 struct vfdi_status;
 
+/* The reserved RSS context value */
+#define EFX_EF10_RSS_CONTEXT_INVALID   0xffffffff
+/**
+ * struct efx_rss_context - A user-defined RSS context for filtering
+ * @list: node of linked list on which this struct is stored
+ * @context_id: the RSS_CONTEXT_ID returned by MC firmware, or
+ *     %EFX_EF10_RSS_CONTEXT_INVALID if this context is not present on the NIC.
+ *     For Siena, 0 if RSS is active, else %EFX_EF10_RSS_CONTEXT_INVALID.
+ * @user_id: the rss_context ID exposed to userspace over ethtool.
+ * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled
+ * @rx_hash_key: Toeplitz hash key for this RSS context
+ * @rx_indir_table: Indirection table for this RSS context
+ */
+struct efx_rss_context {
+       struct list_head list;
+       u32 context_id;
+       u32 user_id;
+       bool rx_hash_udp_4tuple;
+       u8 rx_hash_key[40];
+       u32 rx_indir_table[128];
+};
+
 /**
  * struct efx_nic - an Efx NIC
  * @name: Device name (net device name or bus id before net device registered)
@@ -764,11 +791,9 @@ struct vfdi_status;
  *     (valid only for NICs that set %EFX_RX_PKT_PREFIX_LEN; always negative)
  * @rx_packet_ts_offset: Offset of timestamp from start of packet data
  *     (valid only if channel->sync_timestamps_enabled; always negative)
- * @rx_hash_key: Toeplitz hash key for RSS
- * @rx_indir_table: Indirection table for RSS
  * @rx_scatter: Scatter mode enabled for receives
- * @rss_active: RSS enabled on hardware
- * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled
+ * @rss_context: Main RSS context.  Its @list member is the head of the list of
+ *     RSS contexts created by user requests
  * @int_error_count: Number of internal errors seen recently
  * @int_error_expire: Time at which error count will be expired
  * @irq_soft_enabled: Are IRQs soft-enabled? If not, IRQ handler will
@@ -800,6 +825,8 @@ struct vfdi_status;
  * @mdio_bus: PHY MDIO bus ID (only used by Siena)
  * @phy_mode: PHY operating mode. Serialised by @mac_lock.
  * @link_advertising: Autonegotiation advertising flags
+ * @fec_config: Forward Error Correction configuration flags.  For bit positions
+ *     see &enum ethtool_fec_config_bits.
  * @link_state: Current state of the link
  * @n_link_state_changes: Number of times the link has changed state
  * @unicast_filter: Flag for Falcon-arch simple unicast filter.
@@ -909,11 +936,8 @@ struct efx_nic {
        int rx_packet_hash_offset;
        int rx_packet_len_offset;
        int rx_packet_ts_offset;
-       u8 rx_hash_key[40];
-       u32 rx_indir_table[128];
        bool rx_scatter;
-       bool rss_active;
-       bool rx_hash_udp_4tuple;
+       struct efx_rss_context rss_context;
 
        unsigned int_error_count;
        unsigned long int_error_expire;
@@ -955,6 +979,7 @@ struct efx_nic {
        enum efx_phy_mode phy_mode;
 
        __ETHTOOL_DECLARE_LINK_MODE_MASK(link_advertising);
+       u32 fec_config;
        struct efx_link_state link_state;
        unsigned int n_link_state_changes;
 
@@ -1099,6 +1124,10 @@ struct efx_udp_tunnel {
  * @tx_write: Write TX descriptors and doorbell
  * @rx_push_rss_config: Write RSS hash key and indirection table to the NIC
  * @rx_pull_rss_config: Read RSS hash key and indirection table back from the NIC
+ * @rx_push_rss_context_config: Write RSS hash key and indirection table for
+ *     user RSS context to the NIC
+ * @rx_pull_rss_context_config: Read RSS hash key and indirection table for user
+ *     RSS context back from the NIC
  * @rx_probe: Allocate resources for RX queue
  * @rx_init: Initialise RX queue on the NIC
  * @rx_remove: Free resources for RX queue
@@ -1237,6 +1266,13 @@ struct efx_nic_type {
        int (*rx_push_rss_config)(struct efx_nic *efx, bool user,
                                  const u32 *rx_indir_table, const u8 *key);
        int (*rx_pull_rss_config)(struct efx_nic *efx);
+       int (*rx_push_rss_context_config)(struct efx_nic *efx,
+                                         struct efx_rss_context *ctx,
+                                         const u32 *rx_indir_table,
+                                         const u8 *key);
+       int (*rx_pull_rss_context_config)(struct efx_nic *efx,
+                                         struct efx_rss_context *ctx);
+       void (*rx_restore_rss_contexts)(struct efx_nic *efx);
        int (*rx_probe)(struct efx_rx_queue *rx_queue);
        void (*rx_init)(struct efx_rx_queue *rx_queue);
        void (*rx_remove)(struct efx_rx_queue *rx_queue);
index 6549fc685a48f7c98e920da9634cc1b7d53ec087..d080a414e8f2d383a7f596e7222ff6861c92e5ec 100644 (file)
@@ -374,7 +374,6 @@ enum {
  * @piobuf_size: size of a single PIO buffer
  * @must_restore_piobufs: Flag: PIO buffers have yet to be restored after MC
  *     reboot
- * @rx_rss_context: Firmware handle for our RSS context
  * @rx_rss_context_exclusive: Whether our RSS context is exclusive or shared
  * @stats: Hardware statistics
  * @workaround_35388: Flag: firmware supports workaround for bug 35388
@@ -415,7 +414,6 @@ struct efx_ef10_nic_data {
        unsigned int piobuf_handle[EF10_TX_PIOBUF_COUNT];
        u16 piobuf_size;
        bool must_restore_piobufs;
-       u32 rx_rss_context;
        bool rx_rss_context_exclusive;
        u64 stats[EF10_STAT_COUNT];
        bool workaround_35388;
index ae8645ae4492527269615da0fd95402ae203d21d..18aab25234baa6b0a05277049726842d4264806f 100644 (file)
@@ -350,11 +350,11 @@ static int siena_rx_pull_rss_config(struct efx_nic *efx)
         * siena_rx_push_rss_config, below)
         */
        efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG1);
-       memcpy(efx->rx_hash_key, &temp, sizeof(temp));
+       memcpy(efx->rss_context.rx_hash_key, &temp, sizeof(temp));
        efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG2);
-       memcpy(efx->rx_hash_key + sizeof(temp), &temp, sizeof(temp));
+       memcpy(efx->rss_context.rx_hash_key + sizeof(temp), &temp, sizeof(temp));
        efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG3);
-       memcpy(efx->rx_hash_key + 2 * sizeof(temp), &temp,
+       memcpy(efx->rss_context.rx_hash_key + 2 * sizeof(temp), &temp,
               FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8);
        efx_farch_rx_pull_indir_table(efx);
        return 0;
@@ -367,26 +367,26 @@ static int siena_rx_push_rss_config(struct efx_nic *efx, bool user,
 
        /* Set hash key for IPv4 */
        if (key)
-               memcpy(efx->rx_hash_key, key, sizeof(temp));
-       memcpy(&temp, efx->rx_hash_key, sizeof(temp));
+               memcpy(efx->rss_context.rx_hash_key, key, sizeof(temp));
+       memcpy(&temp, efx->rss_context.rx_hash_key, sizeof(temp));
        efx_writeo(efx, &temp, FR_BZ_RX_RSS_TKEY);
 
        /* Enable IPv6 RSS */
-       BUILD_BUG_ON(sizeof(efx->rx_hash_key) <
+       BUILD_BUG_ON(sizeof(efx->rss_context.rx_hash_key) <
                     2 * sizeof(temp) + FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8 ||
                     FRF_CZ_RX_RSS_IPV6_TKEY_HI_LBN != 0);
-       memcpy(&temp, efx->rx_hash_key, sizeof(temp));
+       memcpy(&temp, efx->rss_context.rx_hash_key, sizeof(temp));
        efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG1);
-       memcpy(&temp, efx->rx_hash_key + sizeof(temp), sizeof(temp));
+       memcpy(&temp, efx->rss_context.rx_hash_key + sizeof(temp), sizeof(temp));
        efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG2);
        EFX_POPULATE_OWORD_2(temp, FRF_CZ_RX_RSS_IPV6_THASH_ENABLE, 1,
                             FRF_CZ_RX_RSS_IPV6_IP_THASH_ENABLE, 1);
-       memcpy(&temp, efx->rx_hash_key + 2 * sizeof(temp),
+       memcpy(&temp, efx->rss_context.rx_hash_key + 2 * sizeof(temp),
               FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8);
        efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG3);
 
-       memcpy(efx->rx_indir_table, rx_indir_table,
-              sizeof(efx->rx_indir_table));
+       memcpy(efx->rss_context.rx_indir_table, rx_indir_table,
+              sizeof(efx->rss_context.rx_indir_table));
        efx_farch_rx_push_indir_table(efx);
 
        return 0;
@@ -432,8 +432,8 @@ static int siena_init_nic(struct efx_nic *efx)
                            EFX_RX_USR_BUF_SIZE >> 5);
        efx_writeo(efx, &temp, FR_AZ_RX_CFG);
 
-       siena_rx_push_rss_config(efx, false, efx->rx_indir_table, NULL);
-       efx->rss_active = true;
+       siena_rx_push_rss_config(efx, false, efx->rss_context.rx_indir_table, NULL);
+       efx->rss_context.context_id = 0; /* indicates RSS is active */
 
        /* Enable event logging */
        rc = efx_mcdi_log_ctrl(efx, true, false, 0);
index 012fb66eed8dd618d63fbeaad184accb0c08fc39..f0afb88d7bc2b02de3dc1054ec2ec5803f452a35 100644 (file)
@@ -2335,14 +2335,14 @@ static int smsc911x_drv_remove(struct platform_device *pdev)
        pdata = netdev_priv(dev);
        BUG_ON(!pdata);
        BUG_ON(!pdata->ioaddr);
-       WARN_ON(dev->phydev);
 
        SMSC_TRACE(pdata, ifdown, "Stopping driver");
 
+       unregister_netdev(dev);
+
        mdiobus_unregister(pdata->mii_bus);
        mdiobus_free(pdata->mii_bus);
 
-       unregister_netdev(dev);
        res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
                                           "smsc911x-memory");
        if (!res)
index 111e7ca9df5600c0ecd29bca11cf25a7f3e7e616..f5c5984afefb988c6c0ecd75234473f7084a2c32 100644 (file)
@@ -1295,7 +1295,7 @@ static int ave_open(struct net_device *ndev)
        val |= AVE_IIRQC_EN0 | (AVE_INTM_COUNT << 16);
        writel(val, priv->base + AVE_IIRQC);
 
-       val = AVE_GI_RXIINT | AVE_GI_RXOVF | AVE_GI_TX;
+       val = AVE_GI_RXIINT | AVE_GI_RXOVF | AVE_GI_TX | AVE_GI_RXDROP;
        ave_irq_restore(ndev, val);
 
        napi_enable(&priv->napi_rx);
index c728ffa095de04534e3e2e502cdc0d2a5eceff29..2a6521d33e4327ce420eb8fdaad1b09b05bdc615 100644 (file)
@@ -389,6 +389,8 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs,
 
 static void dwmac4_release_tx_desc(struct dma_desc *p, int mode)
 {
+       p->des0 = 0;
+       p->des1 = 0;
        p->des2 = 0;
        p->des3 = 0;
 }
index c8d86d77e03d1f47c56d45c613a8bf958c7fa0c2..a9856a8bf8ad28e5e00caa898c5fbeb5c57a9b64 100644 (file)
@@ -1844,6 +1844,11 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
                if (unlikely(status & tx_dma_own))
                        break;
 
+               /* Make sure descriptor fields are read after reading
+                * the own bit.
+                */
+               dma_rmb();
+
                /* Just consider the last segment and ...*/
                if (likely(!(status & tx_not_ls))) {
                        /* ... verify the status error condition */
@@ -2983,14 +2988,21 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
                        tcp_hdrlen(skb) / 4, (skb->len - proto_hdr_len));
 
        /* If context desc is used to change MSS */
-       if (mss_desc)
+       if (mss_desc) {
+               /* Make sure that the first descriptor has been completely
+                * written, including its own bit. The MSS descriptor actually
+                * comes before the first descriptor, so we need to make sure
+                * that the MSS descriptor's own bit is the last thing written.
+                */
+               dma_wmb();
                priv->hw->desc->set_tx_owner(mss_desc);
+       }
 
        /* The own bit must be the latest setting done when prepare the
         * descriptor and then barrier is needed to make sure that
         * all is coherent before granting the DMA engine.
         */
-       dma_wmb();
+       wmb();
 
        if (netif_msg_pktdata(priv)) {
                pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n",
@@ -3214,7 +3226,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                 * descriptor and then barrier is needed to make sure that
                 * all is coherent before granting the DMA engine.
                 */
-               dma_wmb();
+               wmb();
        }
 
        netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
index 63d3d6b215f3096da6da95fe489bf1172c831a28..a94f50442613e9f77cec6aff24fbf19a5a33756b 100644 (file)
@@ -312,7 +312,7 @@ static struct vnet *vnet_new(const u64 *local_mac,
        dev->ethtool_ops = &vnet_ethtool_ops;
        dev->watchdog_timeo = VNET_TX_TIMEOUT;
 
-       dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE |
+       dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_ALL_TSO |
                           NETIF_F_HW_CSUM | NETIF_F_SG;
        dev->features = dev->hw_features;
 
index 1b1b78fdc1384975856fe6a3d8368fe868dfbc23..1b4af54a4968917d75e04865dd5a67fd40ec4823 100644 (file)
@@ -120,14 +120,18 @@ do {                                                              \
 #define CPDMA_RXCP             0x60
 
 #define CPSW_POLL_WEIGHT       64
+#define CPSW_RX_VLAN_ENCAP_HDR_SIZE            4
 #define CPSW_MIN_PACKET_SIZE   (VLAN_ETH_ZLEN)
-#define CPSW_MAX_PACKET_SIZE   (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
+#define CPSW_MAX_PACKET_SIZE   (VLAN_ETH_FRAME_LEN +\
+                                ETH_FCS_LEN +\
+                                CPSW_RX_VLAN_ENCAP_HDR_SIZE)
 
 #define RX_PRIORITY_MAPPING    0x76543210
 #define TX_PRIORITY_MAPPING    0x33221100
 #define CPDMA_TX_PRIORITY_MAP  0x01234567
 
 #define CPSW_VLAN_AWARE                BIT(1)
+#define CPSW_RX_VLAN_ENCAP     BIT(2)
 #define CPSW_ALE_VLAN_AWARE    1
 
 #define CPSW_FIFO_NORMAL_MODE          (0 << 16)
@@ -148,6 +152,18 @@ do {                                                               \
 #define CPSW_MAX_QUEUES                8
 #define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256
 
+#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT      29
+#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK                GENMASK(2, 0)
+#define CPSW_RX_VLAN_ENCAP_HDR_VID_SHIFT       16
+#define CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_SHIFT  8
+#define CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_MSK    GENMASK(1, 0)
+enum {
+       CPSW_RX_VLAN_ENCAP_HDR_PKT_VLAN_TAG = 0,
+       CPSW_RX_VLAN_ENCAP_HDR_PKT_RESERV,
+       CPSW_RX_VLAN_ENCAP_HDR_PKT_PRIO_TAG,
+       CPSW_RX_VLAN_ENCAP_HDR_PKT_UNTAG,
+};
+
 static int debug_level;
 module_param(debug_level, int, 0);
 MODULE_PARM_DESC(debug_level, "cpsw debug level (NETIF_MSG bits)");
@@ -718,6 +734,49 @@ static void cpsw_tx_handler(void *token, int len, int status)
        dev_kfree_skb_any(skb);
 }
 
+static void cpsw_rx_vlan_encap(struct sk_buff *skb)
+{
+       struct cpsw_priv *priv = netdev_priv(skb->dev);
+       struct cpsw_common *cpsw = priv->cpsw;
+       u32 rx_vlan_encap_hdr = *((u32 *)skb->data);
+       u16 vtag, vid, prio, pkt_type;
+
+       /* Remove VLAN header encapsulation word */
+       skb_pull(skb, CPSW_RX_VLAN_ENCAP_HDR_SIZE);
+
+       pkt_type = (rx_vlan_encap_hdr >>
+                   CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_SHIFT) &
+                   CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_MSK;
+       /* Ignore unknown & Priority-tagged packets */
+       if (pkt_type == CPSW_RX_VLAN_ENCAP_HDR_PKT_RESERV ||
+           pkt_type == CPSW_RX_VLAN_ENCAP_HDR_PKT_PRIO_TAG)
+               return;
+
+       vid = (rx_vlan_encap_hdr >>
+              CPSW_RX_VLAN_ENCAP_HDR_VID_SHIFT) &
+              VLAN_VID_MASK;
+       /* Ignore vid 0 and pass packet as is */
+       if (!vid)
+               return;
+       /* Ignore default vlans in dual mac mode */
+       if (cpsw->data.dual_emac &&
+           vid == cpsw->slaves[priv->emac_port].port_vlan)
+               return;
+
+       prio = (rx_vlan_encap_hdr >>
+               CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT) &
+               CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK;
+
+       vtag = (prio << VLAN_PRIO_SHIFT) | vid;
+       __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vtag);
+
+       /* strip vlan tag for VLAN-tagged packet */
+       if (pkt_type == CPSW_RX_VLAN_ENCAP_HDR_PKT_VLAN_TAG) {
+               memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
+               skb_pull(skb, VLAN_HLEN);
+       }
+}
+
 static void cpsw_rx_handler(void *token, int len, int status)
 {
        struct cpdma_chan       *ch;
@@ -752,6 +811,8 @@ static void cpsw_rx_handler(void *token, int len, int status)
        if (new_skb) {
                skb_copy_queue_mapping(new_skb, skb);
                skb_put(skb, len);
+               if (status & CPDMA_RX_VLAN_ENCAP)
+                       cpsw_rx_vlan_encap(skb);
                cpts_rx_timestamp(cpsw->cpts, skb);
                skb->protocol = eth_type_trans(skb, ndev);
                netif_receive_skb(skb);
@@ -1014,7 +1075,8 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave,
                /* set speed_in input in case RMII mode is used in 100Mbps */
                if (phy->speed == 100)
                        mac_control |= BIT(15);
-               else if (phy->speed == 10)
+               /* in band mode only works in 10Mbps RGMII mode */
+               else if ((phy->speed == 10) && phy_interface_is_rgmii(phy))
                        mac_control |= BIT(18); /* In Band mode */
 
                if (priv->rx_pause)
@@ -1406,7 +1468,7 @@ static void cpsw_init_host_port(struct cpsw_priv *priv)
        cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM, ALE_VLAN_AWARE,
                             CPSW_ALE_VLAN_AWARE);
        control_reg = readl(&cpsw->regs->control);
-       control_reg |= CPSW_VLAN_AWARE;
+       control_reg |= CPSW_VLAN_AWARE | CPSW_RX_VLAN_ENCAP;
        writel(control_reg, &cpsw->regs->control);
        fifo_mode = (cpsw->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE :
                     CPSW_FIFO_NORMAL_MODE;
@@ -3122,7 +3184,7 @@ static int cpsw_probe(struct platform_device *pdev)
                        cpsw->quirk_irq = true;
        }
 
-       ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+       ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX;
 
        ndev->netdev_ops = &cpsw_netdev_ops;
        ndev->ethtool_ops = &cpsw_ethtool_ops;
index 6f9173ff941495564a1567d5fe1c3b2962188857..31ae04117f0a2e174739c47b34379fb61c14c52e 100644 (file)
@@ -1164,7 +1164,7 @@ static int __cpdma_chan_process(struct cpdma_chan *chan)
                outlen -= CPDMA_DESC_CRC_LEN;
 
        status  = status & (CPDMA_DESC_EOQ | CPDMA_DESC_TD_COMPLETE |
-                           CPDMA_DESC_PORT_MASK);
+                           CPDMA_DESC_PORT_MASK | CPDMA_RX_VLAN_ENCAP);
 
        chan->head = desc_from_phys(pool, desc_read(desc, hw_next));
        chan_write(chan, cp, desc_dma);
index fd65ce2b83deb0d715cb397cad1b9c59c3f59d1d..d399af5389b8cee5946d2f0dbcf6685d236ab3ef 100644 (file)
@@ -19,6 +19,8 @@
 
 #define CPDMA_RX_SOURCE_PORT(__status__)       ((__status__ >> 16) & 0x7)
 
+#define CPDMA_RX_VLAN_ENCAP BIT(19)
+
 #define CPDMA_EOI_RX_THRESH    0x0
 #define CPDMA_EOI_RX           0x1
 #define CPDMA_EOI_TX           0x2
index b919e89a9b932b43fa8e006351e7db10242bd54f..516dd59249d79c61a569aca236fd14c8debc512b 100644 (file)
@@ -1694,6 +1694,7 @@ static struct pernet_operations geneve_net_ops = {
        .exit_batch = geneve_exit_batch_net,
        .id   = &geneve_net_id,
        .size = sizeof(struct geneve_net),
+       .async = true,
 };
 
 static int __init geneve_init_module(void)
index f38e32a7ec9c979ac4524c31e09da375a6e0606c..127edd23018f52294e7efccd6c06f85a178e4c01 100644 (file)
@@ -1325,6 +1325,7 @@ static struct pernet_operations gtp_net_ops = {
        .exit   = gtp_net_exit,
        .id     = &gtp_net_id,
        .size   = sizeof(struct gtp_net),
+       .async  = true,
 };
 
 static int __init gtp_init(void)
index c8a66827100cbb5460f07eba41ddee73346ab57a..3f25b9c8ea593c8d6e3f04170a52070488033d01 100644 (file)
@@ -1,3 +1,3 @@
 obj-$(CONFIG_HYPERV_NET) += hv_netvsc.o
 
-hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o
+hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o netvsc_trace.o
index 0db3bd1ea06f5a71eb6fc67ba00abb066c7f3414..960f061414722e50043c3a71d33757e737507a88 100644 (file)
@@ -173,6 +173,7 @@ struct rndis_device {
        struct list_head req_list;
 
        struct work_struct mcast_work;
+       u32 filter;
 
        bool link_state;        /* 0 - link up, 1 - link down */
 
@@ -211,7 +212,6 @@ void netvsc_channel_cb(void *context);
 int netvsc_poll(struct napi_struct *napi, int budget);
 
 void rndis_set_subchannel(struct work_struct *w);
-bool rndis_filter_opened(const struct netvsc_device *nvdev);
 int rndis_filter_open(struct netvsc_device *nvdev);
 int rndis_filter_close(struct netvsc_device *nvdev);
 struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
@@ -793,6 +793,7 @@ struct netvsc_device {
 
        /* Receive buffer allocated by us but manages by NetVSP */
        void *recv_buf;
+       u32 recv_buf_size; /* allocated bytes */
        u32 recv_buf_gpadl_handle;
        u32 recv_section_cnt;
        u32 recv_section_size;
index 17e529af79dcd2664e19651c4065a8b4fec7b597..c9910c33e671f3f161f83249491d63e557274bbf 100644 (file)
@@ -36,6 +36,7 @@
 #include <asm/sync_bitops.h>
 
 #include "hyperv_net.h"
+#include "netvsc_trace.h"
 
 /*
  * Switch the data path from the synthetic interface to the VF
@@ -57,6 +58,8 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
                init_pkt->msg.v4_msg.active_dp.active_datapath =
                        NVSP_DATAPATH_SYNTHETIC;
 
+       trace_nvsp_send(ndev, init_pkt);
+
        vmbus_sendpacket(dev->channel, init_pkt,
                               sizeof(struct nvsp_message),
                               (unsigned long)init_pkt,
@@ -90,6 +93,11 @@ static void free_netvsc_device(struct rcu_head *head)
                = container_of(head, struct netvsc_device, rcu);
        int i;
 
+       kfree(nvdev->extension);
+       vfree(nvdev->recv_buf);
+       vfree(nvdev->send_buf);
+       kfree(nvdev->send_section_map);
+
        for (i = 0; i < VRSS_CHANNEL_MAX; i++)
                vfree(nvdev->chan_table[i].mrc.slots);
 
@@ -124,6 +132,8 @@ static void netvsc_revoke_buf(struct hv_device *device,
                revoke_packet->msg.v1_msg.
                revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
 
+               trace_nvsp_send(ndev, revoke_packet);
+
                ret = vmbus_sendpacket(device->channel,
                                       revoke_packet,
                                       sizeof(struct nvsp_message),
@@ -164,6 +174,8 @@ static void netvsc_revoke_buf(struct hv_device *device,
                revoke_packet->msg.v1_msg.revoke_send_buf.id =
                        NETVSC_SEND_BUFFER_ID;
 
+               trace_nvsp_send(ndev, revoke_packet);
+
                ret = vmbus_sendpacket(device->channel,
                                       revoke_packet,
                                       sizeof(struct nvsp_message),
@@ -211,12 +223,6 @@ static void netvsc_teardown_gpadl(struct hv_device *device,
                net_device->recv_buf_gpadl_handle = 0;
        }
 
-       if (net_device->recv_buf) {
-               /* Free up the receive buffer */
-               vfree(net_device->recv_buf);
-               net_device->recv_buf = NULL;
-       }
-
        if (net_device->send_buf_gpadl_handle) {
                ret = vmbus_teardown_gpadl(device->channel,
                                           net_device->send_buf_gpadl_handle);
@@ -231,12 +237,6 @@ static void netvsc_teardown_gpadl(struct hv_device *device,
                }
                net_device->send_buf_gpadl_handle = 0;
        }
-       if (net_device->send_buf) {
-               /* Free up the send buffer */
-               vfree(net_device->send_buf);
-               net_device->send_buf = NULL;
-       }
-       kfree(net_device->send_section_map);
 }
 
 int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
@@ -282,6 +282,8 @@ static int netvsc_init_buf(struct hv_device *device,
                goto cleanup;
        }
 
+       net_device->recv_buf_size = buf_size;
+
        /*
         * Establish the gpadl handle for this buffer on this
         * channel.  Note: This call uses the vmbus connection rather
@@ -305,6 +307,8 @@ static int netvsc_init_buf(struct hv_device *device,
        init_packet->msg.v1_msg.
                send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
 
+       trace_nvsp_send(ndev, init_packet);
+
        /* Send the gpadl notification request */
        ret = vmbus_sendpacket(device->channel, init_packet,
                               sizeof(struct nvsp_message),
@@ -384,6 +388,8 @@ static int netvsc_init_buf(struct hv_device *device,
                net_device->send_buf_gpadl_handle;
        init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;
 
+       trace_nvsp_send(ndev, init_packet);
+
        /* Send the gpadl notification request */
        ret = vmbus_sendpacket(device->channel, init_packet,
                               sizeof(struct nvsp_message),
@@ -452,6 +458,8 @@ static int negotiate_nvsp_ver(struct hv_device *device,
        init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
        init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
 
+       trace_nvsp_send(ndev, init_packet);
+
        /* Send the init request */
        ret = vmbus_sendpacket(device->channel, init_packet,
                               sizeof(struct nvsp_message),
@@ -484,6 +492,8 @@ static int negotiate_nvsp_ver(struct hv_device *device,
                init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
        }
 
+       trace_nvsp_send(ndev, init_packet);
+
        ret = vmbus_sendpacket(device->channel, init_packet,
                                sizeof(struct nvsp_message),
                                (unsigned long)init_packet,
@@ -496,6 +506,7 @@ static int netvsc_connect_vsp(struct hv_device *device,
                              struct netvsc_device *net_device,
                              const struct netvsc_device_info *device_info)
 {
+       struct net_device *ndev = hv_get_drvdata(device);
        static const u32 ver_list[] = {
                NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
                NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5
@@ -536,6 +547,8 @@ static int netvsc_connect_vsp(struct hv_device *device,
                send_ndis_ver.ndis_minor_ver =
                                ndis_version & 0xFFFF;
 
+       trace_nvsp_send(ndev, init_packet);
+
        /* Send the init request */
        ret = vmbus_sendpacket(device->channel, init_packet,
                                sizeof(struct nvsp_message),
@@ -562,26 +575,29 @@ void netvsc_device_remove(struct hv_device *device)
                = rtnl_dereference(net_device_ctx->nvdev);
        int i;
 
-       cancel_work_sync(&net_device->subchan_work);
-
        netvsc_revoke_buf(device, net_device);
 
        RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
 
+       /* And disassociate NAPI context from device */
+       for (i = 0; i < net_device->num_chn; i++)
+               netif_napi_del(&net_device->chan_table[i].napi);
+
        /*
         * At this point, no one should be accessing net_device
         * except in here
         */
        netdev_dbg(ndev, "net device safe to remove\n");
 
+       /* older versions require that buffer be revoked before close */
+       if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4)
+               netvsc_teardown_gpadl(device, net_device);
+
        /* Now, we can close the channel safely */
        vmbus_close(device->channel);
 
-       netvsc_teardown_gpadl(device, net_device);
-
-       /* And dissassociate NAPI context from device */
-       for (i = 0; i < net_device->num_chn; i++)
-               netif_napi_del(&net_device->chan_table[i].napi);
+       if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4)
+               netvsc_teardown_gpadl(device, net_device);
 
        /* Release all resources */
        free_netvsc_device_rcu(net_device);
@@ -645,14 +661,18 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
        queue_sends =
                atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);
 
-       if (net_device->destroy && queue_sends == 0)
-               wake_up(&net_device->wait_drain);
+       if (unlikely(net_device->destroy)) {
+               if (queue_sends == 0)
+                       wake_up(&net_device->wait_drain);
+       } else {
+               struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);
 
-       if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
-           (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
-            queue_sends < 1)) {
-               netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
-               ndev_ctx->eth_stats.wake_queue++;
+               if (netif_tx_queue_stopped(txq) &&
+                   (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
+                    queue_sends < 1)) {
+                       netif_tx_wake_queue(txq);
+                       ndev_ctx->eth_stats.wake_queue++;
+               }
        }
 }
 
@@ -747,7 +767,7 @@ static inline int netvsc_send_pkt(
        struct sk_buff *skb)
 {
        struct nvsp_message nvmsg;
-       struct nvsp_1_message_send_rndis_packet * const rpkt =
+       struct nvsp_1_message_send_rndis_packet *rpkt =
                &nvmsg.msg.v1_msg.send_rndis_pkt;
        struct netvsc_channel * const nvchan =
                &net_device->chan_table[packet->q_idx];
@@ -776,6 +796,8 @@ static inline int netvsc_send_pkt(
        if (out_channel->rescind)
                return -ENODEV;
 
+       trace_nvsp_send_pkt(ndev, out_channel, rpkt);
+
        if (packet->page_buf_cnt) {
                if (packet->cp_partial)
                        pb += packet->rmsg_pgcnt;
@@ -852,13 +874,6 @@ int netvsc_send(struct net_device *ndev,
        if (unlikely(!net_device || net_device->destroy))
                return -ENODEV;
 
-       /* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get
-        * here before the negotiation with the host is finished and
-        * send_section_map may not be allocated yet.
-        */
-       if (unlikely(!net_device->send_section_map))
-               return -EAGAIN;
-
        nvchan = &net_device->chan_table[packet->q_idx];
        packet->send_buf_index = NETVSC_INVALID_INDEX;
        packet->cp_partial = false;
@@ -866,10 +881,8 @@ int netvsc_send(struct net_device *ndev,
        /* Send control message directly without accessing msd (Multi-Send
         * Data) field which may be changed during data packet processing.
         */
-       if (!skb) {
-               cur_send = packet;
-               goto send_now;
-       }
+       if (!skb)
+               return netvsc_send_pkt(device, packet, net_device, pb, skb);
 
        /* batch packets in send buffer if possible */
        msdp = &nvchan->msd;
@@ -953,7 +966,6 @@ int netvsc_send(struct net_device *ndev,
                }
        }
 
-send_now:
        if (cur_send)
                ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb);
 
@@ -1085,13 +1097,30 @@ static int netvsc_receive(struct net_device *ndev,
 
        /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
        for (i = 0; i < count; i++) {
-               void *data = recv_buf
-                       + vmxferpage_packet->ranges[i].byte_offset;
+               u32 offset = vmxferpage_packet->ranges[i].byte_offset;
                u32 buflen = vmxferpage_packet->ranges[i].byte_count;
+               void *data;
+               int ret;
+
+               if (unlikely(offset + buflen > net_device->recv_buf_size)) {
+                       status = NVSP_STAT_FAIL;
+                       netif_err(net_device_ctx, rx_err, ndev,
+                                 "Packet offset:%u + len:%u too big\n",
+                                 offset, buflen);
+
+                       continue;
+               }
+
+               data = recv_buf + offset;
+
+               trace_rndis_recv(ndev, q_idx, data);
 
                /* Pass it to the upper layer */
-               status = rndis_filter_receive(ndev, net_device,
-                                             channel, data, buflen);
+               ret = rndis_filter_receive(ndev, net_device,
+                                          channel, data, buflen);
+
+               if (unlikely(ret != NVSP_STAT_SUCCESS))
+                       status = NVSP_STAT_FAIL;
        }
 
        enq_receive_complete(ndev, net_device, q_idx,
@@ -1153,6 +1182,8 @@ static int netvsc_process_raw_pkt(struct hv_device *device,
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
        struct nvsp_message *nvmsg = hv_pkt_data(desc);
 
+       trace_nvsp_recv(ndev, channel, nvmsg);
+
        switch (desc->type) {
        case VM_PKT_COMP:
                netvsc_send_completion(net_device, channel, device,
@@ -1217,9 +1248,10 @@ int netvsc_poll(struct napi_struct *napi, int budget)
        if (send_recv_completions(ndev, net_device, nvchan) == 0 &&
            work_done < budget &&
            napi_complete_done(napi, work_done) &&
-           hv_end_read(&channel->inbound)) {
+           hv_end_read(&channel->inbound) &&
+           napi_schedule_prep(napi)) {
                hv_begin_read(&channel->inbound);
-               napi_reschedule(napi);
+               __napi_schedule(napi);
        }
 
        /* Driver may overshoot since multiple packets per descriptor */
@@ -1242,7 +1274,7 @@ void netvsc_channel_cb(void *context)
                /* disable interupts from host */
                hv_begin_read(rbi);
 
-               __napi_schedule(&nvchan->napi);
+               __napi_schedule_irqoff(&nvchan->napi);
        }
 }
 
@@ -1296,7 +1328,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
                         netvsc_channel_cb, net_device->chan_table);
 
        if (ret != 0) {
-               netif_napi_del(&net_device->chan_table[0].napi);
                netdev_err(ndev, "unable to open channel: %d\n", ret);
                goto cleanup;
        }
@@ -1306,11 +1337,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
 
        napi_enable(&net_device->chan_table[0].napi);
 
-       /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
-        * populated.
-        */
-       rcu_assign_pointer(net_device_ctx->nvdev, net_device);
-
        /* Connect with the NetVsp */
        ret = netvsc_connect_vsp(device, net_device, device_info);
        if (ret != 0) {
@@ -1319,6 +1345,11 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
                goto close;
        }
 
+       /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
+        * populated.
+        */
+       rcu_assign_pointer(net_device_ctx->nvdev, net_device);
+
        return net_device;
 
 close:
@@ -1329,6 +1360,7 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
        vmbus_close(device->channel);
 
 cleanup:
+       netif_napi_del(&net_device->chan_table[0].napi);
        free_netvsc_device(&net_device->rcu);
 
        return ERR_PTR(ret);
index c5584c2d440e033b649193b39435d0a458aaf694..5d716750ae225483d98c378e732b40f6fafc5cba 100644 (file)
 
 #include "hyperv_net.h"
 
-#define RING_SIZE_MIN          64
+#define RING_SIZE_MIN  64
+#define RETRY_US_LO    5000
+#define RETRY_US_HI    10000
+#define RETRY_MAX      2000    /* >10 sec */
 
 #define LINKCHANGE_INT (2 * HZ)
 #define VF_TAKEOVER_INT (HZ / 10)
@@ -66,12 +69,43 @@ static int debug = -1;
 module_param(debug, int, S_IRUGO);
 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
 
-static void netvsc_set_multicast_list(struct net_device *net)
+static void netvsc_change_rx_flags(struct net_device *net, int change)
 {
-       struct net_device_context *net_device_ctx = netdev_priv(net);
-       struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
+       struct net_device_context *ndev_ctx = netdev_priv(net);
+       struct net_device *vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+       int inc;
+
+       if (!vf_netdev)
+               return;
+
+       if (change & IFF_PROMISC) {
+               inc = (net->flags & IFF_PROMISC) ? 1 : -1;
+               dev_set_promiscuity(vf_netdev, inc);
+       }
+
+       if (change & IFF_ALLMULTI) {
+               inc = (net->flags & IFF_ALLMULTI) ? 1 : -1;
+               dev_set_allmulti(vf_netdev, inc);
+       }
+}
 
-       rndis_filter_update(nvdev);
+static void netvsc_set_rx_mode(struct net_device *net)
+{
+       struct net_device_context *ndev_ctx = netdev_priv(net);
+       struct net_device *vf_netdev;
+       struct netvsc_device *nvdev;
+
+       rcu_read_lock();
+       vf_netdev = rcu_dereference(ndev_ctx->vf_netdev);
+       if (vf_netdev) {
+               dev_uc_sync(vf_netdev, net);
+               dev_mc_sync(vf_netdev, net);
+       }
+
+       nvdev = rcu_dereference(ndev_ctx->nvdev);
+       if (nvdev)
+               rndis_filter_update(nvdev);
+       rcu_read_unlock();
 }
 
 static int netvsc_open(struct net_device *net)
@@ -91,10 +125,7 @@ static int netvsc_open(struct net_device *net)
                return ret;
        }
 
-       netif_tx_wake_all_queues(net);
-
        rdev = nvdev->extension;
-
        if (!rdev->link_state)
                netif_carrier_on(net);
 
@@ -112,36 +143,25 @@ static int netvsc_open(struct net_device *net)
        return 0;
 }
 
-static int netvsc_close(struct net_device *net)
+static int netvsc_wait_until_empty(struct netvsc_device *nvdev)
 {
-       struct net_device_context *net_device_ctx = netdev_priv(net);
-       struct net_device *vf_netdev
-               = rtnl_dereference(net_device_ctx->vf_netdev);
-       struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
-       int ret = 0;
-       u32 aread, i, msec = 10, retry = 0, retry_max = 20;
-       struct vmbus_channel *chn;
-
-       netif_tx_disable(net);
-
-       /* No need to close rndis filter if it is removed already */
-       if (!nvdev)
-               goto out;
-
-       ret = rndis_filter_close(nvdev);
-       if (ret != 0) {
-               netdev_err(net, "unable to close device (ret %d).\n", ret);
-               return ret;
-       }
+       unsigned int retry = 0;
+       int i;
 
        /* Ensure pending bytes in ring are read */
-       while (true) {
-               aread = 0;
+       for (;;) {
+               u32 aread = 0;
+
                for (i = 0; i < nvdev->num_chn; i++) {
-                       chn = nvdev->chan_table[i].channel;
+                       struct vmbus_channel *chn
+                               = nvdev->chan_table[i].channel;
+
                        if (!chn)
                                continue;
 
+                       /* make sure receive not running now */
+                       napi_synchronize(&nvdev->chan_table[i].napi);
+
                        aread = hv_get_bytes_to_read(&chn->inbound);
                        if (aread)
                                break;
@@ -151,22 +171,40 @@ static int netvsc_close(struct net_device *net)
                                break;
                }
 
-               retry++;
-               if (retry > retry_max || aread == 0)
-                       break;
+               if (aread == 0)
+                       return 0;
 
-               msleep(msec);
+               if (++retry > RETRY_MAX)
+                       return -ETIMEDOUT;
 
-               if (msec < 1000)
-                       msec *= 2;
+               usleep_range(RETRY_US_LO, RETRY_US_HI);
        }
+}
 
-       if (aread) {
-               netdev_err(net, "Ring buffer not empty after closing rndis\n");
-               ret = -ETIMEDOUT;
+static int netvsc_close(struct net_device *net)
+{
+       struct net_device_context *net_device_ctx = netdev_priv(net);
+       struct net_device *vf_netdev
+               = rtnl_dereference(net_device_ctx->vf_netdev);
+       struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
+       int ret;
+
+       netif_tx_disable(net);
+
+       /* No need to close rndis filter if it is removed already */
+       if (!nvdev)
+               return 0;
+
+       ret = rndis_filter_close(nvdev);
+       if (ret != 0) {
+               netdev_err(net, "unable to close device (ret %d).\n", ret);
+               return ret;
        }
 
-out:
+       ret = netvsc_wait_until_empty(nvdev);
+       if (ret)
+               netdev_err(net, "Ring buffer not empty after closing rndis\n");
+
        if (vf_netdev)
                dev_close(vf_netdev);
 
@@ -299,8 +337,19 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
        rcu_read_lock();
        vf_netdev = rcu_dereference(ndc->vf_netdev);
        if (vf_netdev) {
-               txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
-               qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
+               const struct net_device_ops *vf_ops = vf_netdev->netdev_ops;
+
+               if (vf_ops->ndo_select_queue)
+                       txq = vf_ops->ndo_select_queue(vf_netdev, skb,
+                                                      accel_priv, fallback);
+               else
+                       txq = fallback(vf_netdev, skb);
+
+               /* Record the queue selected by VF so that it can be
+                * used for common case where VF has more queues than
+                * the synthetic device.
+                */
+               qdisc_skb_cb(skb)->slave_dev_queue_mapping = txq;
        } else {
                txq = netvsc_pick_tx(ndev, skb);
        }
@@ -782,7 +831,7 @@ int netvsc_recv_callback(struct net_device *net,
        u64_stats_update_end(&rx_stats->syncp);
 
        napi_gro_receive(&nvchan->napi, skb);
-       return 0;
+       return NVSP_STAT_SUCCESS;
 }
 
 static void netvsc_get_drvinfo(struct net_device *net,
@@ -804,16 +853,81 @@ static void netvsc_get_channels(struct net_device *net,
        }
 }
 
+static int netvsc_detach(struct net_device *ndev,
+                        struct netvsc_device *nvdev)
+{
+       struct net_device_context *ndev_ctx = netdev_priv(ndev);
+       struct hv_device *hdev = ndev_ctx->device_ctx;
+       int ret;
+
+       /* Don't try continuing to try and setup sub channels */
+       if (cancel_work_sync(&nvdev->subchan_work))
+               nvdev->num_chn = 1;
+
+       /* If device was up (receiving) then shutdown */
+       if (netif_running(ndev)) {
+               netif_tx_disable(ndev);
+
+               ret = rndis_filter_close(nvdev);
+               if (ret) {
+                       netdev_err(ndev,
+                                  "unable to close device (ret %d).\n", ret);
+                       return ret;
+               }
+
+               ret = netvsc_wait_until_empty(nvdev);
+               if (ret) {
+                       netdev_err(ndev,
+                                  "Ring buffer not empty after closing rndis\n");
+                       return ret;
+               }
+       }
+
+       netif_device_detach(ndev);
+
+       rndis_filter_device_remove(hdev, nvdev);
+
+       return 0;
+}
+
+static int netvsc_attach(struct net_device *ndev,
+                        struct netvsc_device_info *dev_info)
+{
+       struct net_device_context *ndev_ctx = netdev_priv(ndev);
+       struct hv_device *hdev = ndev_ctx->device_ctx;
+       struct netvsc_device *nvdev;
+       struct rndis_device *rdev;
+       int ret;
+
+       nvdev = rndis_filter_device_add(hdev, dev_info);
+       if (IS_ERR(nvdev))
+               return PTR_ERR(nvdev);
+
+       /* Note: enable and attach happen when sub-channels setup */
+
+       netif_carrier_off(ndev);
+
+       if (netif_running(ndev)) {
+               ret = rndis_filter_open(nvdev);
+               if (ret)
+                       return ret;
+
+               rdev = nvdev->extension;
+               if (!rdev->link_state)
+                       netif_carrier_on(ndev);
+       }
+
+       return 0;
+}
+
 static int netvsc_set_channels(struct net_device *net,
                               struct ethtool_channels *channels)
 {
        struct net_device_context *net_device_ctx = netdev_priv(net);
-       struct hv_device *dev = net_device_ctx->device_ctx;
        struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
        unsigned int orig, count = channels->combined_count;
        struct netvsc_device_info device_info;
-       bool was_opened;
-       int ret = 0;
+       int ret;
 
        /* We do not support separate count for rx, tx, or other */
        if (count == 0 ||
@@ -830,9 +944,6 @@ static int netvsc_set_channels(struct net_device *net,
                return -EINVAL;
 
        orig = nvdev->num_chn;
-       was_opened = rndis_filter_opened(nvdev);
-       if (was_opened)
-               rndis_filter_close(nvdev);
 
        memset(&device_info, 0, sizeof(device_info));
        device_info.num_chn = count;
@@ -841,28 +952,17 @@ static int netvsc_set_channels(struct net_device *net,
        device_info.recv_sections = nvdev->recv_section_cnt;
        device_info.recv_section_size = nvdev->recv_section_size;
 
-       rndis_filter_device_remove(dev, nvdev);
+       ret = netvsc_detach(net, nvdev);
+       if (ret)
+               return ret;
 
-       nvdev = rndis_filter_device_add(dev, &device_info);
-       if (IS_ERR(nvdev)) {
-               ret = PTR_ERR(nvdev);
+       ret = netvsc_attach(net, &device_info);
+       if (ret) {
                device_info.num_chn = orig;
-               nvdev = rndis_filter_device_add(dev, &device_info);
-
-               if (IS_ERR(nvdev)) {
-                       netdev_err(net, "restoring channel setting failed: %ld\n",
-                                  PTR_ERR(nvdev));
-                       return ret;
-               }
+               if (netvsc_attach(net, &device_info))
+                       netdev_err(net, "restoring channel setting failed\n");
        }
 
-       if (was_opened)
-               rndis_filter_open(nvdev);
-
-       /* We may have missed link change notifications */
-       net_device_ctx->last_reconfig = 0;
-       schedule_delayed_work(&net_device_ctx->dwork, 0);
-
        return ret;
 }
 
@@ -928,10 +1028,8 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
        struct net_device_context *ndevctx = netdev_priv(ndev);
        struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
        struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
-       struct hv_device *hdev = ndevctx->device_ctx;
        int orig_mtu = ndev->mtu;
        struct netvsc_device_info device_info;
-       bool was_opened;
        int ret = 0;
 
        if (!nvdev || nvdev->destroy)
@@ -944,11 +1042,6 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
                        return ret;
        }
 
-       netif_device_detach(ndev);
-       was_opened = rndis_filter_opened(nvdev);
-       if (was_opened)
-               rndis_filter_close(nvdev);
-
        memset(&device_info, 0, sizeof(device_info));
        device_info.num_chn = nvdev->num_chn;
        device_info.send_sections = nvdev->send_section_cnt;
@@ -956,35 +1049,27 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
        device_info.recv_sections = nvdev->recv_section_cnt;
        device_info.recv_section_size = nvdev->recv_section_size;
 
-       rndis_filter_device_remove(hdev, nvdev);
+       ret = netvsc_detach(ndev, nvdev);
+       if (ret)
+               goto rollback_vf;
 
        ndev->mtu = mtu;
 
-       nvdev = rndis_filter_device_add(hdev, &device_info);
-       if (IS_ERR(nvdev)) {
-               ret = PTR_ERR(nvdev);
-
-               /* Attempt rollback to original MTU */
-               ndev->mtu = orig_mtu;
-               nvdev = rndis_filter_device_add(hdev, &device_info);
-
-               if (vf_netdev)
-                       dev_set_mtu(vf_netdev, orig_mtu);
-
-               if (IS_ERR(nvdev)) {
-                       netdev_err(ndev, "restoring mtu failed: %ld\n",
-                                  PTR_ERR(nvdev));
-                       return ret;
-               }
-       }
+       ret = netvsc_attach(ndev, &device_info);
+       if (ret)
+               goto rollback;
 
-       if (was_opened)
-               rndis_filter_open(nvdev);
+       return 0;
 
-       netif_device_attach(ndev);
+rollback:
+       /* Attempt rollback to original MTU */
+       ndev->mtu = orig_mtu;
 
-       /* We may have missed link change notifications */
-       schedule_delayed_work(&ndevctx->dwork, 0);
+       if (netvsc_attach(ndev, &device_info))
+               netdev_err(ndev, "restoring mtu failed\n");
+rollback_vf:
+       if (vf_netdev)
+               dev_set_mtu(vf_netdev, orig_mtu);
 
        return ret;
 }
@@ -1490,11 +1575,9 @@ static int netvsc_set_ringparam(struct net_device *ndev,
 {
        struct net_device_context *ndevctx = netdev_priv(ndev);
        struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
-       struct hv_device *hdev = ndevctx->device_ctx;
        struct netvsc_device_info device_info;
        struct ethtool_ringparam orig;
        u32 new_tx, new_rx;
-       bool was_opened;
        int ret = 0;
 
        if (!nvdev || nvdev->destroy)
@@ -1519,34 +1602,18 @@ static int netvsc_set_ringparam(struct net_device *ndev,
        device_info.recv_sections = new_rx;
        device_info.recv_section_size = nvdev->recv_section_size;
 
-       netif_device_detach(ndev);
-       was_opened = rndis_filter_opened(nvdev);
-       if (was_opened)
-               rndis_filter_close(nvdev);
-
-       rndis_filter_device_remove(hdev, nvdev);
-
-       nvdev = rndis_filter_device_add(hdev, &device_info);
-       if (IS_ERR(nvdev)) {
-               ret = PTR_ERR(nvdev);
+       ret = netvsc_detach(ndev, nvdev);
+       if (ret)
+               return ret;
 
+       ret = netvsc_attach(ndev, &device_info);
+       if (ret) {
                device_info.send_sections = orig.tx_pending;
                device_info.recv_sections = orig.rx_pending;
-               nvdev = rndis_filter_device_add(hdev, &device_info);
-               if (IS_ERR(nvdev)) {
-                       netdev_err(ndev, "restoring ringparam failed: %ld\n",
-                                  PTR_ERR(nvdev));
-                       return ret;
-               }
-       }
 
-       if (was_opened)
-               rndis_filter_open(nvdev);
-       netif_device_attach(ndev);
-
-       /* We may have missed link change notifications */
-       ndevctx->last_reconfig = 0;
-       schedule_delayed_work(&ndevctx->dwork, 0);
+               if (netvsc_attach(ndev, &device_info))
+                       netdev_err(ndev, "restoring ringparam failed");
+       }
 
        return ret;
 }
@@ -1576,7 +1643,8 @@ static const struct net_device_ops device_ops = {
        .ndo_open =                     netvsc_open,
        .ndo_stop =                     netvsc_close,
        .ndo_start_xmit =               netvsc_start_xmit,
-       .ndo_set_rx_mode =              netvsc_set_multicast_list,
+       .ndo_change_rx_flags =          netvsc_change_rx_flags,
+       .ndo_set_rx_mode =              netvsc_set_rx_mode,
        .ndo_change_mtu =               netvsc_change_mtu,
        .ndo_validate_addr =            eth_validate_addr,
        .ndo_set_mac_address =          netvsc_set_mac_addr,
@@ -1807,6 +1875,15 @@ static void __netvsc_vf_setup(struct net_device *ndev,
                netdev_warn(vf_netdev,
                            "unable to change mtu to %u\n", ndev->mtu);
 
+       /* set multicast etc flags on VF */
+       dev_change_flags(vf_netdev, ndev->flags | IFF_SLAVE);
+
+       /* sync address list from ndev to VF */
+       netif_addr_lock_bh(ndev);
+       dev_uc_sync(vf_netdev, ndev);
+       dev_mc_sync(vf_netdev, ndev);
+       netif_addr_unlock_bh(ndev);
+
        if (netif_running(ndev)) {
                ret = dev_open(vf_netdev);
                if (ret)
@@ -2021,8 +2098,8 @@ static int netvsc_probe(struct hv_device *dev,
 static int netvsc_remove(struct hv_device *dev)
 {
        struct net_device_context *ndev_ctx;
-       struct net_device *vf_netdev;
-       struct net_device *net;
+       struct net_device *vf_netdev, *net;
+       struct netvsc_device *nvdev;
 
        net = hv_get_drvdata(dev);
        if (net == NULL) {
@@ -2032,10 +2109,14 @@ static int netvsc_remove(struct hv_device *dev)
 
        ndev_ctx = netdev_priv(net);
 
-       netif_device_detach(net);
-
        cancel_delayed_work_sync(&ndev_ctx->dwork);
 
+       rcu_read_lock();
+       nvdev = rcu_dereference(ndev_ctx->nvdev);
+
+       if  (nvdev)
+               cancel_work_sync(&nvdev->subchan_work);
+
        /*
         * Call to the vsc driver to let it know that the device is being
         * removed. Also blocks mtu and channel changes.
@@ -2045,11 +2126,13 @@ static int netvsc_remove(struct hv_device *dev)
        if (vf_netdev)
                netvsc_unregister_vf(vf_netdev);
 
+       if (nvdev)
+               rndis_filter_device_remove(dev, nvdev);
+
        unregister_netdevice(net);
 
-       rndis_filter_device_remove(dev,
-                                  rtnl_dereference(ndev_ctx->nvdev));
        rtnl_unlock();
+       rcu_read_unlock();
 
        hv_set_drvdata(dev, NULL);
 
diff --git a/drivers/net/hyperv/netvsc_trace.c b/drivers/net/hyperv/netvsc_trace.c
new file mode 100644 (file)
index 0000000..bb0ce5a
--- /dev/null
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/netdevice.h>
+
+#include "hyperv_net.h"
+
+#define CREATE_TRACE_POINTS
+#include "netvsc_trace.h"
diff --git a/drivers/net/hyperv/netvsc_trace.h b/drivers/net/hyperv/netvsc_trace.h
new file mode 100644 (file)
index 0000000..f758556
--- /dev/null
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#if !defined(_NETVSC_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _NETVSC_TRACE_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM netvsc
+#define TRACE_INCLUDE_FILE netvsc_trace
+
+TRACE_DEFINE_ENUM(RNDIS_MSG_PACKET);
+TRACE_DEFINE_ENUM(RNDIS_MSG_INDICATE);
+TRACE_DEFINE_ENUM(RNDIS_MSG_INIT);
+TRACE_DEFINE_ENUM(RNDIS_MSG_INIT_C);
+TRACE_DEFINE_ENUM(RNDIS_MSG_HALT);
+TRACE_DEFINE_ENUM(RNDIS_MSG_QUERY);
+TRACE_DEFINE_ENUM(RNDIS_MSG_QUERY_C);
+TRACE_DEFINE_ENUM(RNDIS_MSG_SET);
+TRACE_DEFINE_ENUM(RNDIS_MSG_SET_C);
+TRACE_DEFINE_ENUM(RNDIS_MSG_RESET);
+TRACE_DEFINE_ENUM(RNDIS_MSG_RESET_C);
+TRACE_DEFINE_ENUM(RNDIS_MSG_KEEPALIVE);
+TRACE_DEFINE_ENUM(RNDIS_MSG_KEEPALIVE_C);
+
+#define show_rndis_type(type)                                  \
+       __print_symbolic(type,                                  \
+                { RNDIS_MSG_PACKET,      "PACKET" },           \
+                { RNDIS_MSG_INDICATE,    "INDICATE", },        \
+                { RNDIS_MSG_INIT,        "INIT", },            \
+                { RNDIS_MSG_INIT_C,      "INIT_C", },          \
+                { RNDIS_MSG_HALT,        "HALT", },            \
+                { RNDIS_MSG_QUERY,       "QUERY", },           \
+                { RNDIS_MSG_QUERY_C,     "QUERY_C", },         \
+                { RNDIS_MSG_SET,         "SET", },             \
+                { RNDIS_MSG_SET_C,       "SET_C", },           \
+                { RNDIS_MSG_RESET,       "RESET", },           \
+                { RNDIS_MSG_RESET_C,     "RESET_C", },         \
+                { RNDIS_MSG_KEEPALIVE,   "KEEPALIVE", },       \
+                { RNDIS_MSG_KEEPALIVE_C, "KEEPALIVE_C", })
+
+DECLARE_EVENT_CLASS(rndis_msg_class,
+       TP_PROTO(const struct net_device *ndev, u16 q,
+               const struct rndis_message *msg),
+       TP_ARGS(ndev, q, msg),
+       TP_STRUCT__entry(
+              __string( name, ndev->name  )
+              __field(  u16,  queue       )
+              __field(  u32,  req_id      )
+              __field(  u32,  msg_type    )
+              __field(  u32,  msg_len     )
+       ),
+       TP_fast_assign(
+              __assign_str(name, ndev->name);
+              __entry->queue    = q;
+              __entry->req_id   = msg->msg.init_req.req_id;
+              __entry->msg_type = msg->ndis_msg_type;
+              __entry->msg_len  = msg->msg_len;
+       ),
+       TP_printk("dev=%s q=%u req=%#x type=%s msg_len=%u",
+                __get_str(name), __entry->queue, __entry->req_id,
+                show_rndis_type(__entry->msg_type), __entry->msg_len)
+);
+
+DEFINE_EVENT(rndis_msg_class, rndis_send,
+       TP_PROTO(const struct net_device *ndev, u16 q,
+               const struct rndis_message *msg),
+       TP_ARGS(ndev, q, msg)
+);
+
+DEFINE_EVENT(rndis_msg_class, rndis_recv,
+       TP_PROTO(const struct net_device *ndev, u16 q,
+               const struct rndis_message *msg),
+       TP_ARGS(ndev, q, msg)
+);
+
+TRACE_DEFINE_ENUM(NVSP_MSG_TYPE_INIT);
+TRACE_DEFINE_ENUM(NVSP_MSG_TYPE_INIT_COMPLETE);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_NDIS_VER);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_RECV_BUF);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_REVOKE_RECV_BUF);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_SEND_BUF);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_REVOKE_SEND_BUF);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_RNDIS_PKT);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE);
+TRACE_DEFINE_ENUM(NVSP_MSG2_TYPE_SEND_NDIS_CONFIG);
+
+TRACE_DEFINE_ENUM(NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION);
+TRACE_DEFINE_ENUM(NVSP_MSG4_TYPE_SWITCH_DATA_PATH);
+
+TRACE_DEFINE_ENUM(NVSP_MSG5_TYPE_SUBCHANNEL);
+TRACE_DEFINE_ENUM(NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE);
+
+#define show_nvsp_type(type)                                                           \
+       __print_symbolic(type,                                                          \
+                 { NVSP_MSG_TYPE_INIT,                    "INIT" },                    \
+                 { NVSP_MSG_TYPE_INIT_COMPLETE,           "INIT_COMPLETE" },           \
+                 { NVSP_MSG1_TYPE_SEND_NDIS_VER,          "SEND_NDIS_VER" },           \
+                 { NVSP_MSG1_TYPE_SEND_RECV_BUF,          "SEND_RECV_BUF" },           \
+                 { NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE, "SEND_RECV_BUF_COMPLETE" },  \
+                 { NVSP_MSG1_TYPE_REVOKE_RECV_BUF,        "REVOKE_RECV_BUF" },         \
+                 { NVSP_MSG1_TYPE_SEND_SEND_BUF,          "SEND_SEND_BUF" },           \
+                 { NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE, "SEND_SEND_BUF_COMPLETE" },  \
+                 { NVSP_MSG1_TYPE_REVOKE_SEND_BUF,        "REVOKE_SEND_BUF" },         \
+                 { NVSP_MSG1_TYPE_SEND_RNDIS_PKT,         "SEND_RNDIS_PKT" },          \
+                 { NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE, "SEND_RNDIS_PKT_COMPLETE" },\
+                 { NVSP_MSG2_TYPE_SEND_NDIS_CONFIG,       "SEND_NDIS_CONFIG" },        \
+                 { NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION,    "SEND_VF_ASSOCIATION" },     \
+                 { NVSP_MSG4_TYPE_SWITCH_DATA_PATH,       "SWITCH_DATA_PATH" },        \
+                 { NVSP_MSG5_TYPE_SUBCHANNEL,              "SUBCHANNEL" },             \
+                 { NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE,  "SEND_INDIRECTION_TABLE" })
+
+TRACE_EVENT(nvsp_send,
+       TP_PROTO(const struct net_device *ndev,
+                const struct nvsp_message *msg),
+       TP_ARGS(ndev, msg),
+       TP_STRUCT__entry(
+               __string( name, ndev->name  )
+               __field(  u32,  msg_type    )
+       ),
+       TP_fast_assign(
+               __assign_str(name, ndev->name);
+               __entry->msg_type = msg->hdr.msg_type;
+       ),
+       TP_printk("dev=%s type=%s",
+                 __get_str(name),
+                 show_nvsp_type(__entry->msg_type))
+);
+
+TRACE_EVENT(nvsp_send_pkt,
+       TP_PROTO(const struct net_device *ndev,
+                const struct vmbus_channel *chan,
+                const struct nvsp_1_message_send_rndis_packet *rpkt),
+       TP_ARGS(ndev, chan, rpkt),
+       TP_STRUCT__entry(
+               __string( name, ndev->name    )
+               __field(  u16,  qid           )
+               __field(  u32,  channel_type  )
+               __field(  u32,  section_index )
+               __field(  u32,  section_size  )
+       ),
+       TP_fast_assign(
+               __assign_str(name, ndev->name);
+               __entry->qid = chan->offermsg.offer.sub_channel_index;
+               __entry->channel_type = rpkt->channel_type;
+               __entry->section_index = rpkt->send_buf_section_index;
+               __entry->section_size = rpkt->send_buf_section_size;
+       ),
+       TP_printk("dev=%s qid=%u type=%s section=%u size=%d",
+                 __get_str(name), __entry->qid,
+                 __entry->channel_type ? "CONTROL" : "DATA",
+                 __entry->section_index, __entry->section_size)
+);
+
+TRACE_EVENT(nvsp_recv,
+       TP_PROTO(const struct net_device *ndev,
+                const struct vmbus_channel *chan,
+                const struct nvsp_message *msg),
+       TP_ARGS(ndev, chan, msg),
+       TP_STRUCT__entry(
+               __string( name, ndev->name  )
+               __field(  u16,  qid         )
+               __field(  u32,  msg_type    )
+       ),
+       TP_fast_assign(
+               __assign_str(name, ndev->name);
+               __entry->qid = chan->offermsg.offer.sub_channel_index;
+               __entry->msg_type = msg->hdr.msg_type;
+       ),
+       TP_printk("dev=%s qid=%u type=%s",
+                 __get_str(name), __entry->qid,
+                 show_nvsp_type(__entry->msg_type))
+);
+
+#endif /* _NETVSC_TRACE_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/net/hyperv
+#include <trace/define_trace.h>
index c3ca191fea7fe6bc629ae45be0759cdf1f062956..4a4952363e8a6cfb4057fcc52eb7d2163deaaa91 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/rtnetlink.h>
 
 #include "hyperv_net.h"
+#include "netvsc_trace.h"
 
 static void rndis_set_multicast(struct work_struct *w);
 
@@ -241,6 +242,8 @@ static int rndis_filter_send_request(struct rndis_device *dev,
                        pb[0].len;
        }
 
+       trace_rndis_send(dev->ndev, 0, &req->request_msg);
+
        rcu_read_lock_bh();
        ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL);
        rcu_read_unlock_bh();
@@ -264,13 +267,23 @@ static void rndis_set_link_state(struct rndis_device *rdev,
        }
 }
 
-static void rndis_filter_receive_response(struct rndis_device *dev,
-                                      struct rndis_message *resp)
+static void rndis_filter_receive_response(struct net_device *ndev,
+                                         struct netvsc_device *nvdev,
+                                         const struct rndis_message *resp)
 {
+       struct rndis_device *dev = nvdev->extension;
        struct rndis_request *request = NULL;
        bool found = false;
        unsigned long flags;
-       struct net_device *ndev = dev->ndev;
+
+       /* This should never happen, it means control message
+        * response received after device removed.
+        */
+       if (dev->state == RNDIS_DEV_UNINITIALIZED) {
+               netdev_err(ndev,
+                          "got rndis message uninitialized\n");
+               return;
+       }
 
        spin_lock_irqsave(&dev->request_lock, flags);
        list_for_each_entry(request, &dev->req_list, list_ent) {
@@ -352,7 +365,6 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type)
 
 static int rndis_filter_receive_data(struct net_device *ndev,
                                     struct netvsc_device *nvdev,
-                                    struct rndis_device *dev,
                                     struct rndis_message *msg,
                                     struct vmbus_channel *channel,
                                     void *data, u32 data_buflen)
@@ -372,7 +384,7 @@ static int rndis_filter_receive_data(struct net_device *ndev,
         * should be the data packet size plus the trailer padding size
         */
        if (unlikely(data_buflen < rndis_pkt->data_len)) {
-               netdev_err(dev->ndev, "rndis message buffer "
+               netdev_err(ndev, "rndis message buffer "
                           "overflow detected (got %u, min %u)"
                           "...dropping this message!\n",
                           data_buflen, rndis_pkt->data_len);
@@ -400,35 +412,20 @@ int rndis_filter_receive(struct net_device *ndev,
                         void *data, u32 buflen)
 {
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
-       struct rndis_device *rndis_dev = net_dev->extension;
        struct rndis_message *rndis_msg = data;
 
-       /* Make sure the rndis device state is initialized */
-       if (unlikely(!rndis_dev)) {
-               netif_dbg(net_device_ctx, rx_err, ndev,
-                         "got rndis message but no rndis device!\n");
-               return NVSP_STAT_FAIL;
-       }
-
-       if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) {
-               netif_dbg(net_device_ctx, rx_err, ndev,
-                         "got rndis message uninitialized\n");
-               return NVSP_STAT_FAIL;
-       }
-
        if (netif_msg_rx_status(net_device_ctx))
                dump_rndis_message(ndev, rndis_msg);
 
        switch (rndis_msg->ndis_msg_type) {
        case RNDIS_MSG_PACKET:
-               return rndis_filter_receive_data(ndev, net_dev,
-                                                rndis_dev, rndis_msg,
+               return rndis_filter_receive_data(ndev, net_dev, rndis_msg,
                                                 channel, data, buflen);
        case RNDIS_MSG_INIT_C:
        case RNDIS_MSG_QUERY_C:
        case RNDIS_MSG_SET_C:
                /* completion msgs */
-               rndis_filter_receive_response(rndis_dev, rndis_msg);
+               rndis_filter_receive_response(ndev, net_dev, rndis_msg);
                break;
 
        case RNDIS_MSG_INDICATE:
@@ -440,10 +437,10 @@ int rndis_filter_receive(struct net_device *ndev,
                        "unhandled rndis message (type %u len %u)\n",
                           rndis_msg->ndis_msg_type,
                           rndis_msg->msg_len);
-               break;
+               return NVSP_STAT_FAIL;
        }
 
-       return 0;
+       return NVSP_STAT_SUCCESS;
 }
 
 static int rndis_filter_query_device(struct rndis_device *dev,
@@ -825,13 +822,15 @@ static int rndis_filter_set_packet_filter(struct rndis_device *dev,
        struct rndis_set_request *set;
        int ret;
 
+       if (dev->filter == new_filter)
+               return 0;
+
        request = get_rndis_request(dev, RNDIS_MSG_SET,
                        RNDIS_MESSAGE_SIZE(struct rndis_set_request) +
                        sizeof(u32));
        if (!request)
                return -ENOMEM;
 
-
        /* Setup the rndis set */
        set = &request->request_msg.msg.set_req;
        set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER;
@@ -842,8 +841,10 @@ static int rndis_filter_set_packet_filter(struct rndis_device *dev,
               &new_filter, sizeof(u32));
 
        ret = rndis_filter_send_request(dev, request);
-       if (ret == 0)
+       if (ret == 0) {
                wait_for_completion(&request->wait_event);
+               dev->filter = new_filter;
+       }
 
        put_rndis_request(dev, request);
 
@@ -854,15 +855,19 @@ static void rndis_set_multicast(struct work_struct *w)
 {
        struct rndis_device *rdev
                = container_of(w, struct rndis_device, mcast_work);
+       u32 filter = NDIS_PACKET_TYPE_DIRECTED;
+       unsigned int flags = rdev->ndev->flags;
 
-       if (rdev->ndev->flags & IFF_PROMISC)
-               rndis_filter_set_packet_filter(rdev,
-                                              NDIS_PACKET_TYPE_PROMISCUOUS);
-       else
-               rndis_filter_set_packet_filter(rdev,
-                                              NDIS_PACKET_TYPE_BROADCAST |
-                                              NDIS_PACKET_TYPE_ALL_MULTICAST |
-                                              NDIS_PACKET_TYPE_DIRECTED);
+       if (flags & IFF_PROMISC) {
+               filter = NDIS_PACKET_TYPE_PROMISCUOUS;
+       } else {
+               if (flags & IFF_ALLMULTI)
+                       filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
+               if (flags & IFF_BROADCAST)
+                       filter |= NDIS_PACKET_TYPE_BROADCAST;
+       }
+
+       rndis_filter_set_packet_filter(rdev, filter);
 }
 
 void rndis_filter_update(struct netvsc_device *nvdev)
@@ -940,12 +945,11 @@ static bool netvsc_device_idle(const struct netvsc_device *nvdev)
        return true;
 }
 
-static void rndis_filter_halt_device(struct rndis_device *dev)
+static void rndis_filter_halt_device(struct netvsc_device *nvdev,
+                                    struct rndis_device *dev)
 {
        struct rndis_request *request;
        struct rndis_halt_request *halt;
-       struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
-       struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
 
        /* Attempt to do a rndis device halt */
        request = get_rndis_request(dev, RNDIS_MSG_HALT,
@@ -1084,6 +1088,8 @@ void rndis_set_subchannel(struct work_struct *w)
        init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE;
        init_packet->msg.v5_msg.subchn_req.num_subchannels =
                                                nvdev->num_chn - 1;
+       trace_nvsp_send(ndev, init_packet);
+
        ret = vmbus_sendpacket(hv_dev->channel, init_packet,
                               sizeof(struct nvsp_message),
                               (unsigned long)init_packet,
@@ -1116,6 +1122,7 @@ void rndis_set_subchannel(struct work_struct *w)
        for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
                ndev_ctx->tx_table[i] = i % nvdev->num_chn;
 
+       netif_device_attach(ndev);
        rtnl_unlock();
        return;
 
@@ -1126,6 +1133,8 @@ void rndis_set_subchannel(struct work_struct *w)
 
        nvdev->max_chn = 1;
        nvdev->num_chn = 1;
+
+       netif_device_attach(ndev);
 unlock:
        rtnl_unlock();
 }
@@ -1328,6 +1337,10 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
                net_device->num_chn = 1;
        }
 
+       /* No sub channels, device is ready */
+       if (net_device->num_chn == 1)
+               netif_device_attach(net);
+
        return net_device;
 
 err_dev_remv:
@@ -1341,12 +1354,11 @@ void rndis_filter_device_remove(struct hv_device *dev,
        struct rndis_device *rndis_dev = net_dev->extension;
 
        /* Halt and release the rndis device */
-       rndis_filter_halt_device(rndis_dev);
+       rndis_filter_halt_device(net_dev, rndis_dev);
 
        net_dev->extension = NULL;
 
        netvsc_device_remove(dev);
-       kfree(rndis_dev);
 }
 
 int rndis_filter_open(struct netvsc_device *nvdev)
@@ -1364,10 +1376,3 @@ int rndis_filter_close(struct netvsc_device *nvdev)
 
        return rndis_filter_close_device(nvdev->extension);
 }
-
-bool rndis_filter_opened(const struct netvsc_device *nvdev)
-{
-       const struct rndis_device *dev = nvdev->extension;
-
-       return dev->state == RNDIS_DEV_DATAINITIALIZED;
-}
index 303ba413392003047e208db65f7df85d382dd785..8782f5655e3ff170c1d2e032455f769f66512140 100644 (file)
@@ -104,3 +104,14 @@ config IEEE802154_CA8210_DEBUGFS
          exposes a debugfs node for each CA8210 instance which allows
          direct use of the Cascoda API, exposing the 802.15.4 MAC
          management entities.
+
+config IEEE802154_MCR20A
+       tristate "MCR20A transceiver driver"
+       depends on IEEE802154_DRIVERS && MAC802154
+       depends on SPI
+       ---help---
+         Say Y here to enable the MCR20A SPI 802.15.4 wireless
+         controller.
+
+         This driver can also be built as a module. To do so, say M here.
+         The module will be called 'mcr20a'.
index bea1de5e726c7fed4088d0ab7b4ba27ef62da69f..104744d5a668d1e255835d4d4492008928a5fd43 100644 (file)
@@ -6,3 +6,4 @@ obj-$(CONFIG_IEEE802154_CC2520) += cc2520.o
 obj-$(CONFIG_IEEE802154_ATUSB) += atusb.o
 obj-$(CONFIG_IEEE802154_ADF7242) += adf7242.o
 obj-$(CONFIG_IEEE802154_CA8210) += ca8210.o
+obj-$(CONFIG_IEEE802154_MCR20A) += mcr20a.o
diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c
new file mode 100644 (file)
index 0000000..d9eb22a
--- /dev/null
@@ -0,0 +1,1413 @@
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/skbuff.h>
+#include <linux/of_gpio.h>
+#include <linux/regmap.h>
+#include <linux/ieee802154.h>
+#include <linux/debugfs.h>
+
+#include <net/mac802154.h>
+#include <net/cfg802154.h>
+
+#include <linux/device.h>
+
+#include "mcr20a.h"
+
+#define        SPI_COMMAND_BUFFER              3
+
+#define REGISTER_READ                  BIT(7)
+#define REGISTER_WRITE                 (0 << 7)
+#define REGISTER_ACCESS                        (0 << 6)
+#define PACKET_BUFF_BURST_ACCESS       BIT(6)
+#define PACKET_BUFF_BYTE_ACCESS                BIT(5)
+
+#define MCR20A_WRITE_REG(x)            (x)
+#define MCR20A_READ_REG(x)             (REGISTER_READ | (x))
+#define MCR20A_BURST_READ_PACKET_BUF   (0xC0)
+#define MCR20A_BURST_WRITE_PACKET_BUF  (0x40)
+
+#define MCR20A_CMD_REG         0x80
+#define MCR20A_CMD_REG_MASK    0x3f
+#define MCR20A_CMD_WRITE       0x40
+#define MCR20A_CMD_FB          0x20
+
+/* Number of Interrupt Request Status Register */
+#define MCR20A_IRQSTS_NUM 2 /* only IRQ_STS1 and IRQ_STS2 */
+
+/* MCR20A CCA type.
+ *
+ * mcr20a_set_cca_mode() shifts one of these values into the CCATYPE
+ * field of DAR_PHY_CTRL4.
+ */
+enum {
+       MCR20A_CCA_ED,    /* energy detect - CCA bit not active,
+                          * not to be used for T and CCCA sequences
+                          */
+       MCR20A_CCA_MODE1, /* energy detect - CCA bit ACTIVE */
+       MCR20A_CCA_MODE2, /* 802.15.4 compliant signal detect - CCA bit ACTIVE */
+       MCR20A_CCA_MODE3  /* used for NL802154_CCA_ENERGY_CARRIER (AND or OR) */
+};
+
+/* Transceiver sequence selectors. The driver writes these into
+ * DAR_PHY_CTRL1 (field DAR_PHY_CTRL1_XCVSEQ_MASK) to idle the radio or
+ * to start an RX or TX sequence.
+ */
+enum {
+       MCR20A_XCVSEQ_IDLE      = 0x00,
+       MCR20A_XCVSEQ_RX        = 0x01,
+       MCR20A_XCVSEQ_TX        = 0x02,
+       MCR20A_XCVSEQ_CCA       = 0x03,
+       MCR20A_XCVSEQ_TR        = 0x04,
+       MCR20A_XCVSEQ_CCCA      = 0x05,
+};
+
+/* IEEE-802.15.4 defined constants (2.4 GHz logical channels) */
+#define        MCR20A_MIN_CHANNEL      (11)
+#define        MCR20A_MAX_CHANNEL      (26)
+#define        MCR20A_CHANNEL_SPACING  (5)
+
+/* MCR20A CCA Threshold constants */
+#define MCR20A_MIN_CCA_THRESHOLD (0x6EU)
+#define MCR20A_MAX_CCA_THRESHOLD (0x00U)
+
+/* version 0C */
+#define MCR20A_OVERWRITE_VERSION (0x0C)
+
+/* MCR20A PLL configurations
+ *
+ * Integer part of the PLL setting, indexed by (channel - 11) and written
+ * to DAR_PLL_INT0 by mcr20a_set_channel(). The comment in front of each
+ * entry is the resulting frequency in MHz, following
+ * frequency = ((PLL_INT + 64) + (PLL_FRAC / 65536)) * 32 MHz.
+ */
+static const u8  PLL_INT[16] = {
+       /* 2405 */ 0x0B,        /* 2410 */ 0x0B,        /* 2415 */ 0x0B,
+       /* 2420 */ 0x0B,        /* 2425 */ 0x0B,        /* 2430 */ 0x0B,
+       /* 2435 */ 0x0C,        /* 2440 */ 0x0C,        /* 2445 */ 0x0C,
+       /* 2450 */ 0x0C,        /* 2455 */ 0x0C,        /* 2460 */ 0x0C,
+       /* 2465 */ 0x0D,        /* 2470 */ 0x0D,        /* 2475 */ 0x0D,
+       /* 2480 */ 0x0D
+};
+
+/* Most significant byte of the fractional PLL setting, indexed by
+ * (channel - 11) and written to DAR_PLL_FRAC0_MSB by
+ * mcr20a_set_channel(); the matching LSB register is always written
+ * with 0x00 there.
+ */
+static const u8 PLL_FRAC[16] = {
+       /* 2405 */ 0x28,        /* 2410 */ 0x50,        /* 2415 */ 0x78,
+       /* 2420 */ 0xA0,        /* 2425 */ 0xC8,        /* 2430 */ 0xF0,
+       /* 2435 */ 0x18,        /* 2440 */ 0x40,        /* 2445 */ 0x68,
+       /* 2450 */ 0x90,        /* 2455 */ 0xB8,        /* 2460 */ 0xE0,
+       /* 2465 */ 0x08,        /* 2470 */ 0x30,        /* 2475 */ 0x58,
+       /* 2480 */ 0x80
+};
+
+/* Register/value pairs for a set of IAR registers, in reg_sequence form.
+ * NOTE(review): presumably applied in one go to the IAR regmap during
+ * hardware initialisation (cf. MCR20A_OVERWRITE_VERSION); the user of
+ * this table is not visible here - confirm.
+ */
+static const struct reg_sequence mar20a_iar_overwrites[] = {
+       { IAR_MISC_PAD_CTRL,    0x02 },
+       { IAR_VCO_CTRL1,        0xB3 },
+       { IAR_VCO_CTRL2,        0x07 },
+       { IAR_PA_TUNING,        0x71 },
+       { IAR_CHF_IBUF,         0x2F },
+       { IAR_CHF_QBUF,         0x2F },
+       { IAR_CHF_IRIN,         0x24 },
+       { IAR_CHF_QRIN,         0x24 },
+       { IAR_CHF_IL,           0x24 },
+       { IAR_CHF_QL,           0x24 },
+       { IAR_CHF_CC1,          0x32 },
+       { IAR_CHF_CCL,          0x1D },
+       { IAR_CHF_CC2,          0x2D },
+       { IAR_CHF_IROUT,        0x24 },
+       { IAR_CHF_QROUT,        0x24 },
+       { IAR_PA_CAL,           0x28 },
+       { IAR_AGC_THR1,         0x55 },
+       { IAR_AGC_THR2,         0x2D },
+       { IAR_ATT_RSSI1,        0x5F },
+       { IAR_ATT_RSSI2,        0x8F },
+       { IAR_RSSI_OFFSET,      0x61 },
+       { IAR_CHF_PMA_GAIN,     0x03 },
+       { IAR_CCA1_THRESH,      0x50 },
+       { IAR_CORR_NVAL,        0x13 },
+       { IAR_ACKDELAY,         0x3D },
+};
+
+#define MCR20A_VALID_CHANNELS (0x07FFF800)
+
+/* Board-specific data for one MCR20A device. */
+struct mcr20a_platform_data {
+       /* NOTE(review): presumably the GPIO number of the chip's reset
+        * line; how it is obtained and used is not visible here - confirm
+        */
+       int rst_gpio;
+};
+
+#define MCR20A_MAX_BUF         (127)
+
+/* Yield the struct device of the SPI device behind a struct mcr20a_local
+ * pointer, for use with dev_dbg()/dev_err(). The argument is
+ * parenthesized so that any pointer-valued expression expands correctly.
+ */
+#define printdev(X) (&(X)->spi->dev)
+
+/* regmap information for Direct Access Register (DAR) access */
+#define MCR20A_DAR_WRITE       0x01
+#define MCR20A_DAR_READ                0x00
+#define MCR20A_DAR_NUMREGS     0x3F
+
+/* regmap information for Indirect Access Register (IAR) access */
+#define MCR20A_IAR_ACCESS      0x80
+#define MCR20A_IAR_NUMREGS     0xBEFF
+
+/* Read/Write SPI Commands for DAR and IAR registers. */
+#define MCR20A_READSHORT(reg)  ((reg) << 1)
+#define MCR20A_WRITESHORT(reg) ((reg) << 1 | 1)
+#define MCR20A_READLONG(reg)   (1 << 15 | (reg) << 5)
+#define MCR20A_WRITELONG(reg)  (1 << 15 | (reg) << 5 | 1 << 4)
+
+/* Type definitions for link configuration of instantiable layers  */
+#define MCR20A_PHY_INDIRECT_QUEUE_SIZE (12)
+
+/* regmap writeable_reg callback for the Direct Access Register map
+ * (see mcr20a_dar_regmap).
+ *
+ * Returns true for every DAR register the driver may write, false for
+ * anything else. @dev is unused. mcr20a_dar_readable() also relies on
+ * this list, since every writeable register is treated as readable.
+ */
+static bool
+mcr20a_dar_writeable(struct device *dev, unsigned int reg)
+{
+       switch (reg) {
+       case DAR_IRQ_STS1:
+       case DAR_IRQ_STS2:
+       case DAR_IRQ_STS3:
+       case DAR_PHY_CTRL1:
+       case DAR_PHY_CTRL2:
+       case DAR_PHY_CTRL3:
+       case DAR_PHY_CTRL4:
+       case DAR_SRC_CTRL:
+       case DAR_SRC_ADDRS_SUM_LSB:
+       case DAR_SRC_ADDRS_SUM_MSB:
+       case DAR_T3CMP_LSB:
+       case DAR_T3CMP_MSB:
+       case DAR_T3CMP_USB:
+       case DAR_T2PRIMECMP_LSB:
+       case DAR_T2PRIMECMP_MSB:
+       case DAR_T1CMP_LSB:
+       case DAR_T1CMP_MSB:
+       case DAR_T1CMP_USB:
+       case DAR_T2CMP_LSB:
+       case DAR_T2CMP_MSB:
+       case DAR_T2CMP_USB:
+       case DAR_T4CMP_LSB:
+       case DAR_T4CMP_MSB:
+       case DAR_T4CMP_USB:
+       case DAR_PLL_INT0:
+       case DAR_PLL_FRAC0_LSB:
+       case DAR_PLL_FRAC0_MSB:
+       case DAR_PA_PWR:
+       /* no DAR_ACM */
+       case DAR_OVERWRITE_VER:
+       case DAR_CLK_OUT_CTRL:
+       case DAR_PWR_MODES:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/* regmap readable_reg callback for the Direct Access Register map.
+ *
+ * A DAR register is readable when it is writeable (as decided by
+ * mcr20a_dar_writeable()) or when it is one of the read-only registers
+ * listed below. @dev is only passed through.
+ */
+static bool
+mcr20a_dar_readable(struct device *dev, unsigned int reg)
+{
+       /* every writeable register can be read back as well */
+       if (mcr20a_dar_writeable(dev, reg))
+               return true;
+
+       /* what remains is the set of read-only registers */
+       switch (reg) {
+       case DAR_RX_FRM_LEN:
+       case DAR_CCA1_ED_FNL:
+       case DAR_EVENT_TMR_LSB:
+       case DAR_EVENT_TMR_MSB:
+       case DAR_EVENT_TMR_USB:
+       case DAR_TIMESTAMP_LSB:
+       case DAR_TIMESTAMP_MSB:
+       case DAR_TIMESTAMP_USB:
+       case DAR_SEQ_STATE:
+       case DAR_LQI_VALUE:
+       case DAR_RSSI_CCA_CONT:
+               break;
+       default:
+               return false;
+       }
+
+       return true;
+}
+
+/* regmap volatile_reg callback for the Direct Access Register map.
+ *
+ * Only the three IRQ status registers are reported as volatile: they
+ * can change during runtime and are accessed both through spi_async
+ * and through regmap. @dev is unused.
+ */
+static bool
+mcr20a_dar_volatile(struct device *dev, unsigned int reg)
+{
+       return reg == DAR_IRQ_STS1 ||
+              reg == DAR_IRQ_STS2 ||
+              reg == DAR_IRQ_STS3;
+}
+
+/* regmap precious_reg callback for the Direct Access Register map.
+ *
+ * Marks the three IRQ status registers as precious so that they are
+ * not read behind the driver's back (don't clear irq line on read).
+ * @dev is unused.
+ */
+static bool
+mcr20a_dar_precious(struct device *dev, unsigned int reg)
+{
+       bool precious = false;
+
+       if (reg == DAR_IRQ_STS1 || reg == DAR_IRQ_STS2 || reg == DAR_IRQ_STS3)
+               precious = true;
+
+       return precious;
+}
+
+/* regmap description of the Direct Access Registers: 8-bit register
+ * addresses and 8-bit values. With the definitions above the write flag
+ * mask evaluates to 0 and the read flag mask to BIT(7). Register access
+ * is restricted through the mcr20a_dar_* callbacks and values are
+ * cached in an rbtree cache.
+ */
+static const struct regmap_config mcr20a_dar_regmap = {
+       .name                   = "mcr20a_dar",
+       .reg_bits               = 8,
+       .val_bits               = 8,
+       .write_flag_mask        = REGISTER_ACCESS | REGISTER_WRITE,
+       .read_flag_mask         = REGISTER_ACCESS | REGISTER_READ,
+       .cache_type             = REGCACHE_RBTREE,
+       .writeable_reg          = mcr20a_dar_writeable,
+       .readable_reg           = mcr20a_dar_readable,
+       .volatile_reg           = mcr20a_dar_volatile,
+       .precious_reg           = mcr20a_dar_precious,
+       .fast_io                = true,
+       .can_multi_write        = true,
+};
+
+/* regmap writeable_reg callback for the Indirect Access Register map
+ * (see mcr20a_iar_regmap).
+ *
+ * Returns true for every IAR register the driver may write, false for
+ * anything else. @dev is unused. mcr20a_iar_readable() also relies on
+ * this list, since every writeable register is treated as readable.
+ */
+static bool
+mcr20a_iar_writeable(struct device *dev, unsigned int reg)
+{
+       switch (reg) {
+       case IAR_XTAL_TRIM:
+       case IAR_PMC_LP_TRIM:
+       case IAR_MACPANID0_LSB:
+       case IAR_MACPANID0_MSB:
+       case IAR_MACSHORTADDRS0_LSB:
+       case IAR_MACSHORTADDRS0_MSB:
+       case IAR_MACLONGADDRS0_0:
+       case IAR_MACLONGADDRS0_8:
+       case IAR_MACLONGADDRS0_16:
+       case IAR_MACLONGADDRS0_24:
+       case IAR_MACLONGADDRS0_32:
+       case IAR_MACLONGADDRS0_40:
+       case IAR_MACLONGADDRS0_48:
+       case IAR_MACLONGADDRS0_56:
+       case IAR_RX_FRAME_FILTER:
+       case IAR_PLL_INT1:
+       case IAR_PLL_FRAC1_LSB:
+       case IAR_PLL_FRAC1_MSB:
+       case IAR_MACPANID1_LSB:
+       case IAR_MACPANID1_MSB:
+       case IAR_MACSHORTADDRS1_LSB:
+       case IAR_MACSHORTADDRS1_MSB:
+       case IAR_MACLONGADDRS1_0:
+       case IAR_MACLONGADDRS1_8:
+       case IAR_MACLONGADDRS1_16:
+       case IAR_MACLONGADDRS1_24:
+       case IAR_MACLONGADDRS1_32:
+       case IAR_MACLONGADDRS1_40:
+       case IAR_MACLONGADDRS1_48:
+       case IAR_MACLONGADDRS1_56:
+       case IAR_DUAL_PAN_CTRL:
+       case IAR_DUAL_PAN_DWELL:
+       case IAR_CCA1_THRESH:
+       case IAR_CCA1_ED_OFFSET_COMP:
+       case IAR_LQI_OFFSET_COMP:
+       case IAR_CCA_CTRL:
+       case IAR_CCA2_CORR_PEAKS:
+       case IAR_CCA2_CORR_THRESH:
+       case IAR_TMR_PRESCALE:
+       case IAR_ANT_PAD_CTRL:
+       case IAR_MISC_PAD_CTRL:
+       case IAR_BSM_CTRL:
+       case IAR_RNG:
+       case IAR_RX_WTR_MARK:
+       case IAR_SOFT_RESET:
+       case IAR_TXDELAY:
+       case IAR_ACKDELAY:
+       case IAR_CORR_NVAL:
+       case IAR_ANT_AGC_CTRL:
+       case IAR_AGC_THR1:
+       case IAR_AGC_THR2:
+       case IAR_PA_CAL:
+       case IAR_ATT_RSSI1:
+       case IAR_ATT_RSSI2:
+       case IAR_RSSI_OFFSET:
+       case IAR_XTAL_CTRL:
+       case IAR_CHF_PMA_GAIN:
+       case IAR_CHF_IBUF:
+       case IAR_CHF_QBUF:
+       case IAR_CHF_IRIN:
+       case IAR_CHF_QRIN:
+       case IAR_CHF_IL:
+       case IAR_CHF_QL:
+       case IAR_CHF_CC1:
+       case IAR_CHF_CCL:
+       case IAR_CHF_CC2:
+       case IAR_CHF_IROUT:
+       case IAR_CHF_QROUT:
+       case IAR_PA_TUNING:
+       case IAR_VCO_CTRL1:
+       case IAR_VCO_CTRL2:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/* regmap readable_reg callback for the Indirect Access Register map.
+ *
+ * An IAR register is readable when it is writeable (as decided by
+ * mcr20a_iar_writeable()) or when it is one of the read-only registers
+ * listed below. @dev is only passed through.
+ */
+static bool
+mcr20a_iar_readable(struct device *dev, unsigned int reg)
+{
+       /* every writeable register can be read back as well */
+       if (mcr20a_iar_writeable(dev, reg))
+               return true;
+
+       /* what remains is the set of read-only registers */
+       switch (reg) {
+       case IAR_PART_ID:
+       case IAR_DUAL_PAN_STS:
+       case IAR_RX_BYTE_COUNT:
+       case IAR_FILTERFAIL_CODE1:
+       case IAR_FILTERFAIL_CODE2:
+       case IAR_RSSI:
+               break;
+       default:
+               return false;
+       }
+
+       return true;
+}
+
+/* regmap volatile_reg callback for the Indirect Access Register map.
+ *
+ * Reports the status-type registers below as volatile because their
+ * contents can change during runtime. @dev is unused.
+ */
+static bool
+mcr20a_iar_volatile(struct device *dev, unsigned int reg)
+{
+       bool is_volatile;
+
+       switch (reg) {
+       case IAR_DUAL_PAN_STS:
+       case IAR_RX_BYTE_COUNT:
+       case IAR_FILTERFAIL_CODE1:
+       case IAR_FILTERFAIL_CODE2:
+       case IAR_RSSI:
+               is_volatile = true;
+               break;
+       default:
+               is_volatile = false;
+               break;
+       }
+
+       return is_volatile;
+}
+
+/* regmap description of the Indirect Access Registers: 16-bit register
+ * addresses and 8-bit values. Both flag masks additionally carry
+ * IAR_INDEX (defined outside this file). Register access is restricted
+ * through the mcr20a_iar_* callbacks; no precious registers are
+ * declared for this map.
+ */
+static const struct regmap_config mcr20a_iar_regmap = {
+       .name                   = "mcr20a_iar",
+       .reg_bits               = 16,
+       .val_bits               = 8,
+       .write_flag_mask        = REGISTER_ACCESS | REGISTER_WRITE | IAR_INDEX,
+       .read_flag_mask         = REGISTER_ACCESS | REGISTER_READ  | IAR_INDEX,
+       .cache_type             = REGCACHE_RBTREE,
+       .writeable_reg          = mcr20a_iar_writeable,
+       .readable_reg           = mcr20a_iar_readable,
+       .volatile_reg           = mcr20a_iar_volatile,
+       .fast_io                = true,
+};
+
+/* Per-device driver state.
+ *
+ * Besides the SPI device and the two regmap handles, the structure
+ * embeds the spi_message/spi_transfer objects and the small command and
+ * data buffers used for asynchronous SPI traffic, grouped by purpose
+ * (TX buffer write, register access, RX buffer read, IRQ status read).
+ */
+struct mcr20a_local {
+       struct spi_device *spi;
+
+       struct ieee802154_hw *hw;
+       struct mcr20a_platform_data *pdata;
+       /* regmap for the Direct Access Registers */
+       struct regmap *regmap_dar;
+       /* regmap for the Indirect Access Registers */
+       struct regmap *regmap_iar;
+
+       u8 *buf;
+
+       /* set by mcr20a_xmit() before the transmit sequence is started */
+       bool is_tx;
+
+       /* for writing tx buffer */
+       struct spi_message tx_buf_msg;
+       u8 tx_header[1];
+       /* burst buffer write command */
+       struct spi_transfer tx_xfer_header;
+       u8 tx_len[1];
+       /* len of tx packet */
+       struct spi_transfer tx_xfer_len;
+       /* data of tx packet */
+       struct spi_transfer tx_xfer_buf;
+       /* frame handed to mcr20a_xmit() */
+       struct sk_buff *tx_skb;
+
+       /* for read length rxfifo; the TX path also uses this message to
+        * write DAR_PHY_CTRL1 (reg_cmd holds the command byte, reg_data
+        * the payload)
+        */
+       struct spi_message reg_msg;
+       u8 reg_cmd[1];
+       u8 reg_data[MCR20A_IRQSTS_NUM];
+       struct spi_transfer reg_xfer_cmd;
+       struct spi_transfer reg_xfer_data;
+
+       /* receive handling */
+       struct spi_message rx_buf_msg;
+       u8 rx_header[1];
+       struct spi_transfer rx_xfer_header;
+       u8 rx_lqi[1];
+       struct spi_transfer rx_xfer_lqi;
+       u8 rx_buf[MCR20A_MAX_BUF];
+       struct spi_transfer rx_xfer_buf;
+
+       /* isr handling for reading intstat */
+       struct spi_message irq_msg;
+       u8 irq_header[1];
+       u8 irq_data[MCR20A_IRQSTS_NUM];
+       struct spi_transfer irq_xfer_data;
+       struct spi_transfer irq_xfer_header;
+};
+
+/* Queue the register write that starts the TX sequence.
+ *
+ * @context is the struct mcr20a_local of the device. The shared
+ * register message is set up as a one byte write of MCR20A_XCVSEQ_TX to
+ * DAR_PHY_CTRL1, without a completion handler, and submitted with
+ * spi_async(). A submission failure is only logged.
+ *
+ * NOTE(review): signature and name suggest this runs as the completion
+ * callback of the TX buffer write; the registration is not visible here.
+ */
+static void
+mcr20a_write_tx_buf_complete(void *context)
+{
+       struct mcr20a_local *lp = context;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* one data byte: the TX sequence selector for PHY_CTRL1 */
+       lp->reg_cmd[0]  = MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+       lp->reg_data[0] = MCR20A_XCVSEQ_TX;
+       lp->reg_xfer_data.len = 1;
+       lp->reg_msg.complete = NULL;
+
+       if (spi_async(lp->spi, &lp->reg_msg))
+               dev_err(printdev(lp), "failed to set SEQ TX\n");
+}
+
+/* Start the transmission of one frame.
+ *
+ * @hw:  device, hw->priv is the struct mcr20a_local
+ * @skb: frame to send; remembered in lp->tx_skb
+ *
+ * Only the first step is done here: the is_tx flag is set and an
+ * asynchronous one byte write of MCR20A_XCVSEQ_IDLE to DAR_PHY_CTRL1 is
+ * queued through the shared register message, without a completion
+ * handler.
+ *
+ * Returns the result of spi_async().
+ */
+static int
+mcr20a_xmit(struct ieee802154_hw *hw, struct sk_buff *skb)
+{
+       struct mcr20a_local *lp = hw->priv;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       print_hex_dump_debug("mcr20a tx: ", DUMP_PREFIX_OFFSET, 16, 1,
+                            skb->data, skb->len, 0);
+
+       /* record the frame and raise the is_tx flag */
+       lp->tx_skb = skb;
+       lp->is_tx = true;
+
+       /* one data byte: the IDLE sequence selector for PHY_CTRL1 */
+       lp->reg_cmd[0]          = MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+       lp->reg_data[0]         = MCR20A_XCVSEQ_IDLE;
+       lp->reg_xfer_data.len   = 1;
+       lp->reg_msg.complete    = NULL;
+
+       return spi_async(lp->spi, &lp->reg_msg);
+}
+
+/* Report an energy detection level.
+ *
+ * @hw:    device (unused)
+ * @level: output, receives the reported level
+ *
+ * No hardware access is performed; the constant 0xbe is always
+ * reported.
+ *
+ * Returns 0 on success, or -EINVAL (after a WARN) when @level is NULL
+ * instead of dereferencing the NULL pointer.
+ */
+static int
+mcr20a_ed(struct ieee802154_hw *hw, u8 *level)
+{
+       if (WARN_ON(!level))
+               return -EINVAL;
+
+       *level = 0xbe;
+       return 0;
+}
+
+/* Tune the PLL to an IEEE 802.15.4 channel.
+ *
+ * @hw:      device, hw->priv is the struct mcr20a_local
+ * @page:    channel page (not used by this function)
+ * @channel: logical channel, MCR20A_MIN_CHANNEL..MCR20A_MAX_CHANNEL
+ *
+ * Returns 0 on success, -EINVAL for a channel outside the range covered
+ * by the PLL tables, or the error of the failing regmap_write().
+ */
+static int
+mcr20a_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
+{
+       struct mcr20a_local *lp = hw->priv;
+       unsigned int idx;
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* PLL_INT[] and PLL_FRAC[] only hold entries for these channels;
+        * never index them with anything else
+        */
+       if (channel < MCR20A_MIN_CHANNEL || channel > MCR20A_MAX_CHANNEL)
+               return -EINVAL;
+       idx = channel - MCR20A_MIN_CHANNEL;
+
+       /* frequency = ((PLL_INT+64) + (PLL_FRAC/65536)) * 32 MHz */
+       ret = regmap_write(lp->regmap_dar, DAR_PLL_INT0, PLL_INT[idx]);
+       if (ret)
+               return ret;
+       ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_LSB, 0x00);
+       if (ret)
+               return ret;
+
+       return regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_MSB, PLL_FRAC[idx]);
+}
+
+/* Bring the transceiver into receive mode.
+ *
+ * Clears the SLOTTED bit in DAR_PHY_CTRL1, enables the SPI device's
+ * interrupt line, unmasks the SEQ interrupt in DAR_PHY_CTRL2 and finally
+ * selects the RX sequence in DAR_PHY_CTRL1.
+ *
+ * Returns 0 on success or the negative error of the first register
+ * update that failed. When a failure happens after the interrupt was
+ * enabled, it is disabled again before returning.
+ */
+static int
+mcr20a_start(struct ieee802154_hw *hw)
+{
+       struct mcr20a_local *lp = hw->priv;
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* No slotted operation */
+       dev_dbg(printdev(lp), "no slotted operation\n");
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+                                DAR_PHY_CTRL1_SLOTTED, 0x0);
+       if (ret < 0)
+               return ret;
+
+       /* enable irq */
+       enable_irq(lp->spi->irq);
+
+       /* Unmask SEQ interrupt */
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL2,
+                                DAR_PHY_CTRL2_SEQMSK, 0x0);
+       if (ret < 0)
+               goto err_disable_irq;
+
+       /* Start the RX sequence */
+       dev_dbg(printdev(lp), "start the RX sequence\n");
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+                                DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+       if (ret < 0)
+               goto err_disable_irq;
+
+       return 0;
+
+err_disable_irq:
+       /* undo enable_irq() so that a later start attempt does not enable
+        * the interrupt a second time
+        */
+       disable_irq(lp->spi->irq);
+       return ret;
+}
+
+/* Stop the transceiver: select the IDLE sequence in DAR_PHY_CTRL1,
+ * then disable the SPI device's interrupt line. The result of the
+ * register update is not checked.
+ */
+static void
+mcr20a_stop(struct ieee802154_hw *hw)
+{
+       struct mcr20a_local *lp = hw->priv;
+       struct regmap *dar = lp->regmap_dar;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* abort whatever sequence is currently running */
+       regmap_update_bits(dar, DAR_PHY_CTRL1, DAR_PHY_CTRL1_XCVSEQ_MASK,
+                          MCR20A_XCVSEQ_IDLE);
+
+       /* no more interrupts from the device from here on */
+       disable_irq(lp->spi->irq);
+}
+
+/* Program the hardware address filter.
+ *
+ * @hw:      device, hw->priv is the struct mcr20a_local
+ * @filt:    new filter values
+ * @changed: IEEE802154_AFILT_*_CHANGED bits selecting which parts of
+ *           @filt are written
+ *
+ * Short address, PAN id and extended address go to the IAR
+ * MACSHORTADDRS0/MACPANID0/MACLONGADDRS0 registers; the PAN coordinator
+ * flag is reflected in DAR_PHY_CTRL4. Results of the register accesses
+ * are not checked; the function always returns 0.
+ */
+static int
+mcr20a_set_hw_addr_filt(struct ieee802154_hw *hw,
+                       struct ieee802154_hw_addr_filt *filt,
+                       unsigned long changed)
+{
+       struct mcr20a_local *lp = hw->priv;
+       struct regmap *iar = lp->regmap_iar;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       if (changed & IEEE802154_AFILT_SADDR_CHANGED) {
+               u16 saddr = le16_to_cpu(filt->short_addr);
+
+               regmap_write(iar, IAR_MACSHORTADDRS0_LSB, saddr);
+               regmap_write(iar, IAR_MACSHORTADDRS0_MSB, saddr >> 8);
+       }
+
+       if (changed & IEEE802154_AFILT_PANID_CHANGED) {
+               u16 panid = le16_to_cpu(filt->pan_id);
+
+               regmap_write(iar, IAR_MACPANID0_LSB, panid);
+               regmap_write(iar, IAR_MACPANID0_MSB, panid >> 8);
+       }
+
+       if (changed & IEEE802154_AFILT_IEEEADDR_CHANGED) {
+               const u8 *eaddr = (const u8 *)&filt->ieee_addr;
+               unsigned int n;
+
+               /* one register per address byte, in memory order */
+               for (n = 0; n < 8; n++)
+                       regmap_write(iar, IAR_MACLONGADDRS0_0 + n, eaddr[n]);
+       }
+
+       if (changed & IEEE802154_AFILT_PANC_CHANGED)
+               regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+                                  DAR_PHY_CTRL4_PANCORDNTR0,
+                                  filt->pan_coord ? 0x10 : 0x00);
+
+       return 0;
+}
+
+/* -30 dBm to 10 dBm
+ *
+ * Transmit power levels in 2 dB steps, stored in mBm (1/100 dBm);
+ * MCR20A_MAX_TX_POWERS is the highest valid index.
+ * NOTE(review): presumably installed as the phy's supported.tx_powers
+ * list that mcr20a_set_txpower() searches - confirm at the setup site.
+ */
+#define MCR20A_MAX_TX_POWERS 0x14
+static const s32 mcr20a_powers[MCR20A_MAX_TX_POWERS + 1] = {
+       -3000, -2800, -2600, -2400, -2200, -2000, -1800, -1600, -1400,
+       -1200, -1000, -800, -600, -400, -200, 0, 200, 400, 600, 800, 1000
+};
+
+/* Set the transmit power.
+ *
+ * @hw:  device, hw->priv is the struct mcr20a_local
+ * @mbm: requested power; must equal one of the phy's supported
+ *       tx_powers entries
+ *
+ * The index of the matching entry, offset by 8 and masked to five bits,
+ * is written to DAR_PA_PWR.
+ *
+ * Returns the result of regmap_write(), or -EINVAL when @mbm is not in
+ * the supported list.
+ */
+static int
+mcr20a_set_txpower(struct ieee802154_hw *hw, s32 mbm)
+{
+       struct mcr20a_local *lp = hw->priv;
+       u32 idx;
+
+       dev_dbg(printdev(lp), "%s(%d)\n", __func__, mbm);
+
+       for (idx = 0; idx < lp->hw->phy->supported.tx_powers_size; idx++) {
+               if (lp->hw->phy->supported.tx_powers[idx] != mbm)
+                       continue;
+
+               return regmap_write(lp->regmap_dar, DAR_PA_PWR,
+                                   ((idx + 8) & 0x1F));
+       }
+
+       return -EINVAL;
+}
+
+/* Table with one entry per CCA threshold value from 0 up to
+ * MCR20A_MIN_CCA_THRESHOLD inclusive. It is not const and has no
+ * initializer here.
+ * NOTE(review): presumably filled in during setup and exported as the
+ * phy's supported.cca_ed_levels list - confirm at the setup site.
+ */
+#define MCR20A_MAX_ED_LEVELS MCR20A_MIN_CCA_THRESHOLD
+static s32 mcr20a_ed_levels[MCR20A_MAX_ED_LEVELS + 1];
+
+/* Select the clear channel assessment mode.
+ *
+ * @hw:  device, hw->priv is the struct mcr20a_local
+ * @cca: requested mode (and, for the combined mode, the AND/OR option)
+ *
+ * The nl802154 mode is translated to an MCR20A_CCA_* type and written
+ * to the CCATYPE field of DAR_PHY_CTRL4. For the combined
+ * energy/carrier mode the CCA3_AND_NOT_OR bit in IAR_CCA_CTRL is set
+ * for AND and cleared for OR.
+ *
+ * Returns -EINVAL for an unsupported mode or option, otherwise the
+ * result of the last register update.
+ */
+static int
+mcr20a_set_cca_mode(struct ieee802154_hw *hw,
+                   const struct wpan_phy_cca *cca)
+{
+       struct mcr20a_local *lp = hw->priv;
+       unsigned int cca_mode;
+       bool use_and = false;
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* mapping 802.15.4 to driver spec */
+       switch (cca->mode) {
+       case NL802154_CCA_ENERGY:
+               cca_mode = MCR20A_CCA_MODE1;
+               break;
+       case NL802154_CCA_CARRIER:
+               cca_mode = MCR20A_CCA_MODE2;
+               break;
+       case NL802154_CCA_ENERGY_CARRIER:
+               if (cca->opt == NL802154_CCA_OPT_ENERGY_CARRIER_AND)
+                       use_and = true;
+               else if (cca->opt != NL802154_CCA_OPT_ENERGY_CARRIER_OR)
+                       return -EINVAL;
+               cca_mode = MCR20A_CCA_MODE3;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+                                DAR_PHY_CTRL4_CCATYPE_MASK,
+                                cca_mode << DAR_PHY_CTRL4_CCATYPE_SHIFT);
+       if (ret < 0)
+               return ret;
+
+       /* only the combined mode needs the AND/OR selection */
+       if (cca_mode != MCR20A_CCA_MODE3)
+               return ret;
+
+       return regmap_update_bits(lp->regmap_iar, IAR_CCA_CTRL,
+                                 IAR_CCA_CTRL_CCA3_AND_NOT_OR,
+                                 use_and ? 0x08 : 0x00);
+}
+
+/* Set the CCA energy-detection threshold.
+ *
+ * @mbm is looked up in the table of supported ED levels; the index of the
+ * matching entry is the raw value written to IAR_CCA1_THRESH.
+ *
+ * Returns the regmap_write() result on a match, or -EINVAL when @mbm is not
+ * one of the supported levels. That case used to be reported as success
+ * even though the hardware was left untouched.
+ */
+static int
+mcr20a_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm)
+{
+       struct mcr20a_local *lp = hw->priv;
+       u32 i;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       for (i = 0; i < hw->phy->supported.cca_ed_levels_size; i++) {
+               if (hw->phy->supported.cca_ed_levels[i] == mbm)
+                       return regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, i);
+       }
+
+       return -EINVAL;
+}
+
+/* Switch promiscuous reception on or off.
+ *
+ * When @on is true the PROMISCUOUS bit in DAR_PHY_CTRL4 is set and the RX
+ * frame filter is written with the ACK and NS frame-type bits only. When
+ * @on is false the bit is cleared and the filter is written with the
+ * frame-version, beacon, data and command bits.
+ *
+ * Returns 0 on success or the negative error of the first failing register
+ * access.
+ */
+static int
+mcr20a_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on)
+{
+       struct mcr20a_local *lp = hw->priv;
+       int ret;
+       u8 rx_frame_filter_reg = 0x0;
+
+       dev_dbg(printdev(lp), "%s(%d)\n", __func__, on);
+
+       if (on) {
+               /* All frame types accepted */
+               rx_frame_filter_reg &= ~(IAR_RX_FRAME_FLT_FRM_VER);
+               rx_frame_filter_reg |= (IAR_RX_FRAME_FLT_ACK_FT |
+                                 IAR_RX_FRAME_FLT_NS_FT);
+
+               ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+                                        DAR_PHY_CTRL4_PROMISCUOUS,
+                                        DAR_PHY_CTRL4_PROMISCUOUS);
+               if (ret < 0)
+                       return ret;
+
+               ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+                                  rx_frame_filter_reg);
+               if (ret < 0)
+                       return ret;
+       } else {
+               ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+                                        DAR_PHY_CTRL4_PROMISCUOUS, 0x0);
+               if (ret < 0)
+                       return ret;
+
+               ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+                                  IAR_RX_FRAME_FLT_FRM_VER |
+                                  IAR_RX_FRAME_FLT_BEACON_FT |
+                                  IAR_RX_FRAME_FLT_DATA_FT |
+                                  IAR_RX_FRAME_FLT_CMD_FT);
+               if (ret < 0)
+                       return ret;
+       }
+
+       return 0;
+}
+
+/* Driver callbacks; passed to ieee802154_alloc_hw() in mcr20a_probe(). */
+static const struct ieee802154_ops mcr20a_hw_ops = {
+       .owner                  = THIS_MODULE,
+       .xmit_async             = mcr20a_xmit,
+       .ed                     = mcr20a_ed,
+       .set_channel            = mcr20a_set_channel,
+       .start                  = mcr20a_start,
+       .stop                   = mcr20a_stop,
+       .set_hw_addr_filt       = mcr20a_set_hw_addr_filt,
+       .set_txpower            = mcr20a_set_txpower,
+       .set_cca_mode           = mcr20a_set_cca_mode,
+       .set_cca_ed_level       = mcr20a_set_cca_ed_level,
+       .set_promiscuous_mode   = mcr20a_set_promiscuous_mode,
+};
+
+/* Ask the transceiver to start an RX sequence.
+ *
+ * Queues an asynchronous update of the XCVSEQ field in DAR_PHY_CTRL1.
+ * Returns the result of queueing that update, so a failure is no longer
+ * reported as success.
+ */
+static int
+mcr20a_request_rx(struct mcr20a_local *lp)
+{
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* Start the RX sequence */
+       return regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+                                       DAR_PHY_CTRL1_XCVSEQ_MASK,
+                                       MCR20A_XCVSEQ_RX);
+}
+
+/* SPI completion callback for the packet buffer read queued by
+ * mcr20a_handle_rx_read_len_complete(): copies the received frame into a
+ * new skb, passes it up together with the LQI byte, and requests the next
+ * RX sequence.
+ */
+static void
+mcr20a_handle_rx_read_buf_complete(void *context)
+{
+       struct mcr20a_local *lp = context;
+       u8 len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+       struct sk_buff *skb;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       dev_dbg(printdev(lp), "RX is done\n");
+
+       /* a length rejected by the validity check is replaced by the MTU */
+       if (!ieee802154_is_valid_psdu_len(len)) {
+               dev_vdbg(&lp->spi->dev, "corrupted frame received\n");
+               len = IEEE802154_MTU;
+       }
+
+       len = len - 2;  /* get rid of frame check field */
+
+       skb = dev_alloc_skb(len);
+       /* NOTE(review): on allocation failure the frame is dropped and
+        * mcr20a_request_rx() is not called from here - confirm that RX is
+        * re-armed elsewhere.
+        */
+       if (!skb)
+               return;
+
+       memcpy(skb_put(skb, len), lp->rx_buf, len);
+       ieee802154_rx_irqsafe(lp->hw, skb, lp->rx_lqi[0]);
+
+       /* the dump reads the driver's own rx_buf, not the skb handed off */
+       print_hex_dump_debug("mcr20a rx: ", DUMP_PREFIX_OFFSET, 16, 1,
+                            lp->rx_buf, len, 0);
+       pr_debug("mcr20a rx: lqi: %02hhx\n", lp->rx_lqi[0]);
+
+       /* start RX sequence */
+       mcr20a_request_rx(lp);
+}
+
+/* SPI completion callback for the frame length read started by
+ * mcr20a_handle_rx(): sizes the packet buffer transfer from the received
+ * length and queues the burst read of the packet buffer.
+ */
+static void
+mcr20a_handle_rx_read_len_complete(void *context)
+{
+       struct mcr20a_local *lp = context;
+       u8 frame_len;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* get the length of received frame */
+       frame_len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+       dev_dbg(printdev(lp), "frame len : %d\n", frame_len);
+
+       /* prepare to read the rx buf */
+       lp->rx_header[0] = MCR20A_BURST_READ_PACKET_BUF;
+       lp->rx_xfer_buf.len = frame_len;
+       lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+
+       if (spi_async(lp->spi, &lp->rx_buf_msg))
+               dev_err(printdev(lp), "failed to read rx buffer length\n");
+}
+
+/* Begin handling a received frame: queue a one-byte read of the RX frame
+ * length register. mcr20a_handle_rx_read_len_complete() continues once the
+ * read has finished. Returns the spi_async() result.
+ */
+static int
+mcr20a_handle_rx(struct mcr20a_local *lp)
+{
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       lp->reg_cmd[0] = MCR20A_READ_REG(DAR_RX_FRM_LEN);
+       lp->reg_xfer_data.len = 1;
+       lp->reg_msg.complete = mcr20a_handle_rx_read_len_complete;
+
+       return spi_async(lp->spi, &lp->reg_msg);
+}
+
+/* Finish a transmission: report the stored tx_skb to the stack as
+ * transmitted, then request a new RX sequence. Returns the result of
+ * mcr20a_request_rx().
+ */
+static int
+mcr20a_handle_tx_complete(struct mcr20a_local *lp)
+{
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       ieee802154_xmit_complete(lp->hw, lp->tx_skb, false);
+
+       return mcr20a_request_rx(lp);
+}
+
+/* Queue the SPI burst write that loads the pending tx_skb into the packet
+ * buffer. The length byte sent ahead of the payload is the skb length plus
+ * two bytes of FCS. Returns 0 on success or the spi_async() error.
+ */
+static int
+mcr20a_handle_tx(struct mcr20a_local *lp)
+{
+       struct sk_buff *skb;
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       skb = lp->tx_skb;
+
+       /* write tx buffer */
+       lp->tx_header[0] = MCR20A_BURST_WRITE_PACKET_BUF;
+       /* add 2 bytes of FCS */
+       lp->tx_len[0] = skb->len + 2;
+       lp->tx_xfer_buf.tx_buf = skb->data;
+       /* add 1 byte psduLength */
+       lp->tx_xfer_buf.len = skb->len + 1;
+
+       ret = spi_async(lp->spi, &lp->tx_buf_msg);
+       if (ret)
+               dev_err(printdev(lp), "SPI write Failed for TX buf\n");
+
+       return ret;
+}
+
+/* SPI completion callback for the IRQ status write-back queued by
+ * mcr20a_irq_status_complete(). Re-enables the interrupt line and then
+ * dispatches on the low bits of the saved IRQSTS1 value together with the
+ * is_tx flag: continue a transmission, complete one, or start reading a
+ * received frame.
+ */
+static void
+mcr20a_irq_clean_complete(void *context)
+{
+       struct mcr20a_local *lp = context;
+       u8 seq_state = lp->irq_data[DAR_IRQ_STS1] & DAR_PHY_CTRL1_XCVSEQ_MASK;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       enable_irq(lp->spi->irq);
+
+       dev_dbg(printdev(lp), "IRQ STA1 (%02x) STA2 (%02x)\n",
+               lp->irq_data[DAR_IRQ_STS1], lp->irq_data[DAR_IRQ_STS2]);
+
+       switch (seq_state) {
+       case 0x01:
+               if (lp->is_tx) {
+                       dev_dbg(printdev(lp), "TX is starting\n");
+                       mcr20a_handle_tx(lp);
+               } else {
+                       dev_dbg(printdev(lp), "MCR20A is stop\n");
+               }
+               break;
+       /* TX IRQ, RX IRQ and SEQ IRQ */
+       case 0x03:
+               if (lp->is_tx) {
+                       lp->is_tx = 0;
+                       dev_dbg(printdev(lp), "TX is done. No ACK\n");
+                       mcr20a_handle_tx_complete(lp);
+               }
+               break;
+       case 0x05:
+               /* rx is starting */
+               dev_dbg(printdev(lp), "RX is starting\n");
+               mcr20a_handle_rx(lp);
+               break;
+       case 0x07:
+               if (lp->is_tx) {
+                       /* tx is done */
+                       lp->is_tx = 0;
+                       dev_dbg(printdev(lp), "TX is done. Get ACK\n");
+                       mcr20a_handle_tx_complete(lp);
+               } else {
+                       /* rx is starting */
+                       dev_dbg(printdev(lp), "RX is starting\n");
+                       mcr20a_handle_rx(lp);
+               }
+               break;
+       default:
+               /* any other combination needs no action */
+               break;
+       }
+}
+
+/* SPI completion callback for the IRQ status read queued by
+ * mcr20a_irq_isr(): requests the idle sequence, then writes the status
+ * bytes that were just read back to the registers starting at
+ * DAR_IRQ_STS1. mcr20a_irq_clean_complete() runs when that write is done.
+ */
+static void mcr20a_irq_status_complete(void *context)
+{
+       int ret;
+       struct mcr20a_local *lp = context;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+       regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+                                DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE);
+
+       /* NOTE(review): writing the read values back presumably
+        * acknowledges the pending bits - confirm against the datasheet.
+        */
+       lp->reg_msg.complete = mcr20a_irq_clean_complete;
+       lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_IRQ_STS1);
+       memcpy(lp->reg_data, lp->irq_data, MCR20A_IRQSTS_NUM);
+       lp->reg_xfer_data.len = MCR20A_IRQSTS_NUM;
+
+       ret = spi_async(lp->spi, &lp->reg_msg);
+
+       if (ret)
+               dev_err(printdev(lp), "failed to clean irq status\n");
+}
+
+/* Interrupt handler: masks the line and queues an asynchronous read of the
+ * IRQ status registers. The line stays disabled until
+ * mcr20a_irq_clean_complete() re-enables it, unless queueing the read
+ * fails, in which case it is re-enabled here and IRQ_NONE is returned.
+ */
+static irqreturn_t mcr20a_irq_isr(int irq, void *data)
+{
+       struct mcr20a_local *lp = data;
+
+       disable_irq_nosync(irq);
+
+       /* read IRQSTSx */
+       lp->irq_header[0] = MCR20A_READ_REG(DAR_IRQ_STS1);
+       if (!spi_async(lp->spi, &lp->irq_msg))
+               return IRQ_HANDLED;
+
+       /* the status read could not be queued: undo the disable */
+       enable_irq(irq);
+
+       return IRQ_NONE;
+}
+
+/* Fill @pdata from the device tree node of @spi.
+ *
+ * Only the "rst_b-gpio" property is read; its lookup result is stored
+ * as-is and validated later by the caller. Returns -EINVAL when the device
+ * has no device tree node, otherwise 0.
+ */
+static int mcr20a_get_platform_data(struct spi_device *spi,
+                                   struct mcr20a_platform_data *pdata)
+{
+       struct device_node *np = spi->dev.of_node;
+
+       if (!np)
+               return -EINVAL;
+
+       pdata->rst_gpio = of_get_named_gpio(np, "rst_b-gpio", 0);
+       dev_dbg(&spi->dev, "rst_b-gpio: %d\n", pdata->rst_gpio);
+
+       return 0;
+}
+
+/* Describe the transceiver to the 802.15.4 stack: timing values, hardware
+ * and PHY flags, supported CCA modes and options, the ED level and TX power
+ * tables, and the defaults for channel, CCA mode, ED level and TX power.
+ * No register is accessed here.
+ */
+static void mcr20a_hw_setup(struct mcr20a_local *lp)
+{
+       struct ieee802154_hw *hw = lp->hw;
+       struct wpan_phy *phy = hw->phy;
+       u8 level;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* timing */
+       phy->symbol_duration = 16;
+       phy->lifs_period = 40;
+       phy->sifs_period = 12;
+
+       /* capabilities */
+       hw->flags = IEEE802154_HW_TX_OMIT_CKSUM |
+                   IEEE802154_HW_AFILT |
+                   IEEE802154_HW_PROMISCUOUS;
+       phy->flags = WPAN_PHY_FLAG_TXPOWER |
+                    WPAN_PHY_FLAG_CCA_ED_LEVEL |
+                    WPAN_PHY_FLAG_CCA_MODE;
+
+       phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) |
+                                  BIT(NL802154_CCA_CARRIER) |
+                                  BIT(NL802154_CCA_ENERGY_CARRIER);
+       phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) |
+                                 BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR);
+
+       /* initiating cca_ed_levels: entry i holds -i * 100 */
+       for (level = MCR20A_MAX_CCA_THRESHOLD;
+            level < MCR20A_MIN_CCA_THRESHOLD + 1; ++level)
+               mcr20a_ed_levels[level] = -level * 100;
+
+       phy->supported.cca_ed_levels = mcr20a_ed_levels;
+       phy->supported.cca_ed_levels_size = ARRAY_SIZE(mcr20a_ed_levels);
+       phy->supported.tx_powers = mcr20a_powers;
+       phy->supported.tx_powers_size = ARRAY_SIZE(mcr20a_powers);
+       phy->supported.channels[0] = MCR20A_VALID_CHANNELS;
+
+       /* defaults */
+       phy->cca.mode = NL802154_CCA_ENERGY;
+       phy->current_page = 0;
+       /* MCR20A default reset value */
+       phy->current_channel = 20;
+       phy->cca_ed_level = phy->supported.cca_ed_levels[75];
+       phy->transmit_power = phy->supported.tx_powers[0x0F];
+}
+
+/* Build the reusable SPI message for writing a frame to the packet buffer:
+ * a one-byte header, a one-byte length, then the payload transfer. The
+ * payload pointer and length are set per frame by mcr20a_handle_tx().
+ */
+static void
+mcr20a_setup_tx_spi_messages(struct mcr20a_local *lp)
+{
+       spi_message_init(&lp->tx_buf_msg);
+       lp->tx_buf_msg.context = lp;
+       lp->tx_buf_msg.complete = mcr20a_write_tx_buf_complete;
+
+       lp->tx_xfer_header.len = 1;
+       lp->tx_xfer_header.tx_buf = lp->tx_header;
+
+       lp->tx_xfer_len.len = 1;
+       lp->tx_xfer_len.tx_buf = lp->tx_len;
+
+       /* transfers go out in the order they are added */
+       spi_message_add_tail(&lp->tx_xfer_header, &lp->tx_buf_msg);
+       spi_message_add_tail(&lp->tx_xfer_len, &lp->tx_buf_msg);
+       spi_message_add_tail(&lp->tx_xfer_buf, &lp->tx_buf_msg);
+}
+
+/* Build two reusable SPI messages:
+ *  - reg_msg: a command byte followed by a data transfer, used for
+ *    register access; each user sets the data length and the completion
+ *    callback before queueing it.
+ *  - rx_buf_msg: a header byte, the packet buffer, then one LQI byte; the
+ *    packet buffer length is set by mcr20a_handle_rx_read_len_complete().
+ */
+static void
+mcr20a_setup_rx_spi_messages(struct mcr20a_local *lp)
+{
+       spi_message_init(&lp->reg_msg);
+       lp->reg_msg.context = lp;
+
+       lp->reg_xfer_cmd.len = 1;
+       lp->reg_xfer_cmd.tx_buf = lp->reg_cmd;
+       lp->reg_xfer_cmd.rx_buf = lp->reg_cmd;
+
+       /* same buffer in both directions, so it serves reads and writes */
+       lp->reg_xfer_data.rx_buf = lp->reg_data;
+       lp->reg_xfer_data.tx_buf = lp->reg_data;
+
+       spi_message_add_tail(&lp->reg_xfer_cmd, &lp->reg_msg);
+       spi_message_add_tail(&lp->reg_xfer_data, &lp->reg_msg);
+
+       spi_message_init(&lp->rx_buf_msg);
+       lp->rx_buf_msg.context = lp;
+       lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+       lp->rx_xfer_header.len = 1;
+       lp->rx_xfer_header.tx_buf = lp->rx_header;
+       lp->rx_xfer_header.rx_buf = lp->rx_header;
+
+       lp->rx_xfer_buf.rx_buf = lp->rx_buf;
+
+       lp->rx_xfer_lqi.len = 1;
+       lp->rx_xfer_lqi.rx_buf = lp->rx_lqi;
+
+       spi_message_add_tail(&lp->rx_xfer_header, &lp->rx_buf_msg);
+       spi_message_add_tail(&lp->rx_xfer_buf, &lp->rx_buf_msg);
+       spi_message_add_tail(&lp->rx_xfer_lqi, &lp->rx_buf_msg);
+}
+
+/* Build the reusable SPI message that mcr20a_irq_isr() queues to read the
+ * IRQ status registers: a header byte followed by MCR20A_IRQSTS_NUM bytes
+ * received into irq_data. mcr20a_irq_status_complete() runs on completion.
+ */
+static void
+mcr20a_setup_irq_spi_messages(struct mcr20a_local *lp)
+{
+       spi_message_init(&lp->irq_msg);
+       lp->irq_msg.context             = lp;
+       lp->irq_msg.complete    = mcr20a_irq_status_complete;
+       lp->irq_xfer_header.len = 1;
+       lp->irq_xfer_header.tx_buf = lp->irq_header;
+       lp->irq_xfer_header.rx_buf = lp->irq_header;
+
+       lp->irq_xfer_data.len   = MCR20A_IRQSTS_NUM;
+       lp->irq_xfer_data.rx_buf = lp->irq_data;
+
+       spi_message_add_tail(&lp->irq_xfer_header, &lp->irq_msg);
+       spi_message_add_tail(&lp->irq_xfer_data, &lp->irq_msg);
+}
+
+/* Bring the transceiver registers into the driver's initial state.
+ *
+ * Clears pending interrupt status, enables AUTOACK, masks interrupts and
+ * timers, programs the RX frame filter, applies the register overwrites,
+ * clears the indirect queue, and sets the CCA threshold, timer prescaler,
+ * autodoze and clock output.
+ *
+ * Returns 0 on success or the negative error of the first failing register
+ * access; every access is checked, including the AUTOACK update whose
+ * result was previously overwritten.
+ */
+static int
+mcr20a_phy_init(struct mcr20a_local *lp)
+{
+       u8 index;
+       unsigned int phy_reg = 0;
+       int ret;
+
+       dev_dbg(printdev(lp), "%s\n", __func__);
+
+       /* Disable Tristate on COCO MISO for SPI reads */
+       ret = regmap_write(lp->regmap_iar, IAR_MISC_PAD_CTRL, 0x02);
+       if (ret)
+               goto err_ret;
+
+       /* Clear all PP IRQ bits in IRQSTS1 to avoid unexpected interrupts
+        * immediately after init
+        */
+       ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS1, 0xEF);
+       if (ret)
+               goto err_ret;
+
+       /* Clear all PP IRQ bits in IRQSTS2 */
+       ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS2,
+                          DAR_IRQSTS2_ASM_IRQ | DAR_IRQSTS2_PB_ERR_IRQ |
+                          DAR_IRQSTS2_WAKE_IRQ);
+       if (ret)
+               goto err_ret;
+
+       /* Disable all timer interrupts */
+       ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS3, 0xFF);
+       if (ret)
+               goto err_ret;
+
+       /*  PHY_CTRL1 : default HW settings + AUTOACK enabled */
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+                                DAR_PHY_CTRL1_AUTOACK, DAR_PHY_CTRL1_AUTOACK);
+       if (ret)
+               goto err_ret;
+
+       /*  PHY_CTRL2 : disable all interrupts */
+       ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL2, 0xFF);
+       if (ret)
+               goto err_ret;
+
+       /* PHY_CTRL3 : disable all timers and remaining interrupts */
+       ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL3,
+                          DAR_PHY_CTRL3_ASM_MSK | DAR_PHY_CTRL3_PB_ERR_MSK |
+                          DAR_PHY_CTRL3_WAKE_MSK);
+       if (ret)
+               goto err_ret;
+
+       /* SRC_CTRL : enable Acknowledge Frame Pending and
+        * Source Address Matching Enable
+        */
+       ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL,
+                          DAR_SRC_CTRL_ACK_FRM_PND |
+                          (DAR_SRC_CTRL_INDEX << DAR_SRC_CTRL_INDEX_SHIFT));
+       if (ret)
+               goto err_ret;
+
+       /*  RX_FRAME_FILTER */
+       /*  FRM_VER[1:0] = b11. Accept FrameVersion 0 and 1 packets */
+       ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+                          IAR_RX_FRAME_FLT_FRM_VER |
+                          IAR_RX_FRAME_FLT_BEACON_FT |
+                          IAR_RX_FRAME_FLT_DATA_FT |
+                          IAR_RX_FRAME_FLT_CMD_FT);
+       if (ret)
+               goto err_ret;
+
+       dev_info(printdev(lp), "MCR20A DAR overwrites version: 0x%02x\n",
+                MCR20A_OVERWRITE_VERSION);
+
+       /* Overwrites direct registers  */
+       ret = regmap_write(lp->regmap_dar, DAR_OVERWRITE_VER,
+                          MCR20A_OVERWRITE_VERSION);
+       if (ret)
+               goto err_ret;
+
+       /* Overwrites indirect registers  */
+       ret = regmap_multi_reg_write(lp->regmap_iar, mar20a_iar_overwrites,
+                                    ARRAY_SIZE(mar20a_iar_overwrites));
+       if (ret)
+               goto err_ret;
+
+       /* Clear HW indirect queue */
+       dev_dbg(printdev(lp), "clear HW indirect queue\n");
+       for (index = 0; index < MCR20A_PHY_INDIRECT_QUEUE_SIZE; index++) {
+               phy_reg = (u8)(((index & DAR_SRC_CTRL_INDEX) <<
+                              DAR_SRC_CTRL_INDEX_SHIFT)
+                             | (DAR_SRC_CTRL_SRCADDR_EN)
+                             | (DAR_SRC_CTRL_INDEX_DISABLE));
+               ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL, phy_reg);
+               if (ret)
+                       goto err_ret;
+               phy_reg = 0;
+       }
+
+       /* Assign HW Indirect hash table to PAN0 */
+       ret = regmap_read(lp->regmap_iar, IAR_DUAL_PAN_CTRL, &phy_reg);
+       if (ret)
+               goto err_ret;
+
+       /* Clear current lvl */
+       phy_reg &= ~IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK;
+
+       /* Set new lvl */
+       phy_reg |= MCR20A_PHY_INDIRECT_QUEUE_SIZE <<
+               IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT;
+       ret = regmap_write(lp->regmap_iar, IAR_DUAL_PAN_CTRL, phy_reg);
+       if (ret)
+               goto err_ret;
+
+       /* Set CCA threshold to -75 dBm */
+       ret = regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, 0x4B);
+       if (ret)
+               goto err_ret;
+
+       /* Set prescaler to obtain 1 symbol (16us) timebase */
+       ret = regmap_write(lp->regmap_iar, IAR_TMR_PRESCALE, 0x05);
+       if (ret)
+               goto err_ret;
+
+       /* Enable autodoze mode. */
+       ret = regmap_update_bits(lp->regmap_dar, DAR_PWR_MODES,
+                                DAR_PWR_MODES_AUTODOZE,
+                                DAR_PWR_MODES_AUTODOZE);
+       if (ret)
+               goto err_ret;
+
+       /* Disable clk_out */
+       ret = regmap_update_bits(lp->regmap_dar, DAR_CLK_OUT_CTRL,
+                                DAR_CLK_OUT_CTRL_EN, 0x0);
+       if (ret)
+               goto err_ret;
+
+       return 0;
+
+err_ret:
+       return ret;
+}
+
+/* Probe an MCR20A on the SPI bus.
+ *
+ * Reads the reset GPIO from the device tree, pulses it when valid,
+ * allocates the ieee802154_hw with the driver's private data, prepares the
+ * SPI messages and regmaps, initialises the PHY, requests the interrupt
+ * (left disabled here) and registers the device with the 802.15.4 stack.
+ *
+ * The platform data is device-managed, so it is released on every failure
+ * path and on unbind; once the hw has been allocated, all failures leave
+ * through free_dev so it is not leaked.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int
+mcr20a_probe(struct spi_device *spi)
+{
+       struct ieee802154_hw *hw;
+       struct mcr20a_local *lp;
+       struct mcr20a_platform_data *pdata;
+       int irq_type;
+       int ret = -ENOMEM;
+
+       dev_dbg(&spi->dev, "%s\n", __func__);
+
+       if (!spi->irq) {
+               dev_err(&spi->dev, "no IRQ specified\n");
+               return -EINVAL;
+       }
+
+       /* device-managed: freed automatically on probe failure and unbind */
+       pdata = devm_kzalloc(&spi->dev, sizeof(*pdata), GFP_KERNEL);
+       if (!pdata)
+               return -ENOMEM;
+
+       /* set mcr20a platform data */
+       ret = mcr20a_get_platform_data(spi, pdata);
+       if (ret < 0) {
+               dev_crit(&spi->dev, "mcr20a_get_platform_data failed.\n");
+               return ret;
+       }
+
+       /* init reset gpio */
+       if (gpio_is_valid(pdata->rst_gpio)) {
+               ret = devm_gpio_request_one(&spi->dev, pdata->rst_gpio,
+                                           GPIOF_OUT_INIT_HIGH, "reset");
+               if (ret)
+                       return ret;
+       }
+
+       /* reset mcr20a */
+       if (gpio_is_valid(pdata->rst_gpio)) {
+               usleep_range(10, 20);
+               gpio_set_value_cansleep(pdata->rst_gpio, 0);
+               usleep_range(10, 20);
+               gpio_set_value_cansleep(pdata->rst_gpio, 1);
+               usleep_range(120, 240);
+       }
+
+       /* allocate ieee802154_hw and private data */
+       hw = ieee802154_alloc_hw(sizeof(*lp), &mcr20a_hw_ops);
+       if (!hw) {
+               dev_crit(&spi->dev, "ieee802154_alloc_hw failed\n");
+               return -ENOMEM;
+       }
+
+       /* init mcr20a local data */
+       lp = hw->priv;
+       lp->hw = hw;
+       lp->spi = spi;
+       lp->spi->dev.platform_data = pdata;
+       lp->pdata = pdata;
+
+       /* init ieee802154_hw */
+       hw->parent = &spi->dev;
+       ieee802154_random_extended_addr(&hw->phy->perm_extended_addr);
+
+       /* init buf */
+       lp->buf = devm_kzalloc(&spi->dev, SPI_COMMAND_BUFFER, GFP_KERNEL);
+
+       if (!lp->buf) {
+               /* hw is already allocated: release it on the way out */
+               ret = -ENOMEM;
+               goto free_dev;
+       }
+
+       mcr20a_setup_tx_spi_messages(lp);
+       mcr20a_setup_rx_spi_messages(lp);
+       mcr20a_setup_irq_spi_messages(lp);
+
+       /* setup regmap */
+       lp->regmap_dar = devm_regmap_init_spi(spi, &mcr20a_dar_regmap);
+       if (IS_ERR(lp->regmap_dar)) {
+               ret = PTR_ERR(lp->regmap_dar);
+               dev_err(&spi->dev, "Failed to allocate dar map: %d\n",
+                       ret);
+               goto free_dev;
+       }
+
+       lp->regmap_iar = devm_regmap_init_spi(spi, &mcr20a_iar_regmap);
+       if (IS_ERR(lp->regmap_iar)) {
+               ret = PTR_ERR(lp->regmap_iar);
+               dev_err(&spi->dev, "Failed to allocate iar map: %d\n", ret);
+               goto free_dev;
+       }
+
+       mcr20a_hw_setup(lp);
+
+       spi_set_drvdata(spi, lp);
+
+       ret = mcr20a_phy_init(lp);
+       if (ret < 0) {
+               dev_crit(&spi->dev, "mcr20a_phy_init failed\n");
+               goto free_dev;
+       }
+
+       /* fall back to falling edge when no trigger type is configured */
+       irq_type = irq_get_trigger_type(spi->irq);
+       if (!irq_type)
+               irq_type = IRQF_TRIGGER_FALLING;
+
+       ret = devm_request_irq(&spi->dev, spi->irq, mcr20a_irq_isr,
+                              irq_type, dev_name(&spi->dev), lp);
+       if (ret) {
+               dev_err(&spi->dev, "could not request_irq for mcr20a\n");
+               ret = -ENODEV;
+               goto free_dev;
+       }
+
+       /* disable_irq by default and wait for starting hardware */
+       disable_irq(spi->irq);
+
+       ret = ieee802154_register_hw(hw);
+       if (ret) {
+               dev_crit(&spi->dev, "ieee802154_register_hw failed\n");
+               goto free_dev;
+       }
+
+       return ret;
+
+free_dev:
+       ieee802154_free_hw(lp->hw);
+
+       return ret;
+}
+
+/* Unbind: unregister the device from the 802.15.4 stack and free the hw
+ * (and with it the driver's private data). Always returns 0.
+ */
+static int mcr20a_remove(struct spi_device *spi)
+{
+       struct mcr20a_local *lp = spi_get_drvdata(spi);
+
+       dev_dbg(&spi->dev, "%s\n", __func__);
+
+       ieee802154_unregister_hw(lp->hw);
+       ieee802154_free_hw(lp->hw);
+
+       return 0;
+}
+
+/* Device tree compatible strings handled by this driver. */
+static const struct of_device_id mcr20a_of_match[] = {
+       { .compatible = "nxp,mcr20a", },
+       { },
+};
+MODULE_DEVICE_TABLE(of, mcr20a_of_match);
+
+/* SPI device names handled by this driver. */
+static const struct spi_device_id mcr20a_device_id[] = {
+       { .name = "mcr20a", },
+       { },
+};
+MODULE_DEVICE_TABLE(spi, mcr20a_device_id);
+
+/* SPI driver definition tying the match tables to probe/remove. */
+static struct spi_driver mcr20a_driver = {
+       .id_table = mcr20a_device_id,
+       .driver = {
+               .of_match_table = of_match_ptr(mcr20a_of_match),
+               .name   = "mcr20a",
+       },
+       .probe      = mcr20a_probe,
+       .remove     = mcr20a_remove,
+};
+
+module_spi_driver(mcr20a_driver);
+
+/* Module metadata; the author address matches the copyright notice in
+ * mcr20a.h.
+ */
+MODULE_DESCRIPTION("MCR20A Transceiver Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Xue Liu <liuxuenetmail@gmail.com>");
diff --git a/drivers/net/ieee802154/mcr20a.h b/drivers/net/ieee802154/mcr20a.h
new file mode 100644 (file)
index 0000000..6da4fd0
--- /dev/null
@@ -0,0 +1,498 @@
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef _MCR20A_H
+#define _MCR20A_H
+
+/* Direct Access Registers */
+#define DAR_IRQ_STS1           0x00
+#define DAR_IRQ_STS2           0x01
+#define DAR_IRQ_STS3           0x02
+#define DAR_PHY_CTRL1          0x03
+#define DAR_PHY_CTRL2          0x04
+#define DAR_PHY_CTRL3          0x05
+#define DAR_RX_FRM_LEN         0x06
+#define DAR_PHY_CTRL4          0x07
+#define DAR_SRC_CTRL           0x08
+#define DAR_SRC_ADDRS_SUM_LSB  0x09
+#define DAR_SRC_ADDRS_SUM_MSB  0x0A
+#define DAR_CCA1_ED_FNL                0x0B
+#define DAR_EVENT_TMR_LSB      0x0C
+#define DAR_EVENT_TMR_MSB      0x0D
+#define DAR_EVENT_TMR_USB      0x0E
+#define DAR_TIMESTAMP_LSB      0x0F
+#define DAR_TIMESTAMP_MSB      0x10
+#define DAR_TIMESTAMP_USB      0x11
+#define DAR_T3CMP_LSB          0x12
+#define DAR_T3CMP_MSB          0x13
+#define DAR_T3CMP_USB          0x14
+#define DAR_T2PRIMECMP_LSB     0x15
+#define DAR_T2PRIMECMP_MSB     0x16
+#define DAR_T1CMP_LSB          0x17
+#define DAR_T1CMP_MSB          0x18
+#define DAR_T1CMP_USB          0x19
+#define DAR_T2CMP_LSB          0x1A
+#define DAR_T2CMP_MSB          0x1B
+#define DAR_T2CMP_USB          0x1C
+#define DAR_T4CMP_LSB          0x1D
+#define DAR_T4CMP_MSB          0x1E
+#define DAR_T4CMP_USB          0x1F
+#define DAR_PLL_INT0           0x20
+#define DAR_PLL_FRAC0_LSB      0x21
+#define DAR_PLL_FRAC0_MSB      0x22
+#define DAR_PA_PWR             0x23
+#define DAR_SEQ_STATE          0x24
+#define DAR_LQI_VALUE          0x25
+#define DAR_RSSI_CCA_CONT      0x26
+/*------------------            0x27 */
+#define DAR_ASM_CTRL1          0x28
+#define DAR_ASM_CTRL2          0x29
+#define DAR_ASM_DATA_0         0x2A
+#define DAR_ASM_DATA_1         0x2B
+#define DAR_ASM_DATA_2         0x2C
+#define DAR_ASM_DATA_3         0x2D
+#define DAR_ASM_DATA_4         0x2E
+#define DAR_ASM_DATA_5         0x2F
+#define DAR_ASM_DATA_6         0x30
+#define DAR_ASM_DATA_7         0x31
+#define DAR_ASM_DATA_8         0x32
+#define DAR_ASM_DATA_9         0x33
+#define DAR_ASM_DATA_A         0x34
+#define DAR_ASM_DATA_B         0x35
+#define DAR_ASM_DATA_C         0x36
+#define DAR_ASM_DATA_D         0x37
+#define DAR_ASM_DATA_E         0x38
+#define DAR_ASM_DATA_F         0x39
+/*-----------------------       0x3A */
+#define DAR_OVERWRITE_VER      0x3B
+#define DAR_CLK_OUT_CTRL       0x3C
+#define DAR_PWR_MODES          0x3D
+#define IAR_INDEX              0x3E
+#define IAR_DATA               0x3F
+
+/* Indirect Register Memory */
+#define IAR_PART_ID            0x00
+#define IAR_XTAL_TRIM          0x01
+#define IAR_PMC_LP_TRIM                0x02
+#define IAR_MACPANID0_LSB      0x03
+#define IAR_MACPANID0_MSB      0x04
+#define IAR_MACSHORTADDRS0_LSB 0x05
+#define IAR_MACSHORTADDRS0_MSB 0x06
+#define IAR_MACLONGADDRS0_0    0x07
+#define IAR_MACLONGADDRS0_8    0x08
+#define IAR_MACLONGADDRS0_16   0x09
+#define IAR_MACLONGADDRS0_24   0x0A
+#define IAR_MACLONGADDRS0_32   0x0B
+#define IAR_MACLONGADDRS0_40   0x0C
+#define IAR_MACLONGADDRS0_48   0x0D
+#define IAR_MACLONGADDRS0_56   0x0E
+#define IAR_RX_FRAME_FILTER    0x0F
+#define IAR_PLL_INT1           0x10
+#define IAR_PLL_FRAC1_LSB      0x11
+#define IAR_PLL_FRAC1_MSB      0x12
+#define IAR_MACPANID1_LSB      0x13
+#define IAR_MACPANID1_MSB      0x14
+#define IAR_MACSHORTADDRS1_LSB 0x15
+#define IAR_MACSHORTADDRS1_MSB 0x16
+#define IAR_MACLONGADDRS1_0    0x17
+#define IAR_MACLONGADDRS1_8    0x18
+#define IAR_MACLONGADDRS1_16   0x19
+#define IAR_MACLONGADDRS1_24   0x1A
+#define IAR_MACLONGADDRS1_32   0x1B
+#define IAR_MACLONGADDRS1_40   0x1C
+#define IAR_MACLONGADDRS1_48   0x1D
+#define IAR_MACLONGADDRS1_56   0x1E
+#define IAR_DUAL_PAN_CTRL      0x1F
+#define IAR_DUAL_PAN_DWELL     0x20
+#define IAR_DUAL_PAN_STS       0x21
+#define IAR_CCA1_THRESH                0x22
+#define IAR_CCA1_ED_OFFSET_COMP        0x23
+#define IAR_LQI_OFFSET_COMP    0x24
+#define IAR_CCA_CTRL           0x25
+#define IAR_CCA2_CORR_PEAKS    0x26
+#define IAR_CCA2_CORR_THRESH   0x27
+#define IAR_TMR_PRESCALE       0x28
+/*--------------------          0x29 */
+#define IAR_GPIO_DATA          0x2A
+#define IAR_GPIO_DIR           0x2B
+#define IAR_GPIO_PUL_EN                0x2C
+#define IAR_GPIO_PUL_SEL       0x2D
+#define IAR_GPIO_DS            0x2E
+/*------------------            0x2F */
+#define IAR_ANT_PAD_CTRL       0x30
+#define IAR_MISC_PAD_CTRL      0x31
+#define IAR_BSM_CTRL           0x32
+/*-------------------           0x33 */
+#define IAR_RNG                        0x34
+#define IAR_RX_BYTE_COUNT      0x35
+#define IAR_RX_WTR_MARK                0x36
+#define IAR_SOFT_RESET         0x37
+#define IAR_TXDELAY            0x38
+#define IAR_ACKDELAY           0x39
+#define IAR_SEQ_MGR_CTRL       0x3A
+#define IAR_SEQ_MGR_STS                0x3B
+#define IAR_SEQ_T_STS          0x3C
+#define IAR_ABORT_STS          0x3D
+#define IAR_CCCA_BUSY_CNT      0x3E
+#define IAR_SRC_ADDR_CHECKSUM1 0x3F
+#define IAR_SRC_ADDR_CHECKSUM2 0x40
+#define IAR_SRC_TBL_VALID1     0x41
+#define IAR_SRC_TBL_VALID2     0x42
+#define IAR_FILTERFAIL_CODE1   0x43
+#define IAR_FILTERFAIL_CODE2   0x44
+#define IAR_SLOT_PRELOAD       0x45
+/*--------------------          0x46 */
+#define IAR_CORR_VT            0x47
+#define IAR_SYNC_CTRL          0x48
+#define IAR_PN_LSB_0           0x49
+#define IAR_PN_LSB_1           0x4A
+#define IAR_PN_MSB_0           0x4B
+#define IAR_PN_MSB_1           0x4C
+#define IAR_CORR_NVAL          0x4D
+#define IAR_TX_MODE_CTRL       0x4E
+#define IAR_SNF_THR            0x4F
+#define IAR_FAD_THR            0x50
+#define IAR_ANT_AGC_CTRL       0x51
+#define IAR_AGC_THR1           0x52
+#define IAR_AGC_THR2           0x53
+#define IAR_AGC_HYS            0x54
+#define IAR_AFC                        0x55
+/*-------------------           0x56 */
+/*-------------------           0x57 */
+#define IAR_PHY_STS            0x58
+#define IAR_RX_MAX_CORR                0x59
+#define IAR_RX_MAX_PREAMBLE    0x5A
+#define IAR_RSSI               0x5B
+/*-------------------           0x5C */
+/*-------------------           0x5D */
+#define IAR_PLL_DIG_CTRL       0x5E
+#define IAR_VCO_CAL            0x5F
+#define IAR_VCO_BEST_DIFF      0x60
+#define IAR_VCO_BIAS           0x61
+#define IAR_KMOD_CTRL          0x62
+#define IAR_KMOD_CAL           0x63
+#define IAR_PA_CAL             0x64
+#define IAR_PA_PWRCAL          0x65
+#define IAR_ATT_RSSI1          0x66
+#define IAR_ATT_RSSI2          0x67
+#define IAR_RSSI_OFFSET                0x68
+#define IAR_RSSI_SLOPE         0x69
+#define IAR_RSSI_CAL1          0x6A
+#define IAR_RSSI_CAL2          0x6B
+/*-------------------           0x6C */
+/*-------------------           0x6D */
+#define IAR_XTAL_CTRL          0x6E
+#define IAR_XTAL_COMP_MIN      0x6F
+#define IAR_XTAL_COMP_MAX      0x70
+#define IAR_XTAL_GM            0x71
+/*-------------------           0x72 */
+/*-------------------           0x73 */
+#define IAR_LNA_TUNE           0x74
+#define IAR_LNA_AGCGAIN                0x75
+/*-------------------           0x76 */
+/*-------------------           0x77 */
+#define IAR_CHF_PMA_GAIN       0x78
+#define IAR_CHF_IBUF           0x79
+#define IAR_CHF_QBUF           0x7A
+#define IAR_CHF_IRIN           0x7B
+#define IAR_CHF_QRIN           0x7C
+#define IAR_CHF_IL             0x7D
+#define IAR_CHF_QL             0x7E
+#define IAR_CHF_CC1            0x7F
+#define IAR_CHF_CCL            0x80
+#define IAR_CHF_CC2            0x81
+#define IAR_CHF_IROUT          0x82
+#define IAR_CHF_QROUT          0x83
+/*-------------------           0x84 */
+/*-------------------           0x85 */
+#define IAR_RSSI_CTRL          0x86
+/*-------------------           0x87 */
+/*-------------------           0x88 */
+#define IAR_PA_BIAS            0x89
+#define IAR_PA_TUNING          0x8A
+/*-------------------           0x8B */
+/*-------------------           0x8C */
+#define IAR_PMC_HP_TRIM                0x8D
+#define IAR_VREGA_TRIM         0x8E
+/*-------------------           0x8F */
+/*-------------------           0x90 */
+#define IAR_VCO_CTRL1          0x91
+#define IAR_VCO_CTRL2          0x92
+/*-------------------           0x93 */
+/*-------------------           0x94 */
+#define IAR_ANA_SPARE_OUT1     0x95
+#define IAR_ANA_SPARE_OUT2     0x96
+#define IAR_ANA_SPARE_IN       0x97
+#define IAR_MISCELLANEOUS      0x98
+/*-------------------           0x99 */
+#define IAR_SEQ_MGR_OVRD0      0x9A
+#define IAR_SEQ_MGR_OVRD1      0x9B
+#define IAR_SEQ_MGR_OVRD2      0x9C
+#define IAR_SEQ_MGR_OVRD3      0x9D
+#define IAR_SEQ_MGR_OVRD4      0x9E
+#define IAR_SEQ_MGR_OVRD5      0x9F
+#define IAR_SEQ_MGR_OVRD6      0xA0
+#define IAR_SEQ_MGR_OVRD7      0xA1
+/*-------------------           0xA2 */
+#define IAR_TESTMODE_CTRL      0xA3
+#define IAR_DTM_CTRL1          0xA4
+#define IAR_DTM_CTRL2          0xA5
+#define IAR_ATM_CTRL1          0xA6
+#define IAR_ATM_CTRL2          0xA7
+#define IAR_ATM_CTRL3          0xA8
+/*-------------------           0xA9 */
+#define IAR_LIM_FE_TEST_CTRL   0xAA
+#define IAR_CHF_TEST_CTRL      0xAB
+#define IAR_VCO_TEST_CTRL      0xAC
+#define IAR_PLL_TEST_CTRL      0xAD
+#define IAR_PA_TEST_CTRL       0xAE
+#define IAR_PMC_TEST_CTRL      0xAF
+#define IAR_SCAN_DTM_PROTECT_1 0xFE
+#define IAR_SCAN_DTM_PROTECT_0 0xFF
+
+/* IRQSTS1 bits */
+#define DAR_IRQSTS1_RX_FRM_PEND                BIT(7)
+#define DAR_IRQSTS1_PLL_UNLOCK_IRQ     BIT(6)
+#define DAR_IRQSTS1_FILTERFAIL_IRQ     BIT(5)
+#define DAR_IRQSTS1_RXWTRMRKIRQ                BIT(4)
+#define DAR_IRQSTS1_CCAIRQ             BIT(3)
+#define DAR_IRQSTS1_RXIRQ              BIT(2)
+#define DAR_IRQSTS1_TXIRQ              BIT(1)
+#define DAR_IRQSTS1_SEQIRQ             BIT(0)
+
+/* IRQSTS2 bits */
+#define DAR_IRQSTS2_CRCVALID           BIT(7)
+#define DAR_IRQSTS2_CCA                        BIT(6)
+#define DAR_IRQSTS2_SRCADDR            BIT(5)
+#define DAR_IRQSTS2_PI                 BIT(4)
+#define DAR_IRQSTS2_TMRSTATUS          BIT(3)
+#define DAR_IRQSTS2_ASM_IRQ            BIT(2)
+#define DAR_IRQSTS2_PB_ERR_IRQ         BIT(1)
+#define DAR_IRQSTS2_WAKE_IRQ           BIT(0)
+
+/* IRQSTS3 bits */
+#define DAR_IRQSTS3_TMR4MSK            BIT(7)
+#define DAR_IRQSTS3_TMR3MSK            BIT(6)
+#define DAR_IRQSTS3_TMR2MSK            BIT(5)
+#define DAR_IRQSTS3_TMR1MSK            BIT(4)
+#define DAR_IRQSTS3_TMR4IRQ            BIT(3)
+#define DAR_IRQSTS3_TMR3IRQ            BIT(2)
+#define DAR_IRQSTS3_TMR2IRQ            BIT(1)
+#define DAR_IRQSTS3_TMR1IRQ            BIT(0)
+
+/* PHY_CTRL1 bits */
+#define DAR_PHY_CTRL1_TMRTRIGEN                BIT(7)
+#define DAR_PHY_CTRL1_SLOTTED          BIT(6)
+#define DAR_PHY_CTRL1_CCABFRTX         BIT(5)
+#define DAR_PHY_CTRL1_CCABFRTX_SHIFT   5
+#define DAR_PHY_CTRL1_RXACKRQD         BIT(4)
+#define DAR_PHY_CTRL1_AUTOACK          BIT(3)
+#define DAR_PHY_CTRL1_XCVSEQ_MASK      0x07
+
+/* PHY_CTRL2 bits */
+#define DAR_PHY_CTRL2_CRC_MSK          BIT(7)
+#define DAR_PHY_CTRL2_PLL_UNLOCK_MSK   BIT(6)
+#define DAR_PHY_CTRL2_FILTERFAIL_MSK   BIT(5)
+#define DAR_PHY_CTRL2_RX_WMRK_MSK      BIT(4)
+#define DAR_PHY_CTRL2_CCAMSK           BIT(3)
+#define DAR_PHY_CTRL2_RXMSK            BIT(2)
+#define DAR_PHY_CTRL2_TXMSK            BIT(1)
+#define DAR_PHY_CTRL2_SEQMSK           BIT(0)
+
+/* PHY_CTRL3 bits */
+#define DAR_PHY_CTRL3_TMR4CMP_EN       BIT(7)
+#define DAR_PHY_CTRL3_TMR3CMP_EN       BIT(6)
+#define DAR_PHY_CTRL3_TMR2CMP_EN       BIT(5)
+#define DAR_PHY_CTRL3_TMR1CMP_EN       BIT(4)
+#define DAR_PHY_CTRL3_ASM_MSK          BIT(2)
+#define DAR_PHY_CTRL3_PB_ERR_MSK       BIT(1)
+#define DAR_PHY_CTRL3_WAKE_MSK         BIT(0)
+
+/* RX_FRM_LEN bits */
+#define DAR_RX_FRAME_LENGTH_MASK       (0x7F)
+
+/* PHY_CTRL4 bits */
+#define DAR_PHY_CTRL4_TRCV_MSK         BIT(7)
+#define DAR_PHY_CTRL4_TC3TMOUT         BIT(6)
+#define DAR_PHY_CTRL4_PANCORDNTR0      BIT(5)
+#define DAR_PHY_CTRL4_CCATYPE          (3)
+#define DAR_PHY_CTRL4_CCATYPE_SHIFT    (3)
+#define DAR_PHY_CTRL4_CCATYPE_MASK     (0x18)
+#define DAR_PHY_CTRL4_TMRLOAD          BIT(2)
+#define DAR_PHY_CTRL4_PROMISCUOUS      BIT(1)
+#define DAR_PHY_CTRL4_TC2PRIME_EN      BIT(0)
+
+/* SRC_CTRL bits */
+#define DAR_SRC_CTRL_INDEX             (0x0F)
+#define DAR_SRC_CTRL_INDEX_SHIFT       (4)
+#define DAR_SRC_CTRL_ACK_FRM_PND       BIT(3)
+#define DAR_SRC_CTRL_SRCADDR_EN                BIT(2)
+#define DAR_SRC_CTRL_INDEX_EN          BIT(1)
+#define DAR_SRC_CTRL_INDEX_DISABLE     BIT(0)
+
+/* DAR_ASM_CTRL1 bits */
+#define DAR_ASM_CTRL1_CLEAR            BIT(7)
+#define DAR_ASM_CTRL1_START            BIT(6)
+#define DAR_ASM_CTRL1_SELFTST          BIT(5)
+#define DAR_ASM_CTRL1_CTR              BIT(4)
+#define DAR_ASM_CTRL1_CBC              BIT(3)
+#define DAR_ASM_CTRL1_AES              BIT(2)
+#define DAR_ASM_CTRL1_LOAD_MAC         BIT(1)
+
+/* DAR_ASM_CTRL2 bits */
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL                (7)
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL_SHIFT  (5)
+#define DAR_ASM_CTRL2_TSTPAS                   BIT(1)
+
+/* DAR_CLK_OUT_CTRL bits */
+#define DAR_CLK_OUT_CTRL_EXTEND                BIT(7)
+#define DAR_CLK_OUT_CTRL_HIZ           BIT(6)
+#define DAR_CLK_OUT_CTRL_SR            BIT(5)
+#define DAR_CLK_OUT_CTRL_DS            BIT(4)
+#define DAR_CLK_OUT_CTRL_EN            BIT(3)
+#define DAR_CLK_OUT_CTRL_DIV           (7)
+
+/* DAR_PWR_MODES bits */
+#define DAR_PWR_MODES_XTAL_READY       BIT(5)
+#define DAR_PWR_MODES_XTALEN           BIT(4)
+#define DAR_PWR_MODES_ASM_CLK_EN       BIT(3)
+#define DAR_PWR_MODES_AUTODOZE         BIT(1)
+#define DAR_PWR_MODES_PMC_MODE         BIT(0)
+
+/* RX_FRAME_FILTER bits */
+#define IAR_RX_FRAME_FLT_FRM_VER               (0xC0)
+#define IAR_RX_FRAME_FLT_FRM_VER_SHIFT         (6)
+#define IAR_RX_FRAME_FLT_ACTIVE_PROMISCUOUS    BIT(5)
+#define IAR_RX_FRAME_FLT_NS_FT                 BIT(4)
+#define IAR_RX_FRAME_FLT_CMD_FT                        BIT(3)
+#define IAR_RX_FRAME_FLT_ACK_FT                        BIT(2)
+#define IAR_RX_FRAME_FLT_DATA_FT               BIT(1)
+#define IAR_RX_FRAME_FLT_BEACON_FT             BIT(0)
+
+/* DUAL_PAN_CTRL bits */
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK (0xF0)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT       (4)
+#define IAR_DUAL_PAN_CTRL_CURRENT_NETWORK      BIT(3)
+#define IAR_DUAL_PAN_CTRL_PANCORDNTR1          BIT(2)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_AUTO                BIT(1)
+#define IAR_DUAL_PAN_CTRL_ACTIVE_NETWORK       BIT(0)
+
+/* DUAL_PAN_STS bits */
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN1          BIT(7)
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN0          BIT(6)
+#define IAR_DUAL_PAN_STS_DUAL_PAN_REMAIN       (0x3F)
+
+/* CCA_CTRL bits */
+#define IAR_CCA_CTRL_AGC_FRZ_EN                        BIT(6)
+#define IAR_CCA_CTRL_CONT_RSSI_EN              BIT(5)
+#define IAR_CCA_CTRL_LQI_RSSI_NOT_CORR BIT(4)
+#define IAR_CCA_CTRL_CCA3_AND_NOT_OR   BIT(3)
+#define IAR_CCA_CTRL_POWER_COMP_EN_LQI BIT(2)
+#define IAR_CCA_CTRL_POWER_COMP_EN_ED  BIT(1)
+#define IAR_CCA_CTRL_POWER_COMP_EN_CCA1        BIT(0)
+
+/* ANT_PAD_CTRL bits */
+#define IAR_ANT_PAD_CTRL_ANTX_POL      (0x0F)
+#define IAR_ANT_PAD_CTRL_ANTX_POL_SHIFT        (4)
+#define IAR_ANT_PAD_CTRL_ANTX_CTRLMODE BIT(3)
+#define IAR_ANT_PAD_CTRL_ANTX_HZ       BIT(2)
+#define IAR_ANT_PAD_CTRL_ANTX_EN       (3)
+
+/* MISC_PAD_CTRL bits */
+#define IAR_MISC_PAD_CTRL_MISO_HIZ_EN  BIT(3)
+#define IAR_MISC_PAD_CTRL_IRQ_B_OD     BIT(2)
+#define IAR_MISC_PAD_CTRL_NON_GPIO_DS  BIT(1)
+#define IAR_MISC_PAD_CTRL_ANTX_CURR    (1)
+
+/* ANT_AGC_CTRL bits: FAD_EN is bit 0, ANTX is bit 1 */
+#define IAR_ANT_AGC_CTRL_FAD_EN_SHIFT  (0)
+#define IAR_ANT_AGC_CTRL_FAD_EN_MASK   (1)
+#define IAR_ANT_AGC_CTRL_ANTX_SHIFT    (1)
+#define IAR_ANT_AGC_CTRL_ANTX_MASK     BIT(IAR_ANT_AGC_CTRL_ANTX_SHIFT)
+
+/* BSM_CTRL bits */
+#define BSM_CTRL_BSM_EN                (1)
+
+/* SOFT_RESET bits */
+#define IAR_SOFT_RESET_SOG_RST         BIT(7)
+#define IAR_SOFT_RESET_REGS_RST                BIT(4)
+#define IAR_SOFT_RESET_PLL_RST         BIT(3)
+#define IAR_SOFT_RESET_TX_RST          BIT(2)
+#define IAR_SOFT_RESET_RX_RST          BIT(1)
+#define IAR_SOFT_RESET_SEQ_MGR_RST     BIT(0)
+
+/* SEQ_MGR_CTRL bits */
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL                (3)
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL_SHIFT  (6)
+#define IAR_SEQ_MGR_CTRL_NO_RX_RECYCLE         BIT(5)
+#define IAR_SEQ_MGR_CTRL_LATCH_PREAMBLE                BIT(4)
+#define IAR_SEQ_MGR_CTRL_EVENT_TMR_DO_NOT_LATCH        BIT(3)
+#define IAR_SEQ_MGR_CTRL_CLR_NEW_SEQ_INHIBIT   BIT(2)
+#define IAR_SEQ_MGR_CTRL_PSM_LOCK_DIS          BIT(1)
+#define IAR_SEQ_MGR_CTRL_PLL_ABORT_OVRD                BIT(0)
+
+/* SEQ_MGR_STS bits */
+#define IAR_SEQ_MGR_STS_TMR2_SEQ_TRIG_ARMED    BIT(7)
+#define IAR_SEQ_MGR_STS_RX_MODE                        BIT(6)
+#define IAR_SEQ_MGR_STS_RX_TIMEOUT_PENDING     BIT(5)
+#define IAR_SEQ_MGR_STS_NEW_SEQ_INHIBIT                BIT(4)
+#define IAR_SEQ_MGR_STS_SEQ_IDLE               BIT(3)
+#define IAR_SEQ_MGR_STS_XCVSEQ_ACTUAL          (7)
+
+/* ABORT_STS bits */
+#define IAR_ABORT_STS_PLL_ABORTED      BIT(2)
+#define IAR_ABORT_STS_TC3_ABORTED      BIT(1)
+#define IAR_ABORT_STS_SW_ABORTED       BIT(0)
+
+/* IAR_FILTERFAIL_CODE2 bits */
+#define IAR_FILTERFAIL_CODE2_PAN_SEL   BIT(7)
+#define IAR_FILTERFAIL_CODE2_9_8       (3)
+
+/* PHY_STS bits */
+#define IAR_PHY_STS_PLL_UNLOCK         BIT(7)
+#define IAR_PHY_STS_PLL_LOCK_ERR       BIT(6)
+#define IAR_PHY_STS_PLL_LOCK           BIT(5)
+#define IAR_PHY_STS_CRCVALID           BIT(3)
+#define IAR_PHY_STS_FILTERFAIL_FLAG_SEL        BIT(2)
+#define IAR_PHY_STS_SFD_DET            BIT(1)
+#define IAR_PHY_STS_PREAMBLE_DET       BIT(0)
+
+/* TESTMODE_CTRL bits */
+#define IAR_TEST_MODE_CTRL_HOT_ANT             BIT(4)
+#define IAR_TEST_MODE_CTRL_IDEAL_RSSI_EN       BIT(3)
+#define IAR_TEST_MODE_CTRL_IDEAL_PFC_EN                BIT(2)
+#define IAR_TEST_MODE_CTRL_CONTINUOUS_EN       BIT(1)
+#define IAR_TEST_MODE_CTRL_FPGA_EN             BIT(0)
+
+/* DTM_CTRL1 bits */
+#define IAR_DTM_CTRL1_ATM_LOCKED       BIT(7)
+#define IAR_DTM_CTRL1_DTM_EN           BIT(6)
+#define IAR_DTM_CTRL1_PAGE5            BIT(5)
+#define IAR_DTM_CTRL1_PAGE4            BIT(4)
+#define IAR_DTM_CTRL1_PAGE3            BIT(3)
+#define IAR_DTM_CTRL1_PAGE2            BIT(2)
+#define IAR_DTM_CTRL1_PAGE1            BIT(1)
+#define IAR_DTM_CTRL1_PAGE0            BIT(0)
+
+/* TX_MODE_CTRL */
+#define IAR_TX_MODE_CTRL_TX_INV                BIT(4)
+#define IAR_TX_MODE_CTRL_BT_EN         BIT(3)
+#define IAR_TX_MODE_CTRL_DTS2          BIT(2)
+#define IAR_TX_MODE_CTRL_DTS1          BIT(1)
+#define IAR_TX_MODE_CTRL_DTS0          BIT(0)
+
+#define TX_MODE_CTRL_DTS_MASK  (7)
+
+#endif /* _MCR20A_H */
index 5166575a164d5ff1fde0c4fcf7bbd10537ac6932..adb826f55e604c2d766c44bc260d1ac9aa96bc07 100644 (file)
@@ -74,6 +74,7 @@ struct ipvl_dev {
        DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
        netdev_features_t       sfeatures;
        u32                     msg_enable;
+       spinlock_t              addrs_lock;
 };
 
 struct ipvl_addr {
@@ -176,4 +177,10 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
 void ipvlan_link_delete(struct net_device *dev, struct list_head *head);
 void ipvlan_link_setup(struct net_device *dev);
 int ipvlan_link_register(struct rtnl_link_ops *ops);
+
+static inline bool netif_is_ipvlan_port(const struct net_device *dev)
+{
+       return rcu_access_pointer(dev->rx_handler) == ipvlan_handle_frame;
+}
+
 #endif /* __IPVLAN_H */
index 1b5dc200b573156c12ec2419c73c6fe566dd3fb9..1a8132eb2a3ec150fb1563a3d24ab03892b8aeab 100644 (file)
@@ -109,25 +109,33 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr)
 struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
                                   const void *iaddr, bool is_v6)
 {
-       struct ipvl_addr *addr;
+       struct ipvl_addr *addr, *ret = NULL;
 
-       list_for_each_entry(addr, &ipvlan->addrs, anode)
-               if (addr_equal(is_v6, addr, iaddr))
-                       return addr;
-       return NULL;
+       rcu_read_lock();
+       list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
+               if (addr_equal(is_v6, addr, iaddr)) {
+                       ret = addr;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+       return ret;
 }
 
 bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
 {
        struct ipvl_dev *ipvlan;
+       bool ret = false;
 
-       ASSERT_RTNL();
-
-       list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
-               if (ipvlan_find_addr(ipvlan, iaddr, is_v6))
-                       return true;
+       rcu_read_lock();
+       list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
+               if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) {
+                       ret = true;
+                       break;
+               }
        }
-       return false;
+       rcu_read_unlock();
+       return ret;
 }
 
 static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
@@ -498,8 +506,8 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
 
        /* In this mode we dont care about multicast and broadcast traffic */
        if (is_multicast_ether_addr(ethh->h_dest)) {
-               pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n",
-                                   ntohs(skb->protocol));
+               pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n",
+                                    ntohs(skb->protocol));
                kfree_skb(skb);
                goto out;
        }
@@ -809,7 +817,8 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
                };
 
                skb_dst_drop(skb);
-               dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
+               dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
+                                            skb, flags);
                skb_dst_set(skb, dst);
                break;
        }
index 67c91ceda9792c79015e526bed5fef9be1831d77..743d37fb034afcee25dd7e45b00efba4b4c7a418 100644 (file)
@@ -129,7 +129,6 @@ static int ipvlan_port_create(struct net_device *dev)
        if (err)
                goto err;
 
-       dev->priv_flags |= IFF_IPVLAN_MASTER;
        return 0;
 
 err:
@@ -142,7 +141,6 @@ static void ipvlan_port_destroy(struct net_device *dev)
        struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
        struct sk_buff *skb;
 
-       dev->priv_flags &= ~IFF_IPVLAN_MASTER;
        if (port->mode == IPVLAN_MODE_L3S) {
                dev->priv_flags &= ~IFF_L3MDEV_MASTER;
                ipvlan_unregister_nf_hook(dev_net(dev));
@@ -178,7 +176,7 @@ static int ipvlan_init(struct net_device *dev)
        dev->state = (dev->state & ~IPVLAN_STATE_MASK) |
                     (phy_dev->state & IPVLAN_STATE_MASK);
        dev->features = phy_dev->features & IPVLAN_FEATURES;
-       dev->features |= NETIF_F_LLTX;
+       dev->features |= NETIF_F_LLTX | NETIF_F_VLAN_CHALLENGED;
        dev->gso_max_size = phy_dev->gso_max_size;
        dev->gso_max_segs = phy_dev->gso_max_segs;
        dev->hard_header_len = phy_dev->hard_header_len;
@@ -227,8 +225,10 @@ static int ipvlan_open(struct net_device *dev)
        else
                dev->flags &= ~IFF_NOARP;
 
-       list_for_each_entry(addr, &ipvlan->addrs, anode)
+       rcu_read_lock();
+       list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
                ipvlan_ht_addr_add(ipvlan, addr);
+       rcu_read_unlock();
 
        return dev_uc_add(phy_dev, phy_dev->dev_addr);
 }
@@ -244,8 +244,10 @@ static int ipvlan_stop(struct net_device *dev)
 
        dev_uc_del(phy_dev, phy_dev->dev_addr);
 
-       list_for_each_entry(addr, &ipvlan->addrs, anode)
+       rcu_read_lock();
+       list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
                ipvlan_ht_addr_del(addr);
+       rcu_read_unlock();
 
        return 0;
 }
@@ -419,6 +421,12 @@ static const struct header_ops ipvlan_header_ops = {
        .cache_update   = eth_header_cache_update,
 };
 
+static bool netif_is_ipvlan(const struct net_device *dev)
+{
+       /* both ipvlan and ipvtap devices use the same netdev_ops */
+       return dev->netdev_ops == &ipvlan_netdev_ops;
+}
+
 static int ipvlan_ethtool_get_link_ksettings(struct net_device *dev,
                                             struct ethtool_link_ksettings *cmd)
 {
@@ -588,6 +596,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
        ipvlan->sfeatures = IPVLAN_FEATURES;
        ipvlan_adjust_mtu(ipvlan, phy_dev);
        INIT_LIST_HEAD(&ipvlan->addrs);
+       spin_lock_init(&ipvlan->addrs_lock);
 
        /* TODO Probably put random address here to be presented to the
         * world but keep using the physical-dev address for the outgoing
@@ -595,7 +604,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
         */
        memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN);
 
-       dev->priv_flags |= IFF_IPVLAN_SLAVE;
+       dev->priv_flags |= IFF_NO_RX_HANDLER;
 
        err = register_netdevice(dev);
        if (err < 0)
@@ -665,11 +674,13 @@ void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
        struct ipvl_dev *ipvlan = netdev_priv(dev);
        struct ipvl_addr *addr, *next;
 
+       spin_lock_bh(&ipvlan->addrs_lock);
        list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
                ipvlan_ht_addr_del(addr);
-               list_del(&addr->anode);
+               list_del_rcu(&addr->anode);
                kfree_rcu(addr, rcu);
        }
+       spin_unlock_bh(&ipvlan->addrs_lock);
 
        ida_simple_remove(&ipvlan->port->ida, dev->dev_id);
        list_del_rcu(&ipvlan->pnode);
@@ -760,8 +771,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
                if (dev->reg_state != NETREG_UNREGISTERING)
                        break;
 
-               list_for_each_entry_safe(ipvlan, next, &port->ipvlans,
-                                        pnode)
+               list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode)
                        ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
                                                            &lst_kill);
                unregister_netdevice_many(&lst_kill);
@@ -793,6 +803,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
        return NOTIFY_DONE;
 }
 
+/* the caller must hold the addrs lock */
 static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
        struct ipvl_addr *addr;
@@ -811,7 +822,8 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
                addr->atype = IPVL_IPV6;
 #endif
        }
-       list_add_tail(&addr->anode, &ipvlan->addrs);
+
+       list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
 
        /* If the interface is not up, the address will be added to the hash
         * list by ipvlan_open.
@@ -826,15 +838,17 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
        struct ipvl_addr *addr;
 
+       spin_lock_bh(&ipvlan->addrs_lock);
        addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
-       if (!addr)
+       if (!addr) {
+               spin_unlock_bh(&ipvlan->addrs_lock);
                return;
+       }
 
        ipvlan_ht_addr_del(addr);
-       list_del(&addr->anode);
+       list_del_rcu(&addr->anode);
+       spin_unlock_bh(&ipvlan->addrs_lock);
        kfree_rcu(addr, rcu);
-
-       return;
 }
 
 static bool ipvlan_is_valid_dev(const struct net_device *dev)
@@ -853,14 +867,17 @@ static bool ipvlan_is_valid_dev(const struct net_device *dev)
 #if IS_ENABLED(CONFIG_IPV6)
 static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 {
-       if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
+       int ret = -EINVAL;
+
+       spin_lock_bh(&ipvlan->addrs_lock);
+       if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
                netif_err(ipvlan, ifup, ipvlan->dev,
                          "Failed to add IPv6=%pI6c addr for %s intf\n",
                          ip6_addr, ipvlan->dev->name);
-               return -EINVAL;
-       }
-
-       return ipvlan_add_addr(ipvlan, ip6_addr, true);
+       else
+               ret = ipvlan_add_addr(ipvlan, ip6_addr, true);
+       spin_unlock_bh(&ipvlan->addrs_lock);
+       return ret;
 }
 
 static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
@@ -899,10 +916,6 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
        struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev;
        struct ipvl_dev *ipvlan = netdev_priv(dev);
 
-       /* FIXME IPv6 autoconf calls us from bh without RTNL */
-       if (in_softirq())
-               return NOTIFY_DONE;
-
        if (!ipvlan_is_valid_dev(dev))
                return NOTIFY_DONE;
 
@@ -922,14 +935,17 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
 
 static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 {
-       if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
+       int ret = -EINVAL;
+
+       spin_lock_bh(&ipvlan->addrs_lock);
+       if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
                netif_err(ipvlan, ifup, ipvlan->dev,
                          "Failed to add IPv4=%pI4 on %s intf.\n",
                          ip4_addr, ipvlan->dev->name);
-               return -EINVAL;
-       }
-
-       return ipvlan_add_addr(ipvlan, ip4_addr, false);
+       else
+               ret = ipvlan_add_addr(ipvlan, ip4_addr, false);
+       spin_unlock_bh(&ipvlan->addrs_lock);
+       return ret;
 }
 
 static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
@@ -1024,6 +1040,7 @@ static struct pernet_operations ipvlan_net_ops = {
        .id = &ipvlan_netid,
        .size = sizeof(struct ipvlan_netns),
        .exit = ipvlan_ns_exit,
+       .async = true,
 };
 
 static int __init ipvlan_init_module(void)
index 7de88b33d5b96d7f18a5f7c242a54c935b587086..9cbb0c8a896aff9d192850ad15734dc2872c0dfb 100644 (file)
@@ -3277,7 +3277,7 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
 
        err = netdev_upper_dev_link(real_dev, dev, extack);
        if (err < 0)
-               goto unregister;
+               goto put_dev;
 
        /* need to be already registered so that ->init has run and
         * the MAC addr is set
@@ -3316,7 +3316,8 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
        macsec_del_dev(macsec);
 unlink:
        netdev_upper_dev_unlink(real_dev, dev);
-unregister:
+put_dev:
+       dev_put(real_dev);
        unregister_netdevice(dev);
        return err;
 }
index 8fc02d9db3d011ee1c193b9cdfb8c26e042e6f3e..725f4b4afc6da946e967d4070b9cf76143360332 100644 (file)
@@ -1036,7 +1036,7 @@ static netdev_features_t macvlan_fix_features(struct net_device *dev,
        lowerdev_features &= (features | ~NETIF_F_LRO);
        features = netdev_increment_features(lowerdev_features, features, mask);
        features |= ALWAYS_ON_FEATURES;
-       features &= ~NETIF_F_NETNS_LOCAL;
+       features &= (ALWAYS_ON_FEATURES | MACVLAN_FEATURES);
 
        return features;
 }
index e8ae50e1255e4b1b7912a08db98fecf99caf98c6..319edc9c8ec7f04c533c1d61d3bb959ca2d445e9 100644 (file)
@@ -38,14 +38,6 @@ static int aquantia_config_aneg(struct phy_device *phydev)
        return 0;
 }
 
-static int aquantia_aneg_done(struct phy_device *phydev)
-{
-       int reg;
-
-       reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
-       return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
-}
-
 static int aquantia_config_intr(struct phy_device *phydev)
 {
        int err;
@@ -125,7 +117,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQ1202",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
@@ -137,7 +129,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQ2104",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
@@ -149,7 +141,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQR105",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
@@ -161,7 +153,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQR106",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
@@ -173,7 +165,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQR107",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
@@ -185,7 +177,7 @@ static struct phy_driver aquantia_driver[] = {
        .name           = "Aquantia AQR405",
        .features       = PHY_AQUANTIA_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .aneg_done      = aquantia_aneg_done,
+       .aneg_done      = genphy_c45_aneg_done,
        .config_aneg    = aquantia_config_aneg,
        .config_intr    = aquantia_config_intr,
        .ack_interrupt  = aquantia_ack_interrupt,
index 171010eb4d9c5c36da0be9888fb75cc54e136768..5ad130c3da43c869b39dc8ec83ec6795aa82be7d 100644 (file)
@@ -341,8 +341,8 @@ void bcm_phy_get_strings(struct phy_device *phydev, u8 *data)
        unsigned int i;
 
        for (i = 0; i < ARRAY_SIZE(bcm_phy_hw_stats); i++)
-               memcpy(data + i * ETH_GSTRING_LEN,
-                      bcm_phy_hw_stats[i].string, ETH_GSTRING_LEN);
+               strlcpy(data + i * ETH_GSTRING_LEN,
+                       bcm_phy_hw_stats[i].string, ETH_GSTRING_LEN);
 }
 EXPORT_SYMBOL_GPL(bcm_phy_get_strings);
 
index 421feb8f92fee2ae13cba9424651f2442265a0ba..29b1c88b55cc494bae079da9b2283170508b4e9d 100644 (file)
@@ -565,7 +565,7 @@ static int bcm7xxx_28nm_set_tunable(struct phy_device *phydev,
        if (ret)
                return ret;
 
-       /* Disable EEE advertisment since this prevents the PHY
+       /* Disable EEE advertisement since this prevents the PHY
         * from successfully linking up, trigger auto-negotiation restart
         * to let the MAC decide what to do.
         */
index 9442db2218348713e87faf91ccc25f4a8444526a..8022cd317f62bca6338a7b66ab190e410d1fe5e1 100644 (file)
@@ -30,14 +30,6 @@ static int cortina_read_reg(struct phy_device *phydev, u16 regnum)
                            MII_ADDR_C45 | regnum);
 }
 
-static int cortina_config_aneg(struct phy_device *phydev)
-{
-       phydev->supported = SUPPORTED_10000baseT_Full;
-       phydev->advertising = SUPPORTED_10000baseT_Full;
-
-       return 0;
-}
-
 static int cortina_read_status(struct phy_device *phydev)
 {
        int gpio_int_status, ret = 0;
@@ -61,11 +53,6 @@ static int cortina_read_status(struct phy_device *phydev)
        return ret;
 }
 
-static int cortina_soft_reset(struct phy_device *phydev)
-{
-       return 0;
-}
-
 static int cortina_probe(struct phy_device *phydev)
 {
        u32 phy_id = 0;
@@ -101,9 +88,10 @@ static struct phy_driver cortina_driver[] = {
        .phy_id         = PHY_ID_CS4340,
        .phy_id_mask    = 0xffffffff,
        .name           = "Cortina CS4340",
-       .config_aneg    = cortina_config_aneg,
+       .config_init    = gen10g_config_init,
+       .config_aneg    = gen10g_config_aneg,
        .read_status    = cortina_read_status,
-       .soft_reset     = cortina_soft_reset,
+       .soft_reset     = gen10g_no_soft_reset,
        .probe          = cortina_probe,
 },
 };
index a11f80cb5388a46c0c7702e6e306a58c978e8153..7d936fb61c22cb33f2330335ef01a2a3f4c67d42 100644 (file)
 #define PHY_ID_PHY22F_1_4              0xD565A410
 #define PHY_ID_PHY11G_1_5              0xD565A401
 #define PHY_ID_PHY22F_1_5              0xD565A411
-#define PHY_ID_PHY11G_VR9              0xD565A409
-#define PHY_ID_PHY22F_VR9              0xD565A419
+#define PHY_ID_PHY11G_VR9_1_1          0xD565A408
+#define PHY_ID_PHY22F_VR9_1_1          0xD565A418
+#define PHY_ID_PHY11G_VR9_1_2          0xD565A409
+#define PHY_ID_PHY22F_VR9_1_2          0xD565A419
 
 static int xway_gphy_config_init(struct phy_device *phydev)
 {
@@ -312,9 +314,9 @@ static struct phy_driver xway_gphy[] = {
                .suspend        = genphy_suspend,
                .resume         = genphy_resume,
        }, {
-               .phy_id         = PHY_ID_PHY11G_VR9,
+               .phy_id         = PHY_ID_PHY11G_VR9_1_1,
                .phy_id_mask    = 0xffffffff,
-               .name           = "Intel XWAY PHY11G (xRX integrated)",
+               .name           = "Intel XWAY PHY11G (xRX v1.1 integrated)",
                .features       = PHY_GBIT_FEATURES,
                .flags          = PHY_HAS_INTERRUPT,
                .config_init    = xway_gphy_config_init,
@@ -324,9 +326,33 @@ static struct phy_driver xway_gphy[] = {
                .suspend        = genphy_suspend,
                .resume         = genphy_resume,
        }, {
-               .phy_id         = PHY_ID_PHY22F_VR9,
+               .phy_id         = PHY_ID_PHY22F_VR9_1_1,
                .phy_id_mask    = 0xffffffff,
-               .name           = "Intel XWAY PHY22F (xRX integrated)",
+               .name           = "Intel XWAY PHY22F (xRX v1.1 integrated)",
+               .features       = PHY_BASIC_FEATURES,
+               .flags          = PHY_HAS_INTERRUPT,
+               .config_init    = xway_gphy_config_init,
+               .ack_interrupt  = xway_gphy_ack_interrupt,
+               .did_interrupt  = xway_gphy_did_interrupt,
+               .config_intr    = xway_gphy_config_intr,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+       }, {
+               .phy_id         = PHY_ID_PHY11G_VR9_1_2,
+               .phy_id_mask    = 0xffffffff,
+               .name           = "Intel XWAY PHY11G (xRX v1.2 integrated)",
+               .features       = PHY_GBIT_FEATURES,
+               .flags          = PHY_HAS_INTERRUPT,
+               .config_init    = xway_gphy_config_init,
+               .ack_interrupt  = xway_gphy_ack_interrupt,
+               .did_interrupt  = xway_gphy_did_interrupt,
+               .config_intr    = xway_gphy_config_intr,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+       }, {
+               .phy_id         = PHY_ID_PHY22F_VR9_1_2,
+               .phy_id_mask    = 0xffffffff,
+               .name           = "Intel XWAY PHY22F (xRX v1.2 integrated)",
                .features       = PHY_BASIC_FEATURES,
                .flags          = PHY_HAS_INTERRUPT,
                .config_init    = xway_gphy_config_init,
@@ -346,8 +372,10 @@ static struct mdio_device_id __maybe_unused xway_gphy_tbl[] = {
        { PHY_ID_PHY22F_1_4, 0xffffffff },
        { PHY_ID_PHY11G_1_5, 0xffffffff },
        { PHY_ID_PHY22F_1_5, 0xffffffff },
-       { PHY_ID_PHY11G_VR9, 0xffffffff },
-       { PHY_ID_PHY22F_VR9, 0xffffffff },
+       { PHY_ID_PHY11G_VR9_1_1, 0xffffffff },
+       { PHY_ID_PHY22F_VR9_1_1, 0xffffffff },
+       { PHY_ID_PHY11G_VR9_1_2, 0xffffffff },
+       { PHY_ID_PHY22F_VR9_1_2, 0xffffffff },
        { }
 };
 MODULE_DEVICE_TABLE(mdio, xway_gphy_tbl);
index 22d9bc9c33a4bce864505babe6b3060348542a62..a75c511950c331643108b03a17a7d0aee1327a3d 100644 (file)
@@ -860,7 +860,7 @@ static int m88e1510_config_init(struct phy_device *phydev)
                        return err;
 
                /* There appears to be a bug in the 88e1512 when used in
-                * SGMII to copper mode, where the AN advertisment register
+                * SGMII to copper mode, where the AN advertisement register
                 * clears the pause bits each time a negotiation occurs.
                 * This means we can never be truely sure what was advertised,
                 * so disable Pause support.
@@ -1452,8 +1452,8 @@ static void marvell_get_strings(struct phy_device *phydev, u8 *data)
        int i;
 
        for (i = 0; i < ARRAY_SIZE(marvell_hw_stats); i++) {
-               memcpy(data + i * ETH_GSTRING_LEN,
-                      marvell_hw_stats[i].string, ETH_GSTRING_LEN);
+               strlcpy(data + i * ETH_GSTRING_LEN,
+                       marvell_hw_stats[i].string, ETH_GSTRING_LEN);
        }
 }
 
index 8a0bd98fdec77400e729683a4aec6381a4c57051..9564916d2d7b79435176616f4d25cc20d5362d6a 100644 (file)
@@ -71,15 +71,6 @@ static int mv3310_probe(struct phy_device *phydev)
        return 0;
 }
 
-/*
- * Resetting the MV88X3310 causes it to become non-responsive.  Avoid
- * setting the reset bit(s).
- */
-static int mv3310_soft_reset(struct phy_device *phydev)
-{
-       return 0;
-}
-
 static int mv3310_config_init(struct phy_device *phydev)
 {
        __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
@@ -317,7 +308,7 @@ static int mv3310_read_status(struct phy_device *phydev)
                if (val < 0)
                        return val;
 
-               /* Read the link partner's 1G advertisment */
+               /* Read the link partner's 1G advertisement */
                val = phy_read_mmd(phydev, MDIO_MMD_AN, MV_AN_STAT1000);
                if (val < 0)
                        return val;
@@ -377,7 +368,7 @@ static struct phy_driver mv3310_drivers[] = {
                                  SUPPORTED_10000baseT_Full |
                                  SUPPORTED_Backplane,
                .probe          = mv3310_probe,
-               .soft_reset     = mv3310_soft_reset,
+               .soft_reset     = gen10g_no_soft_reset,
                .config_init    = mv3310_config_init,
                .config_aneg    = mv3310_config_aneg,
                .aneg_done      = mv3310_aneg_done,
index 2573ab012f163ca02f14837f5f1af08d806c4fd3..70f6115530af8c800d79660d3fc8aec2e56155f0 100644 (file)
@@ -163,8 +163,9 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev)
                            mdio_mux_mmioreg_switch_fn,
                            &s->mux_handle, s, NULL);
        if (ret) {
-               dev_err(&pdev->dev, "failed to register mdio-mux bus %pOF\n",
-                       np);
+               if (ret != -EPROBE_DEFER)
+                       dev_err(&pdev->dev,
+                               "failed to register mdio-mux bus %pOF\n", np);
                return ret;
        }
 
index 0f45310300f667bab84655d301d64eb8196ae128..f41b224a9cdbf49ccf82d72b5052686548c005a7 100644 (file)
@@ -635,25 +635,6 @@ static int ksz8873mll_config_aneg(struct phy_device *phydev)
        return 0;
 }
 
-/* This routine returns -1 as an indication to the caller that the
- * Micrel ksz9021 10/100/1000 PHY does not support standard IEEE
- * MMD extended PHY registers.
- */
-static int
-ksz9021_rd_mmd_phyreg(struct phy_device *phydev, int devad, u16 regnum)
-{
-       return -1;
-}
-
-/* This routine does nothing since the Micrel ksz9021 does not support
- * standard IEEE MMD extended PHY registers.
- */
-static int
-ksz9021_wr_mmd_phyreg(struct phy_device *phydev, int devad, u16 regnum, u16 val)
-{
-       return -1;
-}
-
 static int kszphy_get_sset_count(struct phy_device *phydev)
 {
        return ARRAY_SIZE(kszphy_hw_stats);
@@ -664,8 +645,8 @@ static void kszphy_get_strings(struct phy_device *phydev, u8 *data)
        int i;
 
        for (i = 0; i < ARRAY_SIZE(kszphy_hw_stats); i++) {
-               memcpy(data + i * ETH_GSTRING_LEN,
-                      kszphy_hw_stats[i].string, ETH_GSTRING_LEN);
+               strlcpy(data + i * ETH_GSTRING_LEN,
+                       kszphy_hw_stats[i].string, ETH_GSTRING_LEN);
        }
 }
 
@@ -946,8 +927,8 @@ static struct phy_driver ksphy_driver[] = {
        .get_stats      = kszphy_get_stats,
        .suspend        = genphy_suspend,
        .resume         = genphy_resume,
-       .read_mmd       = ksz9021_rd_mmd_phyreg,
-       .write_mmd      = ksz9021_wr_mmd_phyreg,
+       .read_mmd       = genphy_read_mmd_unsupported,
+       .write_mmd      = genphy_write_mmd_unsupported,
 }, {
        .phy_id         = PHY_ID_KSZ9031,
        .phy_id_mask    = MICREL_PHY_ID_MASK,
index a4576859afae429c8d7ac9955459c3f187bd35dd..e1225545362d513351451f388d3120f783441594 100644 (file)
@@ -163,11 +163,11 @@ int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask)
 EXPORT_SYMBOL_GPL(genphy_c45_read_link);
 
 /**
- * genphy_c45_read_lpa - read the link partner advertisment and pause
+ * genphy_c45_read_lpa - read the link partner advertisement and pause
  * @phydev: target phy_device struct
  *
  * Read the Clause 45 defined base (7.19) and 10G (7.33) status registers,
- * filling in the link partner advertisment, pause and asym_pause members
+ * filling in the link partner advertisement, pause and asym_pause members
  * in @phydev.  This assumes that the auto-negotiation MMD is present, and
  * the backplane bit (7.48.0) is clear.  Clause 45 PHY drivers are expected
  * to fill in the remainder of the link partner advert from vendor registers.
@@ -176,7 +176,7 @@ int genphy_c45_read_lpa(struct phy_device *phydev)
 {
        int val;
 
-       /* Read the link partner's base page advertisment */
+       /* Read the link partner's base page advertisement */
        val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA);
        if (val < 0)
                return val;
@@ -185,7 +185,7 @@ int genphy_c45_read_lpa(struct phy_device *phydev)
        phydev->pause = val & LPA_PAUSE_CAP ? 1 : 0;
        phydev->asym_pause = val & LPA_PAUSE_ASYM ? 1 : 0;
 
-       /* Read the link partner's 10G advertisment */
+       /* Read the link partner's 10G advertisement */
        val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_STAT);
        if (val < 0)
                return val;
@@ -268,12 +268,13 @@ EXPORT_SYMBOL_GPL(genphy_c45_read_mdix);
 
 /* The gen10g_* functions are the old Clause 45 stub */
 
-static int gen10g_config_aneg(struct phy_device *phydev)
+int gen10g_config_aneg(struct phy_device *phydev)
 {
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_config_aneg);
 
-static int gen10g_read_status(struct phy_device *phydev)
+int gen10g_read_status(struct phy_device *phydev)
 {
        u32 mmd_mask = phydev->c45_ids.devices_in_package;
        int ret;
@@ -291,14 +292,16 @@ static int gen10g_read_status(struct phy_device *phydev)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_read_status);
 
-static int gen10g_soft_reset(struct phy_device *phydev)
+int gen10g_no_soft_reset(struct phy_device *phydev)
 {
        /* Do nothing for now */
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_no_soft_reset);
 
-static int gen10g_config_init(struct phy_device *phydev)
+int gen10g_config_init(struct phy_device *phydev)
 {
        /* Temporarily just say we support everything */
        phydev->supported = SUPPORTED_10000baseT_Full;
@@ -306,22 +309,25 @@ static int gen10g_config_init(struct phy_device *phydev)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_config_init);
 
-static int gen10g_suspend(struct phy_device *phydev)
+int gen10g_suspend(struct phy_device *phydev)
 {
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_suspend);
 
-static int gen10g_resume(struct phy_device *phydev)
+int gen10g_resume(struct phy_device *phydev)
 {
        return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_resume);
 
 struct phy_driver genphy_10g_driver = {
        .phy_id         = 0xffffffff,
        .phy_id_mask    = 0xffffffff,
        .name           = "Generic 10G PHY",
-       .soft_reset     = gen10g_soft_reset,
+       .soft_reset     = gen10g_no_soft_reset,
        .config_init    = gen10g_config_init,
        .features       = 0,
        .config_aneg    = gen10g_config_aneg,
index 4083f00c97a5bb5f5c2c1c62afe698bcdf12d13e..c7da4cbb11032d7883371cc50b93f956b7e7f0d2 100644 (file)
@@ -190,10 +190,10 @@ size_t phy_speeds(unsigned int *speeds, size_t size,
 }
 
 /**
- * phy_resolve_aneg_linkmode - resolve the advertisments into phy settings
+ * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings
  * @phydev: The phy_device struct
  *
- * Resolve our and the link partner advertisments into their corresponding
+ * Resolve our and the link partner advertisements into their corresponding
  * speed and duplex. If full duplex was negotiated, extract the pause mode
  * from the link partner mask.
  */
index e3e29c2b028b58d84034a534d34427b243513460..05c1e8ef15e61d26beac3542a0ab2836cab6c3a6 100644 (file)
@@ -617,6 +617,68 @@ static void phy_error(struct phy_device *phydev)
        phy_trigger_machine(phydev, false);
 }
 
+/**
+ * phy_disable_interrupts - Disable the PHY interrupts from the PHY side
+ * @phydev: target phy_device struct
+ */
+static int phy_disable_interrupts(struct phy_device *phydev)
+{
+       int err;
+
+       /* Disable PHY interrupts */
+       err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED);
+       if (err)
+               return err;
+
+       /* Clear the interrupt */
+       return phy_clear_interrupt(phydev);
+}
+
+/**
+ * phy_change - Called by the phy_interrupt to handle PHY changes
+ * @phydev: phy_device struct that interrupted
+ */
+static irqreturn_t phy_change(struct phy_device *phydev)
+{
+       if (phy_interrupt_is_valid(phydev)) {
+               if (phydev->drv->did_interrupt &&
+                   !phydev->drv->did_interrupt(phydev))
+                       return IRQ_NONE;
+
+               if (phydev->state == PHY_HALTED)
+                       if (phy_disable_interrupts(phydev))
+                               goto phy_err;
+       }
+
+       mutex_lock(&phydev->lock);
+       if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state))
+               phydev->state = PHY_CHANGELINK;
+       mutex_unlock(&phydev->lock);
+
+       /* reschedule state queue work to run as soon as possible */
+       phy_trigger_machine(phydev, true);
+
+       if (phy_interrupt_is_valid(phydev) && phy_clear_interrupt(phydev))
+               goto phy_err;
+       return IRQ_HANDLED;
+
+phy_err:
+       phy_error(phydev);
+       return IRQ_NONE;
+}
+
+/**
+ * phy_change_work - Scheduled by the phy_mac_interrupt to handle PHY changes
+ * @work: work_struct that describes the work to be done
+ */
+void phy_change_work(struct work_struct *work)
+{
+       struct phy_device *phydev =
+               container_of(work, struct phy_device, phy_queue);
+
+       phy_change(phydev);
+}
+
 /**
  * phy_interrupt - PHY interrupt handler
  * @irq: interrupt line
@@ -632,9 +694,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
        if (PHY_HALTED == phydev->state)
                return IRQ_NONE;                /* It can't be ours.  */
 
-       phy_change(phydev);
-
-       return IRQ_HANDLED;
+       return phy_change(phydev);
 }
 
 /**
@@ -651,32 +711,6 @@ static int phy_enable_interrupts(struct phy_device *phydev)
        return phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED);
 }
 
-/**
- * phy_disable_interrupts - Disable the PHY interrupts from the PHY side
- * @phydev: target phy_device struct
- */
-static int phy_disable_interrupts(struct phy_device *phydev)
-{
-       int err;
-
-       /* Disable PHY interrupts */
-       err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED);
-       if (err)
-               goto phy_err;
-
-       /* Clear the interrupt */
-       err = phy_clear_interrupt(phydev);
-       if (err)
-               goto phy_err;
-
-       return 0;
-
-phy_err:
-       phy_error(phydev);
-
-       return err;
-}
-
 /**
  * phy_start_interrupts - request and enable interrupts for a PHY device
  * @phydev: target phy_device struct
@@ -719,50 +753,6 @@ int phy_stop_interrupts(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_stop_interrupts);
 
-/**
- * phy_change - Called by the phy_interrupt to handle PHY changes
- * @phydev: phy_device struct that interrupted
- */
-void phy_change(struct phy_device *phydev)
-{
-       if (phy_interrupt_is_valid(phydev)) {
-               if (phydev->drv->did_interrupt &&
-                   !phydev->drv->did_interrupt(phydev))
-                       return;
-
-               if (phydev->state == PHY_HALTED)
-                       if (phy_disable_interrupts(phydev))
-                               goto phy_err;
-       }
-
-       mutex_lock(&phydev->lock);
-       if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state))
-               phydev->state = PHY_CHANGELINK;
-       mutex_unlock(&phydev->lock);
-
-       /* reschedule state queue work to run as soon as possible */
-       phy_trigger_machine(phydev, true);
-
-       if (phy_interrupt_is_valid(phydev) && phy_clear_interrupt(phydev))
-               goto phy_err;
-       return;
-
-phy_err:
-       phy_error(phydev);
-}
-
-/**
- * phy_change_work - Scheduled by the phy_mac_interrupt to handle PHY changes
- * @work: work_struct that describes the work to be done
- */
-void phy_change_work(struct work_struct *work)
-{
-       struct phy_device *phydev =
-               container_of(work, struct phy_device, phy_queue);
-
-       phy_change(phydev);
-}
-
 /**
  * phy_stop - Bring down the PHY link, and stop checking the status
  * @phydev: target phy_device struct
@@ -774,13 +764,8 @@ void phy_stop(struct phy_device *phydev)
        if (PHY_HALTED == phydev->state)
                goto out_unlock;
 
-       if (phy_interrupt_is_valid(phydev)) {
-               /* Disable PHY Interrupts */
-               phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED);
-
-               /* Clear any pending interrupts */
-               phy_clear_interrupt(phydev);
-       }
+       if (phy_interrupt_is_valid(phydev))
+               phy_disable_interrupts(phydev);
 
        phydev->state = PHY_HALTED;
 
@@ -819,7 +804,7 @@ void phy_start(struct phy_device *phydev)
                break;
        case PHY_HALTED:
                /* if phy was suspended, bring the physical link up again */
-               phy_resume(phydev);
+               __phy_resume(phydev);
 
                /* make sure interrupts are re-enabled for the PHY */
                if (phy_interrupt_is_valid(phydev)) {
index d39ae77707ef0ac49e7df50cb0b84ab222ed6f67..ac23322a32e1ce57e5fac4d90699dcbe48219d26 100644 (file)
@@ -135,9 +135,7 @@ static int mdio_bus_phy_resume(struct device *dev)
        if (!mdio_bus_phy_may_suspend(phydev))
                goto no_resume;
 
-       mutex_lock(&phydev->lock);
        ret = phy_resume(phydev);
-       mutex_unlock(&phydev->lock);
        if (ret < 0)
                return ret;
 
@@ -376,7 +374,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id,
        dev->duplex = -1;
        dev->pause = 0;
        dev->asym_pause = 0;
-       dev->link = 1;
+       dev->link = 0;
        dev->interface = PHY_INTERFACE_MODE_GMII;
 
        dev->autoneg = AUTONEG_ENABLE;
@@ -1014,10 +1012,17 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
        err = sysfs_create_link(&phydev->mdio.dev.kobj, &dev->dev.kobj,
                                "attached_dev");
        if (!err) {
-               err = sysfs_create_link(&dev->dev.kobj, &phydev->mdio.dev.kobj,
-                                       "phydev");
-               if (err)
-                       goto error;
+               err = sysfs_create_link_nowarn(&dev->dev.kobj,
+                                              &phydev->mdio.dev.kobj,
+                                              "phydev");
+               if (err) {
+                       dev_err(&dev->dev, "could not add device link to %s err %d\n",
+                               kobject_name(&phydev->mdio.dev.kobj),
+                               err);
+                       /* non-fatal - some net drivers can use one netdevice
+                        * with more than one phy
+                        */
+               }
 
                phydev->sysfs_links = true;
        }
@@ -1041,9 +1046,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
        if (err)
                goto error;
 
-       mutex_lock(&phydev->lock);
        phy_resume(phydev);
-       mutex_unlock(&phydev->lock);
        phy_led_triggers_register(phydev);
 
        return err;
@@ -1172,7 +1175,7 @@ int phy_suspend(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_suspend);
 
-int phy_resume(struct phy_device *phydev)
+int __phy_resume(struct phy_device *phydev)
 {
        struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver);
        int ret = 0;
@@ -1189,6 +1192,18 @@ int phy_resume(struct phy_device *phydev)
 
        return ret;
 }
+EXPORT_SYMBOL(__phy_resume);
+
+int phy_resume(struct phy_device *phydev)
+{
+       int ret;
+
+       mutex_lock(&phydev->lock);
+       ret = __phy_resume(phydev);
+       mutex_unlock(&phydev->lock);
+
+       return ret;
+}
 EXPORT_SYMBOL(phy_resume);
 
 int phy_loopback(struct phy_device *phydev, bool enable)
@@ -1658,6 +1673,23 @@ int genphy_config_init(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(genphy_config_init);
 
+/* This is used for PHY devices which do not support MMD extended register
+ * access, but which exhibit side effects when an MMD register is accessed
+ * via the indirect method.
+ */
+int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad, u16 regnum)
+{
+       return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(genphy_read_mmd_unsupported);
+
+int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum,
+                                u16 regnum, u16 val)
+{
+       return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(genphy_write_mmd_unsupported);
+
 int genphy_suspend(struct phy_device *phydev)
 {
        return phy_set_bits(phydev, MII_BMCR, BMCR_PDOWN);
index 6ac8b29b2dc3c5fcf630a8e54fc4897ae330d24b..51a011a349fee8dfbc9dc35784c67dee6c0e4b81 100644 (file)
@@ -364,7 +364,7 @@ static void phylink_get_fixed_state(struct phylink *pl, struct phylink_link_stat
 }
 
 /* Flow control is resolved according to our and the link partners
- * advertisments using the following drawn from the 802.3 specs:
+ * advertisements using the following drawn from the 802.3 specs:
  *  Local device  Link partner
  *  Pause AsymDir Pause AsymDir Result
  *    1     X       1     X     TX+RX
@@ -679,12 +679,11 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
 
        mutex_lock(&phy->lock);
        mutex_lock(&pl->state_mutex);
-       pl->netdev->phydev = phy;
        pl->phydev = phy;
        linkmode_copy(pl->supported, supported);
        linkmode_copy(pl->link_config.advertising, config.advertising);
 
-       /* Restrict the phy advertisment according to the MAC support. */
+       /* Restrict the phy advertisement according to the MAC support. */
        ethtool_convert_link_mode_to_legacy_u32(&advertising, config.advertising);
        phy->advertising = advertising;
        mutex_unlock(&pl->state_mutex);
@@ -817,7 +816,6 @@ void phylink_disconnect_phy(struct phylink *pl)
        if (phy) {
                mutex_lock(&phy->lock);
                mutex_lock(&pl->state_mutex);
-               pl->netdev->phydev = NULL;
                pl->phydev = NULL;
                mutex_unlock(&pl->state_mutex);
                mutex_unlock(&phy->lock);
@@ -889,7 +887,7 @@ void phylink_start(struct phylink *pl)
 
        /* Apply the link configuration to the MAC when starting. This allows
         * a fixed-link to start with the correct parameters, and also
-        * ensures that we set the appropriate advertisment for Serdes links.
+        * ensures that we set the appropriate advertisement for Serdes links.
         */
        phylink_resolve_flow(pl, &pl->link_config);
        phylink_mac_config(pl, &pl->link_config);
@@ -1076,7 +1074,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
 
        config = pl->link_config;
 
-       /* Mask out unsupported advertisments */
+       /* Mask out unsupported advertisements */
        linkmode_and(config.advertising, kset->link_modes.advertising,
                     pl->supported);
 
@@ -1121,7 +1119,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
        if (phylink_validate(pl, pl->supported, &config))
                return -EINVAL;
 
-       /* If autonegotiation is enabled, we must have an advertisment */
+       /* If autonegotiation is enabled, we must have an advertisement */
        if (config.an_enabled && phylink_is_empty_linkmode(config.advertising))
                return -EINVAL;
 
@@ -1584,25 +1582,14 @@ static int phylink_sfp_module_insert(void *upstream,
        bool changed;
        u8 port;
 
-       sfp_parse_support(pl->sfp_bus, id, support);
-       port = sfp_parse_port(pl->sfp_bus, id, support);
-       iface = sfp_parse_interface(pl->sfp_bus, id);
-
        ASSERT_RTNL();
 
-       switch (iface) {
-       case PHY_INTERFACE_MODE_SGMII:
-       case PHY_INTERFACE_MODE_1000BASEX:
-       case PHY_INTERFACE_MODE_2500BASEX:
-       case PHY_INTERFACE_MODE_10GKR:
-               break;
-       default:
-               return -EINVAL;
-       }
+       sfp_parse_support(pl->sfp_bus, id, support);
+       port = sfp_parse_port(pl->sfp_bus, id, support);
 
        memset(&config, 0, sizeof(config));
        linkmode_copy(config.advertising, support);
-       config.interface = iface;
+       config.interface = PHY_INTERFACE_MODE_NA;
        config.speed = SPEED_UNKNOWN;
        config.duplex = DUPLEX_UNKNOWN;
        config.pause = MLO_PAUSE_AN;
@@ -1610,6 +1597,22 @@ static int phylink_sfp_module_insert(void *upstream,
 
        /* Ignore errors if we're expecting a PHY to attach later */
        ret = phylink_validate(pl, support, &config);
+       if (ret) {
+               netdev_err(pl->netdev, "validation with support %*pb failed: %d\n",
+                          __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
+               return ret;
+       }
+
+       iface = sfp_select_interface(pl->sfp_bus, id, config.advertising);
+       if (iface == PHY_INTERFACE_MODE_NA) {
+               netdev_err(pl->netdev,
+                          "selection of interface failed, advertisement %*pb\n",
+                          __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising);
+               return -EINVAL;
+       }
+
+       config.interface = iface;
+       ret = phylink_validate(pl, support, &config);
        if (ret) {
                netdev_err(pl->netdev, "validation of %s/%s with support %*pb failed: %d\n",
                           phylink_an_mode_str(MLO_AN_INBAND),
index ee3ca4a2f12b415c9e532a9aaf218e89b9664cc9..9f48ecf9c62700ea815492fb91a960ba11e7b248 100644 (file)
@@ -172,6 +172,8 @@ static struct phy_driver realtek_drvs[] = {
                .flags          = PHY_HAS_INTERRUPT,
                .ack_interrupt  = &rtl821x_ack_interrupt,
                .config_intr    = &rtl8211b_config_intr,
+               .read_mmd       = &genphy_read_mmd_unsupported,
+               .write_mmd      = &genphy_write_mmd_unsupported,
        }, {
                .phy_id         = 0x001cc914,
                .name           = "RTL8211DN Gigabit Ethernet",
index 8961209ee949bbcdbc0fda0f00d23d326f99b542..3d4ff5d0d2a6db9822ddfbebb26fc43a328fba1d 100644 (file)
@@ -105,68 +105,6 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
 }
 EXPORT_SYMBOL_GPL(sfp_parse_port);
 
-/**
- * sfp_parse_interface() - Parse the phy_interface_t
- * @bus: a pointer to the &struct sfp_bus structure for the sfp module
- * @id: a pointer to the module's &struct sfp_eeprom_id
- *
- * Derive the phy_interface_t mode for the information found in the
- * module's identifying EEPROM. There is no standard or defined way
- * to derive this information, so we use some heuristics.
- *
- * If the encoding is 64b66b, then the module must be >= 10G, so
- * return %PHY_INTERFACE_MODE_10GKR.
- *
- * If it's 8b10b, then it's 1G or slower. If it's definitely a fibre
- * module, return %PHY_INTERFACE_MODE_1000BASEX mode, otherwise return
- * %PHY_INTERFACE_MODE_SGMII mode.
- *
- * If the encoding is not known, return %PHY_INTERFACE_MODE_NA.
- */
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
-                                   const struct sfp_eeprom_id *id)
-{
-       phy_interface_t iface;
-
-       /* Setting the serdes link mode is guesswork: there's no field in
-        * the EEPROM which indicates what mode should be used.
-        *
-        * If the module wants 64b66b, then it must be >= 10G.
-        *
-        * If it's a gigabit-only fiber module, it probably does not have
-        * a PHY, so switch to 802.3z negotiation mode. Otherwise, switch
-        * to SGMII mode (which is required to support non-gigabit speeds).
-        */
-       switch (id->base.encoding) {
-       case SFP_ENCODING_8472_64B66B:
-               iface = PHY_INTERFACE_MODE_10GKR;
-               break;
-
-       case SFP_ENCODING_8B10B:
-               if (!id->base.e1000_base_t &&
-                   !id->base.e100_base_lx &&
-                   !id->base.e100_base_fx)
-                       iface = PHY_INTERFACE_MODE_1000BASEX;
-               else
-                       iface = PHY_INTERFACE_MODE_SGMII;
-               break;
-
-       default:
-               if (id->base.e1000_base_cx) {
-                       iface = PHY_INTERFACE_MODE_1000BASEX;
-                       break;
-               }
-
-               iface = PHY_INTERFACE_MODE_NA;
-               dev_err(bus->sfp_dev,
-                       "SFP module encoding does not support 8b10b nor 64b66b\n");
-               break;
-       }
-
-       return iface;
-}
-EXPORT_SYMBOL_GPL(sfp_parse_interface);
-
 /**
  * sfp_parse_support() - Parse the eeprom id for supported link modes
  * @bus: a pointer to the &struct sfp_bus structure for the sfp module
@@ -180,10 +118,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
                       unsigned long *support)
 {
        unsigned int br_min, br_nom, br_max;
-
-       phylink_set(support, Autoneg);
-       phylink_set(support, Pause);
-       phylink_set(support, Asym_Pause);
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = { 0, };
 
        /* Decode the bitrate information to MBd */
        br_min = br_nom = br_max = 0;
@@ -201,20 +136,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
 
        /* Set ethtool support from the compliance fields. */
        if (id->base.e10g_base_sr)
-               phylink_set(support, 10000baseSR_Full);
+               phylink_set(modes, 10000baseSR_Full);
        if (id->base.e10g_base_lr)
-               phylink_set(support, 10000baseLR_Full);
+               phylink_set(modes, 10000baseLR_Full);
        if (id->base.e10g_base_lrm)
-               phylink_set(support, 10000baseLRM_Full);
+               phylink_set(modes, 10000baseLRM_Full);
        if (id->base.e10g_base_er)
-               phylink_set(support, 10000baseER_Full);
+               phylink_set(modes, 10000baseER_Full);
        if (id->base.e1000_base_sx ||
            id->base.e1000_base_lx ||
            id->base.e1000_base_cx)
-               phylink_set(support, 1000baseX_Full);
+               phylink_set(modes, 1000baseX_Full);
        if (id->base.e1000_base_t) {
-               phylink_set(support, 1000baseT_Half);
-               phylink_set(support, 1000baseT_Full);
+               phylink_set(modes, 1000baseT_Half);
+               phylink_set(modes, 1000baseT_Full);
        }
 
        /* 1000Base-PX or 1000Base-BX10 */
@@ -228,20 +163,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
        if ((id->base.sfp_ct_passive || id->base.sfp_ct_active) && br_nom) {
                /* This may look odd, but some manufacturers use 12000MBd */
                if (br_min <= 12000 && br_max >= 10300)
-                       phylink_set(support, 10000baseCR_Full);
+                       phylink_set(modes, 10000baseCR_Full);
                if (br_min <= 3200 && br_max >= 3100)
-                       phylink_set(support, 2500baseX_Full);
+                       phylink_set(modes, 2500baseX_Full);
                if (br_min <= 1300 && br_max >= 1200)
-                       phylink_set(support, 1000baseX_Full);
+                       phylink_set(modes, 1000baseX_Full);
        }
        if (id->base.sfp_ct_passive) {
                if (id->base.passive.sff8431_app_e)
-                       phylink_set(support, 10000baseCR_Full);
+                       phylink_set(modes, 10000baseCR_Full);
        }
        if (id->base.sfp_ct_active) {
                if (id->base.active.sff8431_app_e ||
                    id->base.active.sff8431_lim) {
-                       phylink_set(support, 10000baseCR_Full);
+                       phylink_set(modes, 10000baseCR_Full);
                }
        }
 
@@ -249,18 +184,18 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
        case 0x00: /* Unspecified */
                break;
        case 0x02: /* 100Gbase-SR4 or 25Gbase-SR */
-               phylink_set(support, 100000baseSR4_Full);
-               phylink_set(support, 25000baseSR_Full);
+               phylink_set(modes, 100000baseSR4_Full);
+               phylink_set(modes, 25000baseSR_Full);
                break;
        case 0x03: /* 100Gbase-LR4 or 25Gbase-LR */
        case 0x04: /* 100Gbase-ER4 or 25Gbase-ER */
-               phylink_set(support, 100000baseLR4_ER4_Full);
+               phylink_set(modes, 100000baseLR4_ER4_Full);
                break;
        case 0x0b: /* 100Gbase-CR4 or 25Gbase-CR CA-L */
        case 0x0c: /* 25Gbase-CR CA-S */
        case 0x0d: /* 25Gbase-CR CA-N */
-               phylink_set(support, 100000baseCR4_Full);
-               phylink_set(support, 25000baseCR_Full);
+               phylink_set(modes, 100000baseCR4_Full);
+               phylink_set(modes, 25000baseCR_Full);
                break;
        default:
                dev_warn(bus->sfp_dev,
@@ -274,13 +209,70 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
            id->base.fc_speed_200 ||
            id->base.fc_speed_400) {
                if (id->base.br_nominal >= 31)
-                       phylink_set(support, 2500baseX_Full);
+                       phylink_set(modes, 2500baseX_Full);
                if (id->base.br_nominal >= 12)
-                       phylink_set(support, 1000baseX_Full);
+                       phylink_set(modes, 1000baseX_Full);
        }
+
+       /* If we haven't discovered any modes that this module supports, try
+        * the encoding and bitrate to determine supported modes. Some BiDi
+        * modules (eg, 1310nm/1550nm) are not 1000BASE-BX compliant due to
+        * the differing wavelengths, so do not set any transceiver bits.
+        */
+       if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+               /* If the encoding and bit rate allows 1000baseX */
+               if (id->base.encoding == SFP_ENCODING_8B10B && br_nom &&
+                   br_min <= 1300 && br_max >= 1200)
+                       phylink_set(modes, 1000baseX_Full);
+       }
+
+       bitmap_or(support, support, modes, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+       phylink_set(support, Autoneg);
+       phylink_set(support, Pause);
+       phylink_set(support, Asym_Pause);
 }
 EXPORT_SYMBOL_GPL(sfp_parse_support);
 
+/**
+ * sfp_select_interface() - Select appropriate phy_interface_t mode
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @id: a pointer to the module's &struct sfp_eeprom_id
+ * @link_modes: ethtool link modes mask
+ *
+ * Derive the phy_interface_t mode from the link modes mask and the
+ * module's identifying EEPROM. There is no standard or defined way to
+ * derive this information, so the checks below are tried in order and
+ * PHY_INTERFACE_MODE_NA is returned (with a warning) if none matches.
+ */
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+                                    const struct sfp_eeprom_id *id,
+                                    unsigned long *link_modes)
+{
+       if (phylink_test(link_modes, 10000baseCR_Full) ||
+           phylink_test(link_modes, 10000baseSR_Full) ||
+           phylink_test(link_modes, 10000baseLR_Full) ||
+           phylink_test(link_modes, 10000baseLRM_Full) ||
+           phylink_test(link_modes, 10000baseER_Full))
+               return PHY_INTERFACE_MODE_10GKR;
+
+       if (phylink_test(link_modes, 2500baseX_Full))
+               return PHY_INTERFACE_MODE_2500BASEX;
+
+       if (id->base.e1000_base_t ||    /* EEPROM ID bits, not link modes */
+           id->base.e100_base_lx ||
+           id->base.e100_base_fx)
+               return PHY_INTERFACE_MODE_SGMII;
+
+       if (phylink_test(link_modes, 1000baseX_Full))
+               return PHY_INTERFACE_MODE_1000BASEX;
+
+       dev_warn(bus->sfp_dev, "Unable to ascertain link mode\n");
+
+       return PHY_INTERFACE_MODE_NA;
+}
+EXPORT_SYMBOL_GPL(sfp_select_interface);
+
 static LIST_HEAD(sfp_buses);
 static DEFINE_MUTEX(sfp_mutex);
 
index 6c7d9289078d3d27c4e6f7c425e3ae00d5733d85..83bf4959b043cceb3c22a11bdc3d37d1e31aec69 100644 (file)
@@ -42,6 +42,7 @@ enum {
 
        SFP_MOD_EMPTY = 0,
        SFP_MOD_PROBE,
+       SFP_MOD_HPOWER,
        SFP_MOD_PRESENT,
        SFP_MOD_ERROR,
 
@@ -86,6 +87,7 @@ static const enum gpiod_flags gpio_flags[] = {
  * access the I2C EEPROM.  However, Avago modules require 300ms.
  */
 #define T_PROBE_INIT   msecs_to_jiffies(300)
+#define T_HPOWER_LEVEL msecs_to_jiffies(300)
 #define T_PROBE_RETRY  msecs_to_jiffies(100)
 
 /* SFP modules appear to always have their PHY configured for bus address
@@ -110,10 +112,12 @@ struct sfp {
        struct sfp_bus *sfp_bus;
        struct phy_device *mod_phy;
        const struct sff_data *type;
+       u32 max_power_mW;
 
        unsigned int (*get_state)(struct sfp *);
        void (*set_state)(struct sfp *, unsigned int);
        int (*read)(struct sfp *, bool, u8, void *, size_t);
+       int (*write)(struct sfp *, bool, u8, void *, size_t);
 
        struct gpio_desc *gpio[GPIO_MAX];
 
@@ -201,10 +205,11 @@ static void sfp_gpio_set_state(struct sfp *sfp, unsigned int state)
        }
 }
 
-static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
-                        void *buf, size_t len)
+static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+                       size_t len)
 {
        struct i2c_msg msgs[2];
+       u8 bus_addr = a2 ? 0x51 : 0x50;
        int ret;
 
        msgs[0].addr = bus_addr;
@@ -216,17 +221,38 @@ static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
        msgs[1].len = len;
        msgs[1].buf = buf;
 
-       ret = i2c_transfer(i2c, msgs, ARRAY_SIZE(msgs));
+       ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
        if (ret < 0)
                return ret;
 
        return ret == ARRAY_SIZE(msgs) ? len : 0;
 }
 
-static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 addr, void *buf,
-                       size_t len)
+static int sfp_i2c_write(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+       size_t len)
 {
-       return sfp__i2c_read(sfp->i2c, a2 ? 0x51 : 0x50, addr, buf, len);
+       struct i2c_msg msgs[1];
+       u8 bus_addr = a2 ? 0x51 : 0x50; /* a2 selects bus address 0x51 */
+       int ret;
+
+       msgs[0].addr = bus_addr;
+       msgs[0].flags = 0;
+       msgs[0].len = 1 + len;  /* dev_addr byte followed by the payload */
+       msgs[0].buf = kmalloc(1 + len, GFP_KERNEL); /* freed below */
+       if (!msgs[0].buf)
+               return -ENOMEM;
+
+       msgs[0].buf[0] = dev_addr;
+       memcpy(&msgs[0].buf[1], buf, len);
+
+       ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
+
+       kfree(msgs[0].buf);     /* released on success and failure alike */
+
+       if (ret < 0)
+               return ret;
+
+       return ret == ARRAY_SIZE(msgs) ? len : 0; /* len only if ret matches the message count */
 }
 
 static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
@@ -239,6 +265,7 @@ static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
 
        sfp->i2c = i2c;
        sfp->read = sfp_i2c_read;
+       sfp->write = sfp_i2c_write;
 
        i2c_mii = mdio_i2c_alloc(sfp->dev, i2c);
        if (IS_ERR(i2c_mii))
@@ -274,6 +301,11 @@ static int sfp_read(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
        return sfp->read(sfp, a2, addr, buf, len);
 }
 
+static int sfp_write(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
+{ /* thin wrapper: forwards all arguments unchanged to sfp->write */
+       return sfp->write(sfp, a2, addr, buf, len);
+}
+
 static unsigned int sfp_check(void *buf, size_t len)
 {
        u8 *p, check;
@@ -462,21 +494,83 @@ static void sfp_sm_mod_init(struct sfp *sfp)
                sfp_sm_probe_phy(sfp);
 }
 
+static int sfp_sm_mod_hpower(struct sfp *sfp)
+{ /* returns 0, T_HPOWER_LEVEL (jiffies, > 0) or a negative errno */
+       u32 power;
+       u8 val;
+       int err;
+
+       power = 1000;   /* mW; compared against sfp->max_power_mW below */
+       if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL))
+               power = 1500;
+       if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL))
+               power = 2000;
+
+       if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE &&
+           (sfp->id.ext.diagmon & (SFP_DIAGMON_DDM | SFP_DIAGMON_ADDRMODE)) !=
+           SFP_DIAGMON_DDM) {
+               /* The module appears not to implement bus address 0xa2,
+                * or requires an address change sequence, so assume that
+                * the module powers up in the indicated power mode.
+                */
+               if (power > sfp->max_power_mW) {
+                       dev_err(sfp->dev,
+                               "Host does not support %u.%uW modules\n",
+                               power / 1000, (power / 100) % 10);
+                       return -EINVAL;
+               }
+               return 0;
+       }
+
+       if (power > sfp->max_power_mW) {
+               dev_warn(sfp->dev,
+                        "Host does not support %u.%uW modules, module left in power mode 1\n",
+                        power / 1000, (power / 100) % 10);
+               return 0;       /* not fatal: only a warning is logged */
+       }
+
+       if (power <= 1000)
+               return 0;       /* no power level change required */
+
+       err = sfp_read(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+       if (err != sizeof(val)) {
+               dev_err(sfp->dev, "Failed to read EEPROM: %d\n", err);
+               err = -EAGAIN;
+               goto err;
+       }
+
+       val |= BIT(0);  /* presumably the high-power enable bit - TODO confirm */
+
+       err = sfp_write(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+       if (err != sizeof(val)) {
+               dev_err(sfp->dev, "Failed to write EEPROM: %d\n", err);
+               err = -EAGAIN;
+               goto err;
+       }
+
+       dev_info(sfp->dev, "Module switched to %u.%uW power level\n",
+                power / 1000, (power / 100) % 10);
+       return T_HPOWER_LEVEL;
+
+err:
+       return err;
+}
+
 static int sfp_sm_mod_probe(struct sfp *sfp)
 {
        /* SFP module inserted - read I2C data */
        struct sfp_eeprom_id id;
        u8 check;
-       int err;
+       int ret;
 
-       err = sfp_read(sfp, false, 0, &id, sizeof(id));
-       if (err < 0) {
-               dev_err(sfp->dev, "failed to read EEPROM: %d\n", err);
+       ret = sfp_read(sfp, false, 0, &id, sizeof(id));
+       if (ret < 0) {
+               dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret);
                return -EAGAIN;
        }
 
-       if (err != sizeof(id)) {
-               dev_err(sfp->dev, "EEPROM short read: %d\n", err);
+       if (ret != sizeof(id)) {
+               dev_err(sfp->dev, "EEPROM short read: %d\n", ret);
                return -EAGAIN;
        }
 
@@ -521,7 +615,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
                dev_warn(sfp->dev,
                         "module address swap to access page 0xA2 is not supported.\n");
 
-       return sfp_module_insert(sfp->sfp_bus, &sfp->id);
+       ret = sfp_module_insert(sfp->sfp_bus, &sfp->id);
+       if (ret < 0)
+               return ret;
+
+       return sfp_sm_mod_hpower(sfp);
 }
 
 static void sfp_sm_mod_remove(struct sfp *sfp)
@@ -560,17 +658,25 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
                if (event == SFP_E_REMOVE) {
                        sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0);
                } else if (event == SFP_E_TIMEOUT) {
-                       int err = sfp_sm_mod_probe(sfp);
+                       int val = sfp_sm_mod_probe(sfp);
 
-                       if (err == 0)
+                       if (val == 0)
                                sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
-                       else if (err == -EAGAIN)
-                               sfp_sm_set_timer(sfp, T_PROBE_RETRY);
-                       else
+                       else if (val > 0)
+                               sfp_sm_ins_next(sfp, SFP_MOD_HPOWER, val);
+                       else if (val != -EAGAIN)
                                sfp_sm_ins_next(sfp, SFP_MOD_ERROR, 0);
+                       else
+                               sfp_sm_set_timer(sfp, T_PROBE_RETRY);
                }
                break;
 
+       case SFP_MOD_HPOWER:
+               if (event == SFP_E_TIMEOUT) {
+                       sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
+                       break;
+               }
+               /* fallthrough */
        case SFP_MOD_PRESENT:
        case SFP_MOD_ERROR:
                if (event == SFP_E_REMOVE) {
@@ -889,6 +995,14 @@ static int sfp_probe(struct platform_device *pdev)
        if (!(sfp->gpio[GPIO_MODDEF0]))
                sfp->get_state = sff_gpio_get_state;
 
+       device_property_read_u32(&pdev->dev, "maximum-power-milliwatt",
+                                &sfp->max_power_mW);
+       if (!sfp->max_power_mW)
+               sfp->max_power_mW = 1000;
+
+       dev_info(sfp->dev, "Host maximum power %u.%uW\n",
+                sfp->max_power_mW / 1000, (sfp->max_power_mW / 100) % 10);
+
        sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops);
        if (!sfp->sfp_bus)
                return -ENOMEM;
index fb2cef764e9ab56416ba2f2d85ddd75af08a01ed..22f3bdd8206cff3c7eb9cda24d585314ecaa3a0d 100644 (file)
@@ -34,39 +34,17 @@ MODULE_LICENSE("GPL v2");
                                MDIO_PHYXS_LNSTAT_SYNC3 | \
                                MDIO_PHYXS_LNSTAT_ALIGN)
 
-static int teranetics_config_init(struct phy_device *phydev)
-{
-       phydev->supported = SUPPORTED_10000baseT_Full;
-       phydev->advertising = SUPPORTED_10000baseT_Full;
-
-       return 0;
-}
-
-static int teranetics_soft_reset(struct phy_device *phydev)
-{
-       return 0;
-}
-
 static int teranetics_aneg_done(struct phy_device *phydev)
 {
-       int reg;
-
        /* Auto-negotiation state can only be checked when using the copper
         * port; when using the fiber port, just report it as done.
         */
-       if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93)) {
-               reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
-               return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
-       }
+       if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93))
+               return genphy_c45_aneg_done(phydev);
 
        return 1;
 }
 
-static int teranetics_config_aneg(struct phy_device *phydev)
-{
-       return 0;
-}
-
 static int teranetics_read_status(struct phy_device *phydev)
 {
        int reg;
@@ -102,10 +80,10 @@ static struct phy_driver teranetics_driver[] = {
        .phy_id         = PHY_ID_TN2020,
        .phy_id_mask    = 0xffffffff,
        .name           = "Teranetics TN2020",
-       .soft_reset     = teranetics_soft_reset,
+       .soft_reset     = gen10g_no_soft_reset,
        .aneg_done      = teranetics_aneg_done,
-       .config_init    = teranetics_config_init,
-       .config_aneg    = teranetics_config_aneg,
+       .config_init    = gen10g_config_init,
+       .config_aneg    = gen10g_config_aneg,
        .read_status    = teranetics_read_status,
        .match_phy_device = teranetics_match_phy_device,
 },
index 255a5def56e941939e02642d0ee9868ec1fff5bd..926c2c322d436eae70fb7bd9a8cc9c831140cefb 100644 (file)
@@ -257,7 +257,7 @@ struct ppp_net {
 /* Prototypes. */
 static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
                        struct file *file, unsigned int cmd, unsigned long arg);
-static void ppp_xmit_process(struct ppp *ppp);
+static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb);
 static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb);
 static void ppp_push(struct ppp *ppp);
 static void ppp_channel_push(struct channel *pch);
@@ -513,13 +513,12 @@ static ssize_t ppp_write(struct file *file, const char __user *buf,
                goto out;
        }
 
-       skb_queue_tail(&pf->xq, skb);
-
        switch (pf->kind) {
        case INTERFACE:
-               ppp_xmit_process(PF_TO_PPP(pf));
+               ppp_xmit_process(PF_TO_PPP(pf), skb);
                break;
        case CHANNEL:
+               skb_queue_tail(&pf->xq, skb);
                ppp_channel_push(PF_TO_CHANNEL(pf));
                break;
        }
@@ -971,6 +970,7 @@ static struct pernet_operations ppp_net_ops = {
        .exit = ppp_exit_net,
        .id   = &ppp_net_id,
        .size = sizeof(struct ppp_net),
+       .async = true,
 };
 
 static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set)
@@ -1267,8 +1267,8 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev)
        put_unaligned_be16(proto, pp);
 
        skb_scrub_packet(skb, !net_eq(ppp->ppp_net, dev_net(dev)));
-       skb_queue_tail(&ppp->file.xq, skb);
-       ppp_xmit_process(ppp);
+       ppp_xmit_process(ppp, skb);
+
        return NETDEV_TX_OK;
 
  outf:
@@ -1420,13 +1420,14 @@ static void ppp_setup(struct net_device *dev)
  */
 
 /* Called to do any work queued up on the transmit side that can now be done */
-static void __ppp_xmit_process(struct ppp *ppp)
+static void __ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb)
 {
-       struct sk_buff *skb;
-
        ppp_xmit_lock(ppp);
        if (!ppp->closing) {
                ppp_push(ppp);
+
+               if (skb)
+                       skb_queue_tail(&ppp->file.xq, skb);
                while (!ppp->xmit_pending &&
                       (skb = skb_dequeue(&ppp->file.xq)))
                        ppp_send_frame(ppp, skb);
@@ -1440,7 +1441,7 @@ static void __ppp_xmit_process(struct ppp *ppp)
        ppp_xmit_unlock(ppp);
 }
 
-static void ppp_xmit_process(struct ppp *ppp)
+static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb)
 {
        local_bh_disable();
 
@@ -1448,7 +1449,7 @@ static void ppp_xmit_process(struct ppp *ppp)
                goto err;
 
        (*this_cpu_ptr(ppp->xmit_recursion))++;
-       __ppp_xmit_process(ppp);
+       __ppp_xmit_process(ppp, skb);
        (*this_cpu_ptr(ppp->xmit_recursion))--;
 
        local_bh_enable();
@@ -1458,6 +1459,8 @@ static void ppp_xmit_process(struct ppp *ppp)
 err:
        local_bh_enable();
 
+       kfree_skb(skb);
+
        if (net_ratelimit())
                netdev_err(ppp->dev, "recursion detected\n");
 }
@@ -1942,7 +1945,7 @@ static void __ppp_channel_push(struct channel *pch)
        if (skb_queue_empty(&pch->file.xq)) {
                ppp = pch->ppp;
                if (ppp)
-                       __ppp_xmit_process(ppp);
+                       __ppp_xmit_process(ppp, NULL);
        }
 }
 
@@ -3161,6 +3164,15 @@ ppp_connect_channel(struct channel *pch, int unit)
                goto outl;
 
        ppp_lock(ppp);
+       spin_lock_bh(&pch->downl);
+       if (!pch->chan) {
+               /* Don't connect unregistered channels */
+               spin_unlock_bh(&pch->downl);
+               ppp_unlock(ppp);
+               ret = -ENOTCONN;
+               goto outl;
+       }
+       spin_unlock_bh(&pch->downl);
        if (pch->file.hdrlen > ppp->file.hdrlen)
                ppp->file.hdrlen = pch->file.hdrlen;
        hdrlen = pch->file.hdrlen + 2;  /* for protocol bytes */
index bd89d1c559cef5329a7b7dfa238d04d4bf54811c..c10e6181a2f0d0758eb1e43b60c19e85efd0b98a 100644 (file)
@@ -1161,6 +1161,7 @@ static struct pernet_operations pppoe_net_ops = {
        .exit = pppoe_exit_net,
        .id   = &pppoe_net_id,
        .size = sizeof(struct pppoe_net),
+       .async = true,
 };
 
 static int __init pppoe_init(void)
index a468439969df7f7166c3116bfaa543f82ae52c91..222093e878a8aacf7239c3ce75bfe9d64f3e40c7 100644 (file)
@@ -1105,14 +1105,15 @@ static void team_port_disable_netpoll(struct team_port *port)
 }
 #endif
 
-static int team_upper_dev_link(struct team *team, struct team_port *port)
+static int team_upper_dev_link(struct team *team, struct team_port *port,
+                              struct netlink_ext_ack *extack)
 {
        struct netdev_lag_upper_info lag_upper_info;
        int err;
 
        lag_upper_info.tx_type = team->mode->lag_tx_type;
        err = netdev_master_upper_dev_link(port->dev, team->dev, NULL,
-                                          &lag_upper_info, NULL);
+                                          &lag_upper_info, extack);
        if (err)
                return err;
        port->dev->priv_flags |= IFF_TEAM_PORT;
@@ -1129,7 +1130,8 @@ static void __team_port_change_port_added(struct team_port *port, bool linkup);
 static int team_dev_type_check_change(struct net_device *dev,
                                      struct net_device *port_dev);
 
-static int team_port_add(struct team *team, struct net_device *port_dev)
+static int team_port_add(struct team *team, struct net_device *port_dev,
+                        struct netlink_ext_ack *extack)
 {
        struct net_device *dev = team->dev;
        struct team_port *port;
@@ -1137,12 +1139,14 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
        int err;
 
        if (port_dev->flags & IFF_LOOPBACK) {
+               NL_SET_ERR_MSG(extack, "Loopback device can't be added as a team port");
                netdev_err(dev, "Device %s is loopback device. Loopback devices can't be added as a team port\n",
                           portname);
                return -EINVAL;
        }
 
        if (team_port_exists(port_dev)) {
+               NL_SET_ERR_MSG(extack, "Device is already a port of a team device");
                netdev_err(dev, "Device %s is already a port "
                                "of a team device\n", portname);
                return -EBUSY;
@@ -1150,6 +1154,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 
        if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
            vlan_uses_dev(dev)) {
+               NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
                netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n",
                           portname);
                return -EPERM;
@@ -1160,6 +1165,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
                return err;
 
        if (port_dev->flags & IFF_UP) {
+               NL_SET_ERR_MSG(extack, "Device is up. Set it down before adding it as a team port");
                netdev_err(dev, "Device %s is up. Set it down before adding it as a team port\n",
                           portname);
                return -EBUSY;
@@ -1227,7 +1233,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
                goto err_handler_register;
        }
 
-       err = team_upper_dev_link(team, port);
+       err = team_upper_dev_link(team, port, extack);
        if (err) {
                netdev_err(dev, "Device %s failed to set upper link\n",
                           portname);
@@ -1921,7 +1927,7 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev,
        int err;
 
        mutex_lock(&team->lock);
-       err = team_port_add(team, port_dev);
+       err = team_port_add(team, port_dev, extack);
        mutex_unlock(&team->lock);
 
        if (!err)
@@ -2395,7 +2401,7 @@ static int team_nl_send_options_get(struct team *team, u32 portid, u32 seq,
        if (!nlh) {
                err = __send_and_alloc_skb(&skb, team, portid, send_func);
                if (err)
-                       goto errout;
+                       return err;
                goto send_done;
        }
 
@@ -2681,7 +2687,7 @@ static int team_nl_send_port_list_get(struct team *team, u32 portid, u32 seq,
        if (!nlh) {
                err = __send_and_alloc_skb(&skb, team, portid, send_func);
                if (err)
-                       goto errout;
+                       return err;
                goto send_done;
        }
 
index d531954512c7a331984bbf8f59910984b9333159..a1ba262f40ad0755d2cea34867851bb7acd8e2de 100644 (file)
@@ -182,7 +182,6 @@ struct tun_file {
        struct tun_struct *detached;
        struct ptr_ring tx_ring;
        struct xdp_rxq_info xdp_rxq;
-       int xdp_pending_pkts;
 };
 
 struct tun_flow_entry {
@@ -657,7 +656,7 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
        return tun;
 }
 
-static void tun_ptr_free(void *ptr)
+void tun_ptr_free(void *ptr)
 {
        if (!ptr)
                return;
@@ -669,6 +668,7 @@ static void tun_ptr_free(void *ptr)
                __skb_array_destroy_skb(ptr);
        }
 }
+EXPORT_SYMBOL_GPL(tun_ptr_free);
 
 static void tun_queue_purge(struct tun_file *tfile)
 {
@@ -1614,7 +1614,6 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
        unsigned int delta = 0;
        char *buf;
        size_t copied;
-       bool xdp_xmit = false;
        int err, pad = TUN_RX_PAD;
 
        rcu_read_lock();
@@ -1644,6 +1643,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
        else
                *skb_xdp = 0;
 
+       preempt_disable();
        rcu_read_lock();
        xdp_prog = rcu_dereference(tun->xdp_prog);
        if (xdp_prog && !*skb_xdp) {
@@ -1663,15 +1663,22 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
                case XDP_REDIRECT:
                        get_page(alloc_frag->page);
                        alloc_frag->offset += buflen;
-                       ++tfile->xdp_pending_pkts;
                        err = xdp_do_redirect(tun->dev, &xdp, xdp_prog);
+                       xdp_do_flush_map();
                        if (err)
                                goto err_redirect;
                        rcu_read_unlock();
+                       preempt_enable();
                        return NULL;
                case XDP_TX:
-                       xdp_xmit = true;
-                       /* fall through */
+                       get_page(alloc_frag->page);
+                       alloc_frag->offset += buflen;
+                       if (tun_xdp_xmit(tun->dev, &xdp))
+                               goto err_redirect;
+                       tun_xdp_flush(tun->dev);
+                       rcu_read_unlock();
+                       preempt_enable();
+                       return NULL;
                case XDP_PASS:
                        delta = orig_data - xdp.data;
                        break;
@@ -1689,6 +1696,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
        skb = build_skb(buf, buflen);
        if (!skb) {
                rcu_read_unlock();
+               preempt_enable();
                return ERR_PTR(-ENOMEM);
        }
 
@@ -1697,14 +1705,8 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
        get_page(alloc_frag->page);
        alloc_frag->offset += buflen;
 
-       if (xdp_xmit) {
-               skb->dev = tun->dev;
-               generic_xdp_tx(skb, xdp_prog);
-               rcu_read_unlock();
-               return NULL;
-       }
-
        rcu_read_unlock();
+       preempt_enable();
 
        return skb;
 
@@ -1712,6 +1714,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
        put_page(alloc_frag->page);
 err_xdp:
        rcu_read_unlock();
+       preempt_enable();
        this_cpu_inc(tun->pcpu_stats->rx_dropped);
        return NULL;
 }
@@ -1985,11 +1988,6 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
        result = tun_get_user(tun, tfile, NULL, from,
                              file->f_flags & O_NONBLOCK, false);
 
-       if (tfile->xdp_pending_pkts) {
-               tfile->xdp_pending_pkts = 0;
-               xdp_do_flush_map();
-       }
-
        tun_put(tun);
        return result;
 }
@@ -2382,13 +2380,6 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
        ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter,
                           m->msg_flags & MSG_DONTWAIT,
                           m->msg_flags & MSG_MORE);
-
-       if (tfile->xdp_pending_pkts >= NAPI_POLL_WEIGHT ||
-           !(m->msg_flags & MSG_MORE)) {
-               tfile->xdp_pending_pkts = 0;
-               xdp_do_flush_map();
-       }
-
        tun_put(tun);
        return ret;
 }
@@ -3231,7 +3222,6 @@ static int tun_chr_open(struct inode *inode, struct file * file)
        sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
 
        memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring));
-       tfile->xdp_pending_pkts = 0;
 
        return 0;
 }
index f32261ecd2150036d3575986ab2e1971def5086f..fb1b78d4b9ef945b10642e3dfa7825637c028a5a 100644 (file)
@@ -1223,7 +1223,7 @@ static int ax88179_led_setting(struct usbnet *dev)
        return 0;
 }
 
-static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
+static int ax88179_link_bind_or_reset(struct usbnet *dev, bool do_reset)
 {
        u8 buf[5];
        u16 *tmp16;
@@ -1231,12 +1231,11 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
        struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data;
        struct ethtool_eee eee_data;
 
-       usbnet_get_endpoints(dev, intf);
-
        tmp16 = (u16 *)buf;
        tmp = (u8 *)buf;
 
-       memset(ax179_data, 0, sizeof(*ax179_data));
+       if (!do_reset)
+               memset(ax179_data, 0, sizeof(*ax179_data));
 
        /* Power up ethernet PHY */
        *tmp16 = 0;
@@ -1249,9 +1248,13 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
        ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp);
        msleep(100);
 
+       if (do_reset)
+               ax88179_auto_detach(dev, 0);
+
        ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN,
                         ETH_ALEN, dev->net->dev_addr);
-       memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN);
+       if (!do_reset)
+               memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN);
 
        /* RX bulk configuration */
        memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5);
@@ -1266,19 +1269,21 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
        ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PAUSE_WATERLVL_HIGH,
                          1, 1, tmp);
 
-       dev->net->netdev_ops = &ax88179_netdev_ops;
-       dev->net->ethtool_ops = &ax88179_ethtool_ops;
-       dev->net->needed_headroom = 8;
-       dev->net->max_mtu = 4088;
-
-       /* Initialize MII structure */
-       dev->mii.dev = dev->net;
-       dev->mii.mdio_read = ax88179_mdio_read;
-       dev->mii.mdio_write = ax88179_mdio_write;
-       dev->mii.phy_id_mask = 0xff;
-       dev->mii.reg_num_mask = 0xff;
-       dev->mii.phy_id = 0x03;
-       dev->mii.supports_gmii = 1;
+       if (!do_reset) {
+               dev->net->netdev_ops = &ax88179_netdev_ops;
+               dev->net->ethtool_ops = &ax88179_ethtool_ops;
+               dev->net->needed_headroom = 8;
+               dev->net->max_mtu = 4088;
+
+               /* Initialize MII structure */
+               dev->mii.dev = dev->net;
+               dev->mii.mdio_read = ax88179_mdio_read;
+               dev->mii.mdio_write = ax88179_mdio_write;
+               dev->mii.phy_id_mask = 0xff;
+               dev->mii.reg_num_mask = 0xff;
+               dev->mii.phy_id = 0x03;
+               dev->mii.supports_gmii = 1;
+       }
 
        dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
                              NETIF_F_RXCSUM;
@@ -1330,6 +1335,13 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
        return 0;
 }
 
+static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
+{ /* bind entry point: endpoint lookup, then the shared bind/reset helper */
+       usbnet_get_endpoints(dev, intf);
+
+       return ax88179_link_bind_or_reset(dev, false); /* do_reset = false */
+}
+
 static void ax88179_unbind(struct usbnet *dev, struct usb_interface *intf)
 {
        u16 tmp16;
@@ -1458,74 +1470,7 @@ ax88179_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 
 static int ax88179_link_reset(struct usbnet *dev)
 {
-       struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data;
-       u8 tmp[5], link_sts;
-       u16 mode, tmp16, delay = HZ / 10;
-       u32 tmp32 = 0x40000000;
-       unsigned long jtimeout;
-
-       jtimeout = jiffies + delay;
-       while (tmp32 & 0x40000000) {
-               mode = 0;
-               ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2, &mode);
-               ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2,
-                                 &ax179_data->rxctl);
-
-               /*link up, check the usb device control TX FIFO full or empty*/
-               ax88179_read_cmd(dev, 0x81, 0x8c, 0, 4, &tmp32);
-
-               if (time_after(jiffies, jtimeout))
-                       return 0;
-       }
-
-       mode = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN |
-              AX_MEDIUM_RXFLOW_CTRLEN;
-
-       ax88179_read_cmd(dev, AX_ACCESS_MAC, PHYSICAL_LINK_STATUS,
-                        1, 1, &link_sts);
-
-       ax88179_read_cmd(dev, AX_ACCESS_PHY, AX88179_PHY_ID,
-                        GMII_PHY_PHYSR, 2, &tmp16);
-
-       if (!(tmp16 & GMII_PHY_PHYSR_LINK)) {
-               return 0;
-       } else if (GMII_PHY_PHYSR_GIGA == (tmp16 & GMII_PHY_PHYSR_SMASK)) {
-               mode |= AX_MEDIUM_GIGAMODE | AX_MEDIUM_EN_125MHZ;
-               if (dev->net->mtu > 1500)
-                       mode |= AX_MEDIUM_JUMBO_EN;
-
-               if (link_sts & AX_USB_SS)
-                       memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5);
-               else if (link_sts & AX_USB_HS)
-                       memcpy(tmp, &AX88179_BULKIN_SIZE[1], 5);
-               else
-                       memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5);
-       } else if (GMII_PHY_PHYSR_100 == (tmp16 & GMII_PHY_PHYSR_SMASK)) {
-               mode |= AX_MEDIUM_PS;
-
-               if (link_sts & (AX_USB_SS | AX_USB_HS))
-                       memcpy(tmp, &AX88179_BULKIN_SIZE[2], 5);
-               else
-                       memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5);
-       } else {
-               memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5);
-       }
-
-       /* RX bulk configuration */
-       ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_BULKIN_QCTRL, 5, 5, tmp);
-
-       dev->rx_urb_size = (1024 * (tmp[3] + 2));
-
-       if (tmp16 & GMII_PHY_PHYSR_FULL)
-               mode |= AX_MEDIUM_FULL_DUPLEX;
-       ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE,
-                         2, 2, &mode);
-
-       ax179_data->eee_enabled = ax88179_chk_eee(dev);
-
-       netif_carrier_on(dev->net);
-
-       return 0;
+       return ax88179_link_bind_or_reset(dev, true);
 }
 
 static int ax88179_reset(struct usbnet *dev)
@@ -1556,7 +1501,6 @@ static int ax88179_reset(struct usbnet *dev)
 
        ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, ETH_ALEN,
                         dev->net->dev_addr);
-       memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN);
 
        /* RX bulk configuration */
        memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5);
index f7180f8db39e15834738a28f2cd7e764062ca165..61ea4eaace5d0ae78a9613555562c248de0de977 100644 (file)
@@ -83,11 +83,8 @@ static int eem_bind(struct usbnet *dev, struct usb_interface *intf)
        int status = 0;
 
        status = usbnet_get_endpoints(dev, intf);
-       if (status < 0) {
-               usb_set_intfdata(intf, NULL);
-               usb_driver_release_interface(driver_of(intf), intf);
+       if (status < 0)
                return status;
-       }
 
        /* no jumbogram (16K) support for now */
 
index 05dca3e5c93d4baf7fb975a6fef30d3d322f1aaf..fff4b13eece29cd3d742309a0f7d57436ec22699 100644 (file)
@@ -895,6 +895,12 @@ static const struct usb_device_id  products[] = {
                                      USB_CDC_SUBCLASS_ETHERNET,
                                      USB_CDC_PROTO_NONE),
        .driver_info = (unsigned long)&wwan_info,
+}, {
+       /* Cinterion PLS8 modem by GEMALTO */
+       USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0061, USB_CLASS_COMM,
+                                     USB_CDC_SUBCLASS_ETHERNET,
+                                     USB_CDC_PROTO_NONE),
+       .driver_info = (unsigned long)&wwan_info,
 }, {
        USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET,
                        USB_CDC_PROTO_NONE),
index ce0b0b4e3a57c49b3ea72cec38e46547a39d6a9e..bd2ba365902883f1b4d66c8d83650d81f1813652 100644 (file)
@@ -114,14 +114,14 @@ kalmia_init_and_get_ethernet_addr(struct usbnet *dev, u8 *ethernet_addr)
                return -ENOMEM;
 
        memcpy(usb_buf, init_msg_1, 12);
-       status = kalmia_send_init_packet(dev, usb_buf, sizeof(init_msg_1)
-               / sizeof(init_msg_1[0]), usb_buf, 24);
+       status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_1),
+                                        usb_buf, 24);
        if (status != 0)
                return status;
 
        memcpy(usb_buf, init_msg_2, 12);
-       status = kalmia_send_init_packet(dev, usb_buf, sizeof(init_msg_2)
-               / sizeof(init_msg_2[0]), usb_buf, 28);
+       status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_2),
+                                        usb_buf, 28);
        if (status != 0)
                return status;
 
@@ -150,12 +150,8 @@ kalmia_bind(struct usbnet *dev, struct usb_interface *intf)
        dev->rx_urb_size = dev->hard_mtu * 10; // Found as optimal after testing
 
        status = kalmia_init_and_get_ethernet_addr(dev, ethernet_addr);
-
-       if (status) {
-               usb_set_intfdata(intf, NULL);
-               usb_driver_release_interface(driver_of(intf), intf);
+       if (status)
                return status;
-       }
 
        memcpy(dev->net->dev_addr, ethernet_addr, ETH_ALEN);
 
index dbabd7ca5268a73b9b4b159199c867dd647b1d10..257916f172cdf1c9524a238415b05f9c228a9f4a 100644 (file)
@@ -157,12 +157,8 @@ static int vl600_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 
                s->current_rx_buf = skb_copy_expand(skb, 0,
                                le32_to_cpup(&frame->len), GFP_ATOMIC);
-               if (!s->current_rx_buf) {
-                       netif_err(dev, ifup, dev->net, "Reserving %i bytes "
-                                       "for packet assembly failed.\n",
-                                       le32_to_cpup(&frame->len));
+               if (!s->current_rx_buf)
                        dev->net->stats.rx_errors++;
-               }
 
                return 0;
        }
index 958b2e8b90f689249abfea6c713e491300a7dc94..86f7196f9d91fbf55c791fff88687a43518d66d8 100644 (file)
@@ -1794,7 +1794,7 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
 
                tx_data += len;
                agg->skb_len += len;
-               agg->skb_num++;
+               agg->skb_num += skb_shinfo(skb)->gso_segs ?: 1;
 
                dev_kfree_skb_any(skb);
 
index 8a22ff67b0268a588428c61c6a6211e3c6c2a02a..d9eea8cfe6cb9a3bf8d0d4ce9198af9bccf9c757 100644 (file)
@@ -315,6 +315,7 @@ static void __usbnet_status_stop_force(struct usbnet *dev)
 void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb)
 {
        struct pcpu_sw_netstats *stats64 = this_cpu_ptr(dev->stats64);
+       unsigned long flags;
        int     status;
 
        if (test_bit(EVENT_RX_PAUSED, &dev->flags)) {
@@ -326,10 +327,10 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb)
        if (skb->protocol == 0)
                skb->protocol = eth_type_trans (skb, dev->net);
 
-       u64_stats_update_begin(&stats64->syncp);
+       flags = u64_stats_update_begin_irqsave(&stats64->syncp);
        stats64->rx_packets++;
        stats64->rx_bytes += skb->len;
-       u64_stats_update_end(&stats64->syncp);
+       u64_stats_update_end_irqrestore(&stats64->syncp, flags);
 
        netif_dbg(dev, rx_status, dev->net, "< rx, len %zu, type 0x%x\n",
                  skb->len + sizeof (struct ethhdr), skb->protocol);
@@ -1248,11 +1249,12 @@ static void tx_complete (struct urb *urb)
 
        if (urb->status == 0) {
                struct pcpu_sw_netstats *stats64 = this_cpu_ptr(dev->stats64);
+               unsigned long flags;
 
-               u64_stats_update_begin(&stats64->syncp);
+               flags = u64_stats_update_begin_irqsave(&stats64->syncp);
                stats64->tx_packets += entry->packets;
                stats64->tx_bytes += entry->length;
-               u64_stats_update_end(&stats64->syncp);
+               u64_stats_update_end_irqrestore(&stats64->syncp, flags);
        } else {
                dev->net->stats.tx_errors++;
 
index 9bb9e562b8934d33f65592db8077e5fa720f6cda..7b187ec7411ec053d9825b3774fe4036ea4e59f8 100644 (file)
@@ -504,6 +504,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
        page_off += *len;
 
        while (--*num_buf) {
+               int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
                unsigned int buflen;
                void *buf;
                int off;
@@ -518,7 +519,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
                /* guard against a misconfigured or uncooperative backend that
                 * is sending packet larger than the MTU.
                 */
-               if ((page_off + buflen) > PAGE_SIZE) {
+               if ((page_off + buflen + tailroom) > PAGE_SIZE) {
                        put_page(p);
                        goto err_buf;
                }
@@ -690,6 +691,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
        unsigned int truesize;
        unsigned int headroom = mergeable_ctx_to_headroom(ctx);
        bool sent;
+       int err;
 
        head_skb = NULL;
 
@@ -701,7 +703,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                void *data;
                u32 act;
 
-               /* This happens when rx buffer size is underestimated */
+               /* This happens when rx buffer size is underestimated
+                * or headroom is not enough because of the buffer
+                * was refilled before XDP is set. This should only
+                * happen for the first several packets, so we don't
+                * care much about its performance.
+                */
                if (unlikely(num_buf > 1 ||
                             headroom < virtnet_get_headroom(vi))) {
                        /* linearize data for XDP */
@@ -736,9 +743,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 
                act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
-               if (act != XDP_PASS)
-                       ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
-
                switch (act) {
                case XDP_PASS:
                        /* recalculate offset to account for any header
@@ -770,6 +774,18 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                                goto err_xdp;
                        rcu_read_unlock();
                        goto xdp_xmit;
+               case XDP_REDIRECT:
+                       err = xdp_do_redirect(dev, &xdp, xdp_prog);
+                       if (err) {
+                               if (unlikely(xdp_page != page))
+                                       put_page(xdp_page);
+                               goto err_xdp;
+                       }
+                       *xdp_xmit = true;
+                       if (unlikely(xdp_page != page))
+                               goto err_xdp;
+                       rcu_read_unlock();
+                       goto xdp_xmit;
                default:
                        bpf_warn_invalid_xdp_action(act);
                case XDP_ABORTED:
@@ -1013,13 +1029,18 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
 }
 
 static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
-                                         struct ewma_pkt_len *avg_pkt_len)
+                                         struct ewma_pkt_len *avg_pkt_len,
+                                         unsigned int room)
 {
        const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
        unsigned int len;
 
-       len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
+       if (room)
+               return PAGE_SIZE - room;
+
+       len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
                                rq->min_buf_len, PAGE_SIZE - hdr_len);
+
        return ALIGN(len, L1_CACHE_BYTES);
 }
 
@@ -1028,21 +1049,27 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 {
        struct page_frag *alloc_frag = &rq->alloc_frag;
        unsigned int headroom = virtnet_get_headroom(vi);
+       unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+       unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
        char *buf;
        void *ctx;
        int err;
        unsigned int len, hole;
 
-       len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
-       if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
+       /* Extra tailroom is needed to satisfy XDP's assumption. This
+        * means rx frags coalescing won't work, but consider we've
+        * disabled GSO for XDP, it won't be a big issue.
+        */
+       len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
+       if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
                return -ENOMEM;
 
        buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
        buf += headroom; /* advance address leaving hole at front of pkt */
        get_page(alloc_frag->page);
-       alloc_frag->offset += len + headroom;
+       alloc_frag->offset += len + room;
        hole = alloc_frag->size - alloc_frag->offset;
-       if (hole < len + headroom) {
+       if (hole < len + room) {
                /* To avoid internal fragmentation, if there is very likely not
                 * enough space for another buffer, add the remaining space to
                 * the current buffer.
@@ -2185,8 +2212,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
        }
 
        /* Make sure NAPI is not using any XDP TX queues for RX. */
-       for (i = 0; i < vi->max_queue_pairs; i++)
-               napi_disable(&vi->rq[i].napi);
+       if (netif_running(dev))
+               for (i = 0; i < vi->max_queue_pairs; i++)
+                       napi_disable(&vi->rq[i].napi);
 
        netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
        err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
@@ -2205,7 +2233,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
                }
                if (old_prog)
                        bpf_prog_put(old_prog);
-               virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+               if (netif_running(dev))
+                       virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
        }
 
        return 0;
@@ -2576,12 +2605,15 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
 {
        struct virtnet_info *vi = netdev_priv(queue->dev);
        unsigned int queue_index = get_netdev_rx_queue_index(queue);
+       unsigned int headroom = virtnet_get_headroom(vi);
+       unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
        struct ewma_pkt_len *avg;
 
        BUG_ON(queue_index >= vi->max_queue_pairs);
        avg = &vi->rq[queue_index].mrg_avg_pkt_len;
        return sprintf(buf, "%u\n",
-                      get_mergeable_buf_len(&vi->rq[queue_index], avg));
+                      get_mergeable_buf_len(&vi->rq[queue_index], avg,
+                                      SKB_DATA_ALIGN(headroom + tailroom)));
 }
 
 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
@@ -2825,8 +2857,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 
        /* Assume link up if device can't report link status,
           otherwise get link status from config. */
+       netif_carrier_off(dev);
        if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
-               netif_carrier_off(dev);
                schedule_work(&vi->config_work);
        } else {
                vi->status = VIRTIO_NET_S_LINK_UP;
index 8b39c160743d41170c2e64bdd36c45f98c7df051..e04937f44f33313eb3eebf3bc1ea598686965950 100644 (file)
@@ -977,6 +977,8 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 {
        int ret;
        u32 count;
+       int num_pkts;
+       int tx_num_deferred;
        unsigned long flags;
        struct vmxnet3_tx_ctx ctx;
        union Vmxnet3_GenericDesc *gdesc;
@@ -1075,12 +1077,12 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 #else
        gdesc = ctx.sop_txd;
 #endif
+       tx_num_deferred = le32_to_cpu(tq->shared->txNumDeferred);
        if (ctx.mss) {
                gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
                gdesc->txd.om = VMXNET3_OM_TSO;
                gdesc->txd.msscof = ctx.mss;
-               le32_add_cpu(&tq->shared->txNumDeferred, (skb->len -
-                            gdesc->txd.hlen + ctx.mss - 1) / ctx.mss);
+               num_pkts = (skb->len - gdesc->txd.hlen + ctx.mss - 1) / ctx.mss;
        } else {
                if (skb->ip_summed == CHECKSUM_PARTIAL) {
                        gdesc->txd.hlen = ctx.eth_ip_hdr_size;
@@ -1091,8 +1093,10 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
                        gdesc->txd.om = 0;
                        gdesc->txd.msscof = 0;
                }
-               le32_add_cpu(&tq->shared->txNumDeferred, 1);
+               num_pkts = 1;
        }
+       le32_add_cpu(&tq->shared->txNumDeferred, num_pkts);
+       tx_num_deferred += num_pkts;
 
        if (skb_vlan_tag_present(skb)) {
                gdesc->txd.ti = 1;
@@ -1118,8 +1122,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 
        spin_unlock_irqrestore(&tq->tx_lock, flags);
 
-       if (le32_to_cpu(tq->shared->txNumDeferred) >=
-                                       le32_to_cpu(tq->shared->txThreshold)) {
+       if (tx_num_deferred >= le32_to_cpu(tq->shared->txThreshold)) {
                tq->shared->txNumDeferred = 0;
                VMXNET3_WRITE_BAR0_REG(adapter,
                                       VMXNET3_REG_TXPROD + tq->qid * 8,
@@ -1470,7 +1473,8 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
                        vmxnet3_rx_csum(adapter, skb,
                                        (union Vmxnet3_GenericDesc *)rcd);
                        skb->protocol = eth_type_trans(skb, adapter->netdev);
-                       if (!rcd->tcp || !adapter->lro)
+                       if (!rcd->tcp ||
+                           !(adapter->netdev->features & NETIF_F_LRO))
                                goto not_lro;
 
                        if (segCnt != 0 && mss != 0) {
index 5ba222920e8009d39e73d78ab4132ade53b3c2dd..59ec34052a651ee831bbcbacee467965c5aaba36 100644 (file)
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.4.11.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.4.13.0-k"
 
 /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01040b00
+#define VMXNET3_DRIVER_VERSION_NUM      0x01040d00
 
 #if defined(CONFIG_PCI_MSI)
        /* RSS only makes sense if MSI-X is supported. */
@@ -342,9 +342,6 @@ struct vmxnet3_adapter {
        u8                      __iomem *hw_addr1; /* for BAR 1 */
        u8                              version;
 
-       bool                            rxcsum;
-       bool                            lro;
-
 #ifdef VMXNET3_RSS
        struct UPT1_RSSConf             *rss_conf;
        bool                            rss;
index 9ce0182223a0a659af0e0ae96797fad80e28cd24..c6be49d3a9ebdae8b547d330b97f60a9ade0a631 100644 (file)
@@ -941,6 +941,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
                                             const struct net_device *dev,
                                             struct flowi6 *fl6,
                                             int ifindex,
+                                            const struct sk_buff *skb,
                                             int flags)
 {
        struct net_vrf *vrf = netdev_priv(dev);
@@ -959,7 +960,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
        if (!table)
                return NULL;
 
-       return ip6_pol_route(net, table, ifindex, fl6, flags);
+       return ip6_pol_route(net, table, ifindex, fl6, skb, flags);
 }
 
 static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
@@ -977,7 +978,7 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
        struct net *net = dev_net(vrf_dev);
        struct rt6_info *rt6;
 
-       rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex,
+       rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb,
                                   RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE);
        if (unlikely(!rt6))
                return;
@@ -1110,7 +1111,7 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
        if (!ipv6_addr_any(&fl6->saddr))
                flags |= RT6_LOOKUP_F_HAS_SADDR;
 
-       rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
+       rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, NULL, flags);
        if (rt)
                dst = &rt->dst;
 
@@ -1434,6 +1435,7 @@ static struct pernet_operations vrf_net_ops __net_initdata = {
        .init = vrf_netns_init,
        .id   = &vrf_net_id,
        .size = sizeof(bool),
+       .async = true,
 };
 
 static int __init vrf_init_module(void)
index fab7a4db249efa6921b35115e0662fff7d1b7695..aa5f034d6ad157917c5f247b9e07f95afb0bbd0d 100644 (file)
@@ -3752,6 +3752,7 @@ static struct pernet_operations vxlan_net_ops = {
        .exit_batch = vxlan_exit_batch_net,
        .id   = &vxlan_net_id,
        .size = sizeof(struct vxlan_net),
+       .async = true,
 };
 
 static int __init vxlan_init_module(void)
index afeca6bcdade60a45fb6588cb69e69e32014cc7f..ab8b3cbbb205cc42f2bea15259bee0b77c2ff055 100644 (file)
@@ -574,7 +574,10 @@ static void ppp_timer(struct timer_list *t)
                        ppp_cp_event(proto->dev, proto->pid, TO_GOOD, 0, 0,
                                     0, NULL);
                        proto->restart_counter--;
-               } else
+               } else if (netif_carrier_ok(proto->dev))
+                       ppp_cp_event(proto->dev, proto->pid, TO_GOOD, 0, 0,
+                                    0, NULL);
+               else
                        ppp_cp_event(proto->dev, proto->pid, TO_BAD, 0, 0,
                                     0, NULL);
                break;
index b78ee676e1029fc21d5ed189b118f43b62aac631..5b64bda7d9e71969c535b93d983235e7f5530ceb 100644 (file)
@@ -263,9 +263,6 @@ struct sk_buff *i2400mu_rx(struct i2400mu *i2400mu, struct sk_buff *rx_skb)
                new_skb = skb_copy_expand(rx_skb, 0, rx_size - rx_skb->len,
                                          GFP_KERNEL);
                if (new_skb == NULL) {
-                       if (printk_ratelimit())
-                               dev_err(dev, "RX: Can't reallocate skb to %d; "
-                                       "RX dropped\n", rx_size);
                        kfree_skb(rx_skb);
                        rx_skb = NULL;
                        goto out;       /* drop it...*/
index d5a2dc728078ba528d4c47b7ca2fbe694c05dbe6..9317367e37f0c4234753288bc9fadddb0f042672 100644 (file)
@@ -5,8 +5,8 @@ config WLAN_VENDOR_ADMTEK
          If you have a wireless card belonging to this class, say Y.
 
          Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         the questions about  cards. If you say Y, you will be asked for
+         kernel: saying N will just cause the configurator to skip all the
+         questions about these cards. If you say Y, you will be asked for
          your specific card in the following questions.
 
 if WLAN_VENDOR_ADMTEK
index 44b2470af81d79166f72a458d6a20056b8a84839..82ab7c33cf979706efdc9489142f9daf17032ed5 100644 (file)
@@ -8,8 +8,8 @@ config WLAN_VENDOR_ATH
          If you have a wireless card belonging to this class, say Y.
 
          Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         the questions about  cards. If you say Y, you will be asked for
+         kernel: saying N will just cause the configurator to skip all the
+         questions about these cards. If you say Y, you will be asked for
          your specific card in the following questions.
 
          For more information and documentation on this module you can visit:
index e89e5ef2c2a4edd467ee842ce7316031f2e1c9d2..f246e9ed4a814d783f2543e02d0ff6b4dfec1a02 100644 (file)
@@ -729,6 +729,7 @@ static void ath9k_set_hw_capab(struct ath9k_htc_priv *priv,
        ieee80211_hw_set(hw, SPECTRUM_MGMT);
        ieee80211_hw_set(hw, SIGNAL_DBM);
        ieee80211_hw_set(hw, AMPDU_AGGREGATION);
+       ieee80211_hw_set(hw, DOESNT_SUPPORT_QOS_NDP);
 
        if (ath9k_ps_enable)
                ieee80211_hw_set(hw, SUPPORTS_PS);
index a43cfd1632543eda1939f281839abb2102dc75b3..3e684f8c1f93ba168f53b6c26286ad25ff1ed873 100644 (file)
@@ -5,8 +5,8 @@ config WLAN_VENDOR_ATMEL
          If you have a wireless card belonging to this class, say Y.
 
          Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         the questions about  cards. If you say Y, you will be asked for
+         kernel: saying N will just cause the configurator to skip all the
+         questions about these cards. If you say Y, you will be asked for
          your specific card in the following questions.
 
 if WLAN_VENDOR_ATMEL
index d3651ceb5046c2c6efc9b1a5144624dbcbb7ebb8..eebe2864835f95ec9bd3818e9a62e08825e2c9a4 100644 (file)
@@ -5,8 +5,8 @@ config WLAN_VENDOR_BROADCOM
          If you have a wireless card belonging to this class, say Y.
 
          Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         the questions about  cards. If you say Y, you will be asked for
+         kernel: saying N will just cause the configurator to skip all the
+         questions about these cards. If you say Y, you will be asked for
          your specific card in the following questions.
 
 if WLAN_VENDOR_BROADCOM
index 0b76a615708e1856cb2643577a2a2261785f4bab..0b90a63bdeb142436ac8ec6a12c8f355e314e99d 100644 (file)
@@ -253,7 +253,6 @@ void brcmf_dev_reset(struct device *dev);
 /* Configure the "global" bus state used by upper layers */
 void brcmf_bus_change_state(struct brcmf_bus *bus, enum brcmf_bus_state state);
 
-int brcmf_bus_started(struct device *dev);
 s32 brcmf_iovar_data_set(struct device *dev, char *name, void *data, u32 len);
 void brcmf_bus_add_txhdrlen(struct device *dev, uint len);
 
index 15fa00d79fc66bb7eb7d7c770c6980ee45333355..74a83020c0735142e6127e990433cfd07b17ded3 100644 (file)
@@ -5124,6 +5124,9 @@ static int brcmf_cfg80211_set_pmk(struct wiphy *wiphy, struct net_device *dev,
        if (WARN_ON(ifp->vif->profile.use_fwsup != BRCMF_PROFILE_FWSUP_1X))
                return -EINVAL;
 
+       if (conf->pmk_len > BRCMF_WSEC_MAX_PSK_LEN)
+               return -ERANGE;
+
        return brcmf_set_pmk(ifp, conf->pmk, conf->pmk_len);
 }
 
index 9be0b051066a23214a2b29a0cac18ef64548427e..70ef9835b647ecd6fc192dceabf2f6bc8137ed1f 100644 (file)
@@ -365,9 +365,6 @@ int brcmf_c_preinit_dcmds(struct brcmf_if *ifp)
 
        /* Enable tx beamforming, errors can be ignored (not supported) */
        (void)brcmf_fil_iovar_int_set(ifp, "txbf", 1);
-
-       /* do bus specific preinit here */
-       err = brcmf_bus_preinit(ifp->drvr->bus_if);
 done:
        return err;
 }
index 930e423f83a86803e02c9e7795a7f4cfc013fa43..19048526b4af6cc672672fe7b31e43432063943b 100644 (file)
@@ -914,55 +914,6 @@ static int brcmf_inet6addr_changed(struct notifier_block *nb,
 }
 #endif
 
-int brcmf_attach(struct device *dev, struct brcmf_mp_device *settings)
-{
-       struct brcmf_pub *drvr = NULL;
-       int ret = 0;
-       int i;
-
-       brcmf_dbg(TRACE, "Enter\n");
-
-       /* Allocate primary brcmf_info */
-       drvr = kzalloc(sizeof(struct brcmf_pub), GFP_ATOMIC);
-       if (!drvr)
-               return -ENOMEM;
-
-       for (i = 0; i < ARRAY_SIZE(drvr->if2bss); i++)
-               drvr->if2bss[i] = BRCMF_BSSIDX_INVALID;
-
-       mutex_init(&drvr->proto_block);
-
-       /* Link to bus module */
-       drvr->hdrlen = 0;
-       drvr->bus_if = dev_get_drvdata(dev);
-       drvr->bus_if->drvr = drvr;
-       drvr->settings = settings;
-
-       /* attach debug facilities */
-       brcmf_debug_attach(drvr);
-
-       /* Attach and link in the protocol */
-       ret = brcmf_proto_attach(drvr);
-       if (ret != 0) {
-               brcmf_err("brcmf_prot_attach failed\n");
-               goto fail;
-       }
-
-       /* Attach to events important for core code */
-       brcmf_fweh_register(drvr, BRCMF_E_PSM_WATCHDOG,
-                           brcmf_psm_watchdog_notify);
-
-       /* attach firmware event handler */
-       brcmf_fweh_attach(drvr);
-
-       return ret;
-
-fail:
-       brcmf_detach(dev);
-
-       return ret;
-}
-
 static int brcmf_revinfo_read(struct seq_file *s, void *data)
 {
        struct brcmf_bus *bus_if = dev_get_drvdata(s->private);
@@ -993,11 +944,10 @@ static int brcmf_revinfo_read(struct seq_file *s, void *data)
        return 0;
 }
 
-int brcmf_bus_started(struct device *dev)
+static int brcmf_bus_started(struct brcmf_pub *drvr)
 {
        int ret = -1;
-       struct brcmf_bus *bus_if = dev_get_drvdata(dev);
-       struct brcmf_pub *drvr = bus_if->drvr;
+       struct brcmf_bus *bus_if = drvr->bus_if;
        struct brcmf_if *ifp;
        struct brcmf_if *p2p_ifp;
 
@@ -1013,6 +963,11 @@ int brcmf_bus_started(struct device *dev)
        /* signal bus ready */
        brcmf_bus_change_state(bus_if, BRCMF_BUS_UP);
 
+       /* do bus specific preinit here */
+       ret = brcmf_bus_preinit(bus_if);
+       if (ret < 0)
+               goto fail;
+
        /* Bus is ready, do any initialization */
        ret = brcmf_c_preinit_dcmds(ifp);
        if (ret < 0)
@@ -1088,6 +1043,60 @@ int brcmf_bus_started(struct device *dev)
        return ret;
 }
 
+int brcmf_attach(struct device *dev, struct brcmf_mp_device *settings)
+{
+       struct brcmf_pub *drvr = NULL;
+       int ret = 0;
+       int i;
+
+       brcmf_dbg(TRACE, "Enter\n");
+
+       /* Allocate primary brcmf_info */
+       drvr = kzalloc(sizeof(*drvr), GFP_ATOMIC);
+       if (!drvr)
+               return -ENOMEM;
+
+       for (i = 0; i < ARRAY_SIZE(drvr->if2bss); i++)
+               drvr->if2bss[i] = BRCMF_BSSIDX_INVALID;
+
+       mutex_init(&drvr->proto_block);
+
+       /* Link to bus module */
+       drvr->hdrlen = 0;
+       drvr->bus_if = dev_get_drvdata(dev);
+       drvr->bus_if->drvr = drvr;
+       drvr->settings = settings;
+
+       /* attach debug facilities */
+       brcmf_debug_attach(drvr);
+
+       /* Attach and link in the protocol */
+       ret = brcmf_proto_attach(drvr);
+       if (ret != 0) {
+               brcmf_err("brcmf_prot_attach failed\n");
+               goto fail;
+       }
+
+       /* Attach to events important for core code */
+       brcmf_fweh_register(drvr, BRCMF_E_PSM_WATCHDOG,
+                           brcmf_psm_watchdog_notify);
+
+       /* attach firmware event handler */
+       brcmf_fweh_attach(drvr);
+
+       ret = brcmf_bus_started(drvr);
+       if (ret != 0) {
+               brcmf_err("dongle is not responding: err=%d\n", ret);
+               goto fail;
+       }
+       return 0;
+
+fail:
+       brcmf_detach(dev);
+
+       return ret;
+}
+
 void brcmf_bus_add_txhdrlen(struct device *dev, uint len)
 {
        struct brcmf_bus *bus_if = dev_get_drvdata(dev);
@@ -1185,6 +1194,12 @@ void brcmf_bus_change_state(struct brcmf_bus *bus, enum brcmf_bus_state state)
        int ifidx;
 
        brcmf_dbg(TRACE, "%d -> %d\n", bus->state, state);
+
+       if (!drvr) {
+               brcmf_dbg(INFO, "ignoring transition, bus not attached yet\n");
+               return;
+       }
+
        bus->state = state;
 
        if (state == BRCMF_BUS_UP) {
index df8a1ecb99241588384fa4a8503822137e12b39b..232dcbb8331111e362a78c0042580e67e6233b6c 100644 (file)
@@ -181,6 +181,7 @@ enum brcmf_netif_stop_reason {
  * @netif_stop_lock: spinlock for update netif_stop from multiple sources.
  * @pend_8021x_cnt: tracks outstanding number of 802.1x frames.
  * @pend_8021x_wait: used for signalling change in count.
+ * @fwil_fwerr: flag indicating fwil layer should return firmware error codes.
  */
 struct brcmf_if {
        struct brcmf_pub *drvr;
@@ -198,6 +199,7 @@ struct brcmf_if {
        wait_queue_head_t pend_8021x_wait;
        struct in6_addr ipv6_addr_tbl[NDOL_MAX_ENTRIES];
        u8 ipv6addr_idx;
+       bool fwil_fwerr;
 };
 
 int brcmf_netdev_wait_pend8021x(struct brcmf_if *ifp);
index 47de35a338532f65f4817b11b98b1f51b2c4344f..bede7b7fd9962c01a62eb2e8f43f6c281ce0d976 100644 (file)
@@ -104,6 +104,9 @@ static void brcmf_feat_iovar_int_get(struct brcmf_if *ifp,
        u32 data;
        int err;
 
+       /* we need to know firmware error */
+       ifp->fwil_fwerr = true;
+
        err = brcmf_fil_iovar_int_get(ifp, name, &data);
        if (err == 0) {
                brcmf_dbg(INFO, "enabling feature: %s\n", brcmf_feat_names[id]);
@@ -112,6 +115,8 @@ static void brcmf_feat_iovar_int_get(struct brcmf_if *ifp,
                brcmf_dbg(TRACE, "%s feature check failed: %d\n",
                          brcmf_feat_names[id], err);
        }
+
+       ifp->fwil_fwerr = false;
 }
 
 static void brcmf_feat_iovar_data_set(struct brcmf_if *ifp,
@@ -120,6 +125,9 @@ static void brcmf_feat_iovar_data_set(struct brcmf_if *ifp,
 {
        int err;
 
+       /* we need to know firmware error */
+       ifp->fwil_fwerr = true;
+
        err = brcmf_fil_iovar_data_set(ifp, name, data, len);
        if (err != -BRCMF_FW_UNSUPPORTED) {
                brcmf_dbg(INFO, "enabling feature: %s\n", brcmf_feat_names[id]);
@@ -128,6 +136,8 @@ static void brcmf_feat_iovar_data_set(struct brcmf_if *ifp,
                brcmf_dbg(TRACE, "%s feature check failed: %d\n",
                          brcmf_feat_names[id], err);
        }
+
+       ifp->fwil_fwerr = false;
 }
 
 #define MAX_CAPS_BUFFER_SIZE   512
index f2cfdd3b2bf1a252f75240f393e2d462b8c3cfb0..fc5751116d99bf89f46769eb57bee290f6ae35c8 100644 (file)
@@ -131,6 +131,9 @@ brcmf_fil_cmd_data(struct brcmf_if *ifp, u32 cmd, void *data, u32 len, bool set)
                          brcmf_fil_get_errstr((u32)(-fwerr)), fwerr);
                err = -EBADE;
        }
+       if (ifp->fwil_fwerr)
+               return fwerr;
+
        return err;
 }
 
index 2ee54133efa1c05a3f30aacb1491cde4d270dfef..82064e90978497cce2ce98759cf099829a757bf1 100644 (file)
@@ -462,25 +462,23 @@ static int brcmf_p2p_set_firmware(struct brcmf_if *ifp, u8 *p2p_mac)
  * @dev_addr: optional device address.
  *
  * P2P needs mac addresses for P2P device and interface. If no device
- * address it specified, these are derived from the primary net device, ie.
- * the permanent ethernet address of the device.
+ * address is specified, these are derived from a random ethernet
+ * address.
  */
 static void brcmf_p2p_generate_bss_mac(struct brcmf_p2p_info *p2p, u8 *dev_addr)
 {
-       struct brcmf_if *pri_ifp = p2p->bss_idx[P2PAPI_BSSCFG_PRIMARY].vif->ifp;
-       bool local_admin = false;
+       bool random_addr = false;
 
-       if (!dev_addr || is_zero_ether_addr(dev_addr)) {
-               dev_addr = pri_ifp->mac_addr;
-               local_admin = true;
-       }
+       if (!dev_addr || is_zero_ether_addr(dev_addr))
+               random_addr = true;
 
-       /* Generate the P2P Device Address.  This consists of the device's
-        * primary MAC address with the locally administered bit set.
+       /* Generate the P2P Device Address obtaining a random ethernet
+        * address with the locally administered bit set.
         */
-       memcpy(p2p->dev_addr, dev_addr, ETH_ALEN);
-       if (local_admin)
-               p2p->dev_addr[0] |= 0x02;
+       if (random_addr)
+               eth_random_addr(p2p->dev_addr);
+       else
+               memcpy(p2p->dev_addr, dev_addr, ETH_ALEN);
 
        /* Generate the P2P Interface Address.  If the discovery and connection
         * BSSCFGs need to simultaneously co-exist, then this address must be
index 8752707557bf3c0a37dde088e82a57fd1e27e39d..a7d827ce1684a0908979a6389abecb840a242974 100644 (file)
@@ -1581,24 +1581,6 @@ static void brcmf_pcie_release_resource(struct brcmf_pciedev_info *devinfo)
 }
 
 
-static int brcmf_pcie_attach_bus(struct brcmf_pciedev_info *devinfo)
-{
-       int ret;
-
-       /* Attach to the common driver interface */
-       ret = brcmf_attach(&devinfo->pdev->dev, devinfo->settings);
-       if (ret) {
-               brcmf_err("brcmf_attach failed\n");
-       } else {
-               ret = brcmf_bus_started(&devinfo->pdev->dev);
-               if (ret)
-                       brcmf_err("dongle is not responding\n");
-       }
-
-       return ret;
-}
-
-
 static u32 brcmf_pcie_buscore_prep_addr(const struct pci_dev *pdev, u32 addr)
 {
        u32 ret_addr;
@@ -1735,7 +1717,7 @@ static void brcmf_pcie_setup(struct device *dev, int ret,
        init_waitqueue_head(&devinfo->mbdata_resp_wait);
 
        brcmf_pcie_intr_enable(devinfo);
-       if (brcmf_pcie_attach_bus(devinfo) == 0)
+       if (brcmf_attach(&devinfo->pdev->dev, devinfo->settings) == 0)
                return;
 
        brcmf_pcie_bus_console_read(devinfo);
index 08686147b59d5ed4e14c76e71eb077aa70712733..4a6459a429ec76f99a113ffe58ae83c4833ad4b7 100644 (file)
@@ -1706,8 +1706,7 @@ brcmf_sdio_read_control(struct brcmf_sdio *bus, u8 *hdr, uint len, uint doff)
        u8 *buf = NULL, *rbuf;
        int sdret;
 
-       brcmf_dbg(TRACE, "Enter\n");
-
+       brcmf_dbg(SDIO, "Enter\n");
        if (bus->rxblen)
                buf = vzalloc(bus->rxblen);
        if (!buf)
@@ -1810,7 +1809,7 @@ static uint brcmf_sdio_readframes(struct brcmf_sdio *bus, uint maxframes)
        struct brcmf_sdio_hdrinfo *rd = &bus->cur_read, rd_new;
        u8 head_read = 0;
 
-       brcmf_dbg(TRACE, "Enter\n");
+       brcmf_dbg(SDIO, "Enter\n");
 
        /* Not finished unless we encounter no more frames indication */
        bus->rxpending = true;
@@ -2345,7 +2344,7 @@ static int brcmf_sdio_tx_ctrlframe(struct brcmf_sdio *bus, u8 *frame, u16 len)
        struct brcmf_sdio_hdrinfo hd_info = {0};
        int ret;
 
-       brcmf_dbg(TRACE, "Enter\n");
+       brcmf_dbg(SDIO, "Enter\n");
 
        /* Back the pointer to make room for bus header */
        frame -= bus->tx_hdrlen;
@@ -2521,7 +2520,7 @@ static void brcmf_sdio_dpc(struct brcmf_sdio *bus)
        uint framecnt;                  /* Temporary counter of tx/rx frames */
        int err = 0;
 
-       brcmf_dbg(TRACE, "Enter\n");
+       brcmf_dbg(SDIO, "Enter\n");
 
        sdio_claim_host(bus->sdiodev->func1);
 
@@ -2606,7 +2605,7 @@ static void brcmf_sdio_dpc(struct brcmf_sdio *bus)
 
        /* Would be active due to wake-wlan in gSPI */
        if (intstatus & I_CHIPACTIVE) {
-               brcmf_dbg(INFO, "Dongle reports CHIPACTIVE\n");
+               brcmf_dbg(SDIO, "Dongle reports CHIPACTIVE\n");
                intstatus &= ~I_CHIPACTIVE;
        }
 
@@ -3411,6 +3410,20 @@ static int brcmf_sdio_bus_preinit(struct device *dev)
        u32 value;
        int err;
 
+       /* maxctl provided by common layer */
+       if (WARN_ON(!bus_if->maxctl))
+               return -EINVAL;
+
+       /* Allocate control receive buffer */
+       bus_if->maxctl += bus->roundup;
+       value = roundup((bus_if->maxctl + SDPCM_HDRLEN), ALIGNMENT);
+       value += bus->head_align;
+       bus->rxbuf = kmalloc(value, GFP_ATOMIC);
+       if (bus->rxbuf)
+               bus->rxblen = value;
+
+       brcmf_sdio_debugfs_create(bus);
+
        /* the commands below use the terms tx and rx from
         * a device perspective, ie. bus:txglom affects the
         * bus transfers from device to host.
@@ -4026,9 +4039,8 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
                                         void *nvram, u32 nvram_len)
 {
        struct brcmf_bus *bus_if = dev_get_drvdata(dev);
-       struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
-       struct brcmf_sdio *bus = sdiodev->bus;
-       struct brcmf_sdio_dev *sdiod = bus->sdiodev;
+       struct brcmf_sdio_dev *sdiod = bus_if->bus_priv.sdio;
+       struct brcmf_sdio *bus = sdiod->bus;
        struct brcmf_core *core = bus->sdio_core;
        u8 saveclk;
 
@@ -4037,9 +4049,6 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
        if (err)
                goto fail;
 
-       if (!bus_if->drvr)
-               return;
-
        /* try to download image and nvram to the dongle */
        bus->alp_only = true;
        err = brcmf_sdio_download_firmware(bus, code, nvram, nvram_len);
@@ -4051,7 +4060,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
        bus->sdcnt.tickcnt = 0;
        brcmf_sdio_wd_timer(bus, true);
 
-       sdio_claim_host(sdiodev->func1);
+       sdio_claim_host(sdiod->func1);
 
        /* Make sure backplane clock is on, needed to generate F2 interrupt */
        brcmf_sdio_clkctl(bus, CLK_AVAIL, false);
@@ -4059,9 +4068,9 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
                goto release;
 
        /* Force clocks on backplane to be sure F2 interrupt propagates */
-       saveclk = brcmf_sdiod_readb(sdiodev, SBSDIO_FUNC1_CHIPCLKCSR, &err);
+       saveclk = brcmf_sdiod_readb(sdiod, SBSDIO_FUNC1_CHIPCLKCSR, &err);
        if (!err) {
-               brcmf_sdiod_writeb(sdiodev, SBSDIO_FUNC1_CHIPCLKCSR,
+               brcmf_sdiod_writeb(sdiod, SBSDIO_FUNC1_CHIPCLKCSR,
                                   (saveclk | SBSDIO_FORCE_HT), &err);
        }
        if (err) {
@@ -4073,7 +4082,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
        brcmf_sdiod_writel(sdiod, core->base + SD_REG(tosbmailboxdata),
                           SDPCM_PROT_VERSION << SMB_DATA_VERSION_SHIFT, NULL);
 
-       err = sdio_enable_func(sdiodev->func2);
+       err = sdio_enable_func(sdiod->func2);
 
        brcmf_dbg(INFO, "enable F2: err=%d\n", err);
 
@@ -4085,10 +4094,10 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
                                   bus->hostintmask, NULL);
 
 
-               brcmf_sdiod_writeb(sdiodev, SBSDIO_WATERMARK, 8, &err);
+               brcmf_sdiod_writeb(sdiod, SBSDIO_WATERMARK, 8, &err);
        } else {
                /* Disable F2 again */
-               sdio_disable_func(sdiodev->func2);
+               sdio_disable_func(sdiod->func2);
                goto release;
        }
 
@@ -4096,7 +4105,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
                brcmf_sdio_sr_init(bus);
        } else {
                /* Restore previous clock setting */
-               brcmf_sdiod_writeb(sdiodev, SBSDIO_FUNC1_CHIPCLKCSR,
+               brcmf_sdiod_writeb(sdiod, SBSDIO_FUNC1_CHIPCLKCSR,
                                   saveclk, &err);
        }
 
@@ -4104,7 +4113,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
                /* Allow full data communication using DPC from now on. */
                brcmf_sdiod_change_state(bus->sdiodev, BRCMF_SDIOD_DATA);
 
-               err = brcmf_sdiod_intr_register(sdiodev);
+               err = brcmf_sdiod_intr_register(sdiod);
                if (err != 0)
                        brcmf_err("intr register failed:%d\n", err);
        }
@@ -4113,20 +4122,29 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
        if (err != 0)
                brcmf_sdio_clkctl(bus, CLK_NONE, false);
 
-       sdio_release_host(sdiodev->func1);
+       sdio_release_host(sdiod->func1);
 
-       err = brcmf_bus_started(dev);
+       /* Assign bus interface call back */
+       sdiod->bus_if->dev = sdiod->dev;
+       sdiod->bus_if->ops = &brcmf_sdio_bus_ops;
+       sdiod->bus_if->chip = bus->ci->chip;
+       sdiod->bus_if->chiprev = bus->ci->chiprev;
+
+       /* Attach to the common layer, reserve hdr space */
+       err = brcmf_attach(sdiod->dev, sdiod->settings);
        if (err != 0) {
-               brcmf_err("dongle is not responding\n");
+               brcmf_err("brcmf_attach failed\n");
                goto fail;
        }
+
+       /* ready */
        return;
 
 release:
-       sdio_release_host(sdiodev->func1);
+       sdio_release_host(sdiod->func1);
 fail:
        brcmf_dbg(TRACE, "failed: dev=%s, err=%d\n", dev_name(dev), err);
-       device_release_driver(&sdiodev->func2->dev);
+       device_release_driver(&sdiod->func2->dev);
        device_release_driver(dev);
 }
 
@@ -4188,39 +4206,13 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
        bus->dpc_triggered = false;
        bus->dpc_running = false;
 
-       /* Assign bus interface call back */
-       bus->sdiodev->bus_if->dev = bus->sdiodev->dev;
-       bus->sdiodev->bus_if->ops = &brcmf_sdio_bus_ops;
-       bus->sdiodev->bus_if->chip = bus->ci->chip;
-       bus->sdiodev->bus_if->chiprev = bus->ci->chiprev;
-
        /* default sdio bus header length for tx packet */
        bus->tx_hdrlen = SDPCM_HWHDR_LEN + SDPCM_SWHDR_LEN;
 
-       /* Attach to the common layer, reserve hdr space */
-       ret = brcmf_attach(bus->sdiodev->dev, bus->sdiodev->settings);
-       if (ret != 0) {
-               brcmf_err("brcmf_attach failed\n");
-               goto fail;
-       }
-
        /* Query the F2 block size, set roundup accordingly */
        bus->blocksize = bus->sdiodev->func2->cur_blksize;
        bus->roundup = min(max_roundup, bus->blocksize);
 
-       /* Allocate buffers */
-       if (bus->sdiodev->bus_if->maxctl) {
-               bus->sdiodev->bus_if->maxctl += bus->roundup;
-               bus->rxblen =
-                   roundup((bus->sdiodev->bus_if->maxctl + SDPCM_HDRLEN),
-                           ALIGNMENT) + bus->head_align;
-               bus->rxbuf = kmalloc(bus->rxblen, GFP_ATOMIC);
-               if (!(bus->rxbuf)) {
-                       brcmf_err("rxbuf allocation failed\n");
-                       goto fail;
-               }
-       }
-
        sdio_claim_host(bus->sdiodev->func1);
 
        /* Disable F2 to clear any intermediate frame state on the dongle */
@@ -4241,7 +4233,6 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
        /* SR state */
        bus->sr_enabled = false;
 
-       brcmf_sdio_debugfs_create(bus);
        brcmf_dbg(INFO, "completed!!\n");
 
        ret = brcmf_fw_map_chip_to_name(bus->ci->chip, bus->ci->chiprev,
index b27170c12482de0ca4115b4bc9484c214d1c4b86..41642dda40fd0e2b172bdb2b00b821ca06d42265 100644 (file)
@@ -1146,39 +1146,15 @@ static int brcmf_usb_get_fwname(struct device *dev, u32 chip, u32 chiprev,
 }
 
 static const struct brcmf_bus_ops brcmf_usb_bus_ops = {
-       .txdata = brcmf_usb_tx,
+       .preinit = brcmf_usb_up,
        .stop = brcmf_usb_down,
+       .txdata = brcmf_usb_tx,
        .txctl = brcmf_usb_tx_ctlpkt,
        .rxctl = brcmf_usb_rx_ctlpkt,
        .wowl_config = brcmf_usb_wowl_config,
        .get_fwname = brcmf_usb_get_fwname,
 };
 
-static int brcmf_usb_bus_setup(struct brcmf_usbdev_info *devinfo)
-{
-       int ret;
-
-       /* Attach to the common driver interface */
-       ret = brcmf_attach(devinfo->dev, devinfo->settings);
-       if (ret) {
-               brcmf_err("brcmf_attach failed\n");
-               return ret;
-       }
-
-       ret = brcmf_usb_up(devinfo->dev);
-       if (ret)
-               goto fail;
-
-       ret = brcmf_bus_started(devinfo->dev);
-       if (ret)
-               goto fail;
-
-       return 0;
-fail:
-       brcmf_detach(devinfo->dev);
-       return ret;
-}
-
 static void brcmf_usb_probe_phase2(struct device *dev, int ret,
                                   const struct firmware *fw,
                                   void *nvram, u32 nvlen)
@@ -1206,7 +1182,8 @@ static void brcmf_usb_probe_phase2(struct device *dev, int ret,
        if (ret)
                goto error;
 
-       ret = brcmf_usb_bus_setup(devinfo);
+       /* Attach to the common driver interface */
+       ret = brcmf_attach(devinfo->dev, devinfo->settings);
        if (ret)
                goto error;
 
@@ -1256,7 +1233,7 @@ static int brcmf_usb_probe_cb(struct brcmf_usbdev_info *devinfo)
        }
 
        if (!brcmf_usb_dlneeded(devinfo)) {
-               ret = brcmf_usb_bus_setup(devinfo);
+               ret = brcmf_attach(devinfo->dev, devinfo->settings);
                if (ret)
                        goto fail;
                /* we are done */
@@ -1459,7 +1436,7 @@ static int brcmf_usb_resume(struct usb_interface *intf)
 
        brcmf_dbg(USB, "Enter\n");
        if (!devinfo->wowl_enabled)
-               return brcmf_usb_bus_setup(devinfo);
+               return brcmf_attach(devinfo->dev, devinfo->settings);
 
        devinfo->bus_pub.state = BRCMFMAC_USB_STATE_UP;
        brcmf_usb_rx_fill_all(devinfo);
index 3a03287fa9122860db4ba371324699bfea344326..db783e94f929eb4c22573ed598b8a32f6ea25094 100644 (file)
@@ -652,7 +652,6 @@ static void brcms_reg_apply_radar_flags(struct wiphy *wiphy)
                 */
                if (!(ch->flags & IEEE80211_CHAN_DISABLED))
                        ch->flags |= IEEE80211_CHAN_RADAR |
-                                    IEEE80211_CHAN_NO_IR |
                                     IEEE80211_CHAN_NO_IR;
        }
 }
index b22567dff893d75a48543081ecb6a3d0ebaaaf01..26eb8b0c21049cd4eed4185c5435081cf6f8af94 100644 (file)
@@ -5,8 +5,8 @@ config WLAN_VENDOR_CISCO
          If you have a wireless card belonging to this class, say Y.
 
          Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         the questions about  cards. If you say Y, you will be asked for
+         kernel: saying N will just cause the configurator to skip all the
+         questions about these cards. If you say Y, you will be asked for
          your specific card in the following questions.
 
 if WLAN_VENDOR_CISCO
index 5b14f2f64a8afde3077bbbb340382f15349bd430..6fdc14b08b8e895538953e45b3401163cf4be522 100644 (file)
@@ -5,8 +5,8 @@ config WLAN_VENDOR_INTEL
          If you have a wireless card belonging to this class, say Y.
 
          Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         the questions about  cards. If you say Y, you will be asked for
+         kernel: saying N will just cause the configurator to skip all the
+         questions about these cards. If you say Y, you will be asked for
          your specific card in the following questions.
 
 if WLAN_VENDOR_INTEL
index c5f2ddf9b0fe5fafb8633ce7a7905bd5f8fb61d8..e5a2fc738ac3616fe64f04c1472bc1d044f0c0d7 100644 (file)
@@ -91,7 +91,6 @@ config IWLWIFI_BCAST_FILTERING
 config IWLWIFI_PCIE_RTPM
        bool "Enable runtime power management mode for PCIe devices"
        depends on IWLMVM && PM && EXPERT
-       default false
        help
          Say Y here to enable runtime power management for PCIe
          devices.  If enabled, the device will go into low power mode
index 3721a3ed358b830fb94925c01273aba1a4e633cf..f824bebceb06081e915a07d746420b602f024fd5 100644 (file)
@@ -211,7 +211,7 @@ enum {
  * @TE_V2_NOTIF_HOST_FRAG_END:request/receive notification on frag end
  * @TE_V2_NOTIF_INTERNAL_FRAG_START: internal FW use.
  * @TE_V2_NOTIF_INTERNAL_FRAG_END: internal FW use.
- * @T2_V2_START_IMMEDIATELY: start time event immediately
+ * @TE_V2_START_IMMEDIATELY: start time event immediately
  * @TE_V2_DEP_OTHER: depends on another time event
  * @TE_V2_DEP_TSF: depends on a specific time
  * @TE_V2_EVENT_SOCIOPATHIC: can't co-exist with other events of tha same MAC
@@ -230,7 +230,7 @@ enum iwl_time_event_policy {
        TE_V2_NOTIF_HOST_FRAG_END = BIT(5),
        TE_V2_NOTIF_INTERNAL_FRAG_START = BIT(6),
        TE_V2_NOTIF_INTERNAL_FRAG_END = BIT(7),
-       T2_V2_START_IMMEDIATELY = BIT(11),
+       TE_V2_START_IMMEDIATELY = BIT(11),
 
        /* placement characteristics */
        TE_V2_DEP_OTHER = BIT(TE_V2_PLACEMENT_POS),
index 67aefc8fc9acc51a61070cc828a3c27019746409..7bd704a3e6409b3cab2f8032eddc63afb1e41a5c 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -33,6 +34,7 @@
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -942,7 +944,6 @@ void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt)
 
 out:
        iwl_fw_free_dump_desc(fwrt);
-       fwrt->dump.trig = NULL;
        clear_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status);
        IWL_DEBUG_INFO(fwrt, "WRT dump done\n");
 }
@@ -1112,6 +1113,14 @@ void iwl_fw_error_dump_wk(struct work_struct *work)
            fwrt->ops->dump_start(fwrt->ops_ctx))
                return;
 
+       if (fwrt->ops && fwrt->ops->fw_running &&
+           !fwrt->ops->fw_running(fwrt->ops_ctx)) {
+               IWL_ERR(fwrt, "Firmware not running - cannot dump error\n");
+               iwl_fw_free_dump_desc(fwrt);
+               clear_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status);
+               goto out;
+       }
+
        if (fwrt->trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) {
                /* stop recording */
                iwl_fw_dbg_stop_recording(fwrt);
@@ -1145,7 +1154,7 @@ void iwl_fw_error_dump_wk(struct work_struct *work)
                        iwl_write_prph(fwrt->trans, DBGC_OUT_CTRL, out_ctrl);
                }
        }
-
+out:
        if (fwrt->ops && fwrt->ops->dump_end)
                fwrt->ops->dump_end(fwrt->ops_ctx);
 }
index 223fb77a3aa9d64456244dd4c5156b8885cec6fd..72259bff9922f7308a3d78250247ccfd29d84a8c 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -33,6 +34,7 @@
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -91,6 +93,7 @@ static inline void iwl_fw_free_dump_desc(struct iwl_fw_runtime *fwrt)
        if (fwrt->dump.desc != &iwl_dump_desc_assert)
                kfree(fwrt->dump.desc);
        fwrt->dump.desc = NULL;
+       fwrt->dump.trig = NULL;
 }
 
 void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt);
index e57ff92a68ae2062721e3ad9bbec32997a263e79..3da468d2cc92f50697649adc7cc12d63c473ccb3 100644 (file)
@@ -75,6 +75,20 @@ static inline void iwl_fw_cancel_timestamp(struct iwl_fw_runtime *fwrt)
        cancel_delayed_work_sync(&fwrt->timestamp.wk);
 }
 
+static inline void iwl_fw_suspend_timestamp(struct iwl_fw_runtime *fwrt)
+{
+       cancel_delayed_work_sync(&fwrt->timestamp.wk);
+}
+
+static inline void iwl_fw_resume_timestamp(struct iwl_fw_runtime *fwrt)
+{
+       if (!fwrt->timestamp.delay)
+               return;
+
+       schedule_delayed_work(&fwrt->timestamp.wk,
+                             round_jiffies_relative(fwrt->timestamp.delay));
+}
+
 #else
 static inline int iwl_fwrt_dbgfs_register(struct iwl_fw_runtime *fwrt,
                                          struct dentry *dbgfs_dir)
@@ -84,4 +98,8 @@ static inline int iwl_fwrt_dbgfs_register(struct iwl_fw_runtime *fwrt,
 
 static inline void iwl_fw_cancel_timestamp(struct iwl_fw_runtime *fwrt) {}
 
+static inline void iwl_fw_suspend_timestamp(struct iwl_fw_runtime *fwrt) {}
+
+static inline void iwl_fw_resume_timestamp(struct iwl_fw_runtime *fwrt) {}
+
 #endif /* CONFIG_IWLWIFI_DEBUGFS */
index c39fe84bb4c4a9741497dbea4a143ccd3aeacca7..2efac307909e1c6cfae6b7d80ac28eaa555c1100 100644 (file)
@@ -77,8 +77,14 @@ void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans,
 }
 IWL_EXPORT_SYMBOL(iwl_fw_runtime_init);
 
-void iwl_fw_runtime_exit(struct iwl_fw_runtime *fwrt)
+void iwl_fw_runtime_suspend(struct iwl_fw_runtime *fwrt)
 {
-       iwl_fw_cancel_timestamp(fwrt);
+       iwl_fw_suspend_timestamp(fwrt);
 }
-IWL_EXPORT_SYMBOL(iwl_fw_runtime_exit);
+IWL_EXPORT_SYMBOL(iwl_fw_runtime_suspend);
+
+void iwl_fw_runtime_resume(struct iwl_fw_runtime *fwrt)
+{
+       iwl_fw_resume_timestamp(fwrt);
+}
+IWL_EXPORT_SYMBOL(iwl_fw_runtime_resume);
index e25c049f980f27e33bb90e2234b5015f5f52b298..3fb940ebd74aae826076657daab9b0d2d39e3044 100644 (file)
@@ -6,6 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -26,6 +27,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -68,6 +70,7 @@
 struct iwl_fw_runtime_ops {
        int (*dump_start)(void *ctx);
        void (*dump_end)(void *ctx);
+       bool (*fw_running)(void *ctx);
 };
 
 #define MAX_NUM_LMAC 2
@@ -150,6 +153,10 @@ void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans,
 
 void iwl_fw_runtime_exit(struct iwl_fw_runtime *fwrt);
 
+void iwl_fw_runtime_suspend(struct iwl_fw_runtime *fwrt);
+
+void iwl_fw_runtime_resume(struct iwl_fw_runtime *fwrt);
+
 static inline void iwl_fw_set_current_image(struct iwl_fw_runtime *fwrt,
                                            enum iwl_ucode_type cur_fw_img)
 {
index 0e6cf39285f405d8c42e65ea2d0ef7d4e86179f0..2efe9b099556d862c946f2aeaff9746cfc3dded5 100644 (file)
@@ -1098,6 +1098,8 @@ int iwl_mvm_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
        /* make sure the d0i3 exit work is not pending */
        flush_work(&mvm->d0i3_exit_work);
 
+       iwl_fw_runtime_suspend(&mvm->fwrt);
+
        ret = iwl_trans_suspend(trans);
        if (ret)
                return ret;
@@ -2012,6 +2014,8 @@ int iwl_mvm_resume(struct ieee80211_hw *hw)
 
        mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_DISABLED;
 
+       iwl_fw_runtime_resume(&mvm->fwrt);
+
        return ret;
 }
 
@@ -2038,6 +2042,8 @@ static int iwl_mvm_d3_test_open(struct inode *inode, struct file *file)
 
        mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_D3;
 
+       iwl_fw_runtime_suspend(&mvm->fwrt);
+
        /* start pseudo D3 */
        rtnl_lock();
        err = __iwl_mvm_suspend(mvm->hw, mvm->hw->wiphy->wowlan_config, true);
@@ -2098,6 +2104,8 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file)
        __iwl_mvm_resume(mvm, true);
        rtnl_unlock();
 
+       iwl_fw_runtime_resume(&mvm->fwrt);
+
        mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_DISABLED;
 
        iwl_abort_notification_waits(&mvm->notif_wait);
index a7892c1254a293f8184b470d5b97d7c425051d7c..9c436d8d001d39f47880e16e1d14fb64d574587d 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -35,6 +36,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -1281,9 +1283,6 @@ static ssize_t iwl_dbgfs_fw_dbg_collect_write(struct iwl_mvm *mvm,
 {
        int ret;
 
-       if (!iwl_mvm_firmware_running(mvm))
-               return -EIO;
-
        ret = iwl_mvm_ref_sync(mvm, IWL_MVM_REF_PRPH_WRITE);
        if (ret)
                return ret;
index 2f22e14e00fe881bc9868a22c25ba41286a9ea51..8ba16fc24e3af0bd6bc07b6de7195e375d3f0cb1 100644 (file)
@@ -438,7 +438,8 @@ int iwl_mvm_mac_ctxt_init(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
        }
 
        /* Allocate the CAB queue for softAP and GO interfaces */
-       if (vif->type == NL80211_IFTYPE_AP) {
+       if (vif->type == NL80211_IFTYPE_AP ||
+           vif->type == NL80211_IFTYPE_ADHOC) {
                /*
                 * For TVQM this will be overwritten later with the FW assigned
                 * queue value (when queue is enabled).
index 8aed40a8bc385fa6553e815027972f73feccdbe1..ebf511150f4d02561362d8f3973ec2751cb8d3ef 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -2106,15 +2107,40 @@ static int iwl_mvm_start_ap_ibss(struct ieee80211_hw *hw,
        if (ret)
                goto out_remove;
 
-       ret = iwl_mvm_add_mcast_sta(mvm, vif);
-       if (ret)
-               goto out_unbind;
-
-       /* Send the bcast station. At this stage the TBTT and DTIM time events
-        * are added and applied to the scheduler */
-       ret = iwl_mvm_send_add_bcast_sta(mvm, vif);
-       if (ret)
-               goto out_rm_mcast;
+       /*
+        * This is not very nice, but the simplest:
+        * For older FWs adding the mcast sta before the bcast station may
+        * cause assert 0x2b00.
+        * This is fixed in later FW so make the order of removal depend on
+        * the TLV
+        */
+       if (fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_STA_TYPE)) {
+               ret = iwl_mvm_add_mcast_sta(mvm, vif);
+               if (ret)
+                       goto out_unbind;
+               /*
+                * Send the bcast station. At this stage the TBTT and DTIM time
+                * events are added and applied to the scheduler
+                */
+               ret = iwl_mvm_send_add_bcast_sta(mvm, vif);
+               if (ret) {
+                       iwl_mvm_rm_mcast_sta(mvm, vif);
+                       goto out_unbind;
+               }
+       } else {
+               /*
+                * Send the bcast station. At this stage the TBTT and DTIM time
+                * events are added and applied to the scheduler
+                */
+               iwl_mvm_send_add_bcast_sta(mvm, vif);
+               if (ret)
+                       goto out_unbind;
+               iwl_mvm_add_mcast_sta(mvm, vif);
+               if (ret) {
+                       iwl_mvm_send_rm_bcast_sta(mvm, vif);
+                       goto out_unbind;
+               }
+       }
 
        /* must be set before quota calculations */
        mvmvif->ap_ibss_active = true;
@@ -2144,7 +2170,6 @@ static int iwl_mvm_start_ap_ibss(struct ieee80211_hw *hw,
        iwl_mvm_power_update_mac(mvm);
        mvmvif->ap_ibss_active = false;
        iwl_mvm_send_rm_bcast_sta(mvm, vif);
-out_rm_mcast:
        iwl_mvm_rm_mcast_sta(mvm, vif);
 out_unbind:
        iwl_mvm_binding_remove_vif(mvm, vif);
@@ -2682,6 +2707,10 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw,
 
                /* enable beacon filtering */
                WARN_ON(iwl_mvm_enable_beacon_filter(mvm, vif, 0));
+
+               iwl_mvm_rs_rate_init(mvm, sta, mvmvif->phy_ctxt->channel->band,
+                                    false);
+
                ret = 0;
        } else if (old_state == IEEE80211_STA_AUTHORIZED &&
                   new_state == IEEE80211_STA_ASSOC) {
index 2d28e08042186e7ad0a060a267569e972b3c7feb..89ff02d7c87663ce7a1351940308292234f85a48 100644 (file)
@@ -90,6 +90,7 @@
 #include "fw/runtime.h"
 #include "fw/dbg.h"
 #include "fw/acpi.h"
+#include "fw/debugfs.h"
 
 #define IWL_MVM_MAX_ADDRESSES          5
 /* RSSI offset for WkP */
@@ -1783,6 +1784,7 @@ static inline u32 iwl_mvm_flushable_queues(struct iwl_mvm *mvm)
 
 static inline void iwl_mvm_stop_device(struct iwl_mvm *mvm)
 {
+       iwl_fw_cancel_timestamp(&mvm->fwrt);
        iwl_free_fw_paging(&mvm->fwrt);
        clear_bit(IWL_MVM_STATUS_FIRMWARE_RUNNING, &mvm->status);
        iwl_fw_dump_conf_clear(&mvm->fwrt);
index 5d525a0023dc3ff4ea1ee074093f547aa6232016..ab7fb5aad984a509a788f1355056da1f2aa45765 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -35,6 +36,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -552,9 +554,15 @@ static void iwl_mvm_fwrt_dump_end(void *ctx)
        iwl_mvm_unref(mvm, IWL_MVM_REF_FW_DBG_COLLECT);
 }
 
+static bool iwl_mvm_fwrt_fw_running(void *ctx)
+{
+       return iwl_mvm_firmware_running(ctx);
+}
+
 static const struct iwl_fw_runtime_ops iwl_mvm_fwrt_ops = {
        .dump_start = iwl_mvm_fwrt_dump_start,
        .dump_end = iwl_mvm_fwrt_dump_end,
+       .fw_running = iwl_mvm_fwrt_fw_running,
 };
 
 static struct iwl_op_mode *
@@ -802,7 +810,6 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
        iwl_mvm_leds_exit(mvm);
        iwl_mvm_thermal_exit(mvm);
  out_free:
-       iwl_fw_runtime_exit(&mvm->fwrt);
        iwl_fw_flush_dump(&mvm->fwrt);
 
        if (iwlmvm_mod_params.init_dbg)
@@ -843,7 +850,6 @@ static void iwl_op_mode_mvm_stop(struct iwl_op_mode *op_mode)
 #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_IWLWIFI_DEBUGFS)
        kfree(mvm->d3_resume_sram);
 #endif
-       iwl_fw_runtime_exit(&mvm->fwrt);
        iwl_trans_op_mode_leave(mvm->trans);
 
        iwl_phy_db_free(mvm->phy_db);
index 60abb0084ee5905dd38f89a694542ddf1094a742..47f4c7a1d80d26ca22ba118914be3c44ce373c13 100644 (file)
@@ -2684,7 +2684,8 @@ static void rs_get_initial_rate(struct iwl_mvm *mvm,
                                struct ieee80211_sta *sta,
                                struct iwl_lq_sta *lq_sta,
                                enum nl80211_band band,
-                               struct rs_rate *rate)
+                               struct rs_rate *rate,
+                               bool init)
 {
        int i, nentries;
        unsigned long active_rate;
@@ -2738,14 +2739,25 @@ static void rs_get_initial_rate(struct iwl_mvm *mvm,
         */
        if (sta->vht_cap.vht_supported &&
            best_rssi > IWL_RS_LOW_RSSI_THRESHOLD) {
-               switch (sta->bandwidth) {
-               case IEEE80211_STA_RX_BW_160:
-               case IEEE80211_STA_RX_BW_80:
-               case IEEE80211_STA_RX_BW_40:
+               /*
+                * In AP mode, when a new station associates, rs is initialized
+                * immediately upon association completion, before the phy
+                * context is updated with the association parameters, so the
+                * sta bandwidth might be wider than the phy context allows.
+                * To avoid this issue, always initialize rs with 20mhz
+                * bandwidth rate, and after authorization, when the phy context
+                * is already up-to-date, re-init rs with the correct bw.
+                */
+               u32 bw = init ? RATE_MCS_CHAN_WIDTH_20 : rs_bw_from_sta_bw(sta);
+
+               switch (bw) {
+               case RATE_MCS_CHAN_WIDTH_40:
+               case RATE_MCS_CHAN_WIDTH_80:
+               case RATE_MCS_CHAN_WIDTH_160:
                        initial_rates = rs_optimal_rates_vht;
                        nentries = ARRAY_SIZE(rs_optimal_rates_vht);
                        break;
-               case IEEE80211_STA_RX_BW_20:
+               case RATE_MCS_CHAN_WIDTH_20:
                        initial_rates = rs_optimal_rates_vht_20mhz;
                        nentries = ARRAY_SIZE(rs_optimal_rates_vht_20mhz);
                        break;
@@ -2756,7 +2768,7 @@ static void rs_get_initial_rate(struct iwl_mvm *mvm,
 
                active_rate = lq_sta->active_siso_rate;
                rate->type = LQ_VHT_SISO;
-               rate->bw = rs_bw_from_sta_bw(sta);
+               rate->bw = bw;
        } else if (sta->ht_cap.ht_supported &&
                   best_rssi > IWL_RS_LOW_RSSI_THRESHOLD) {
                initial_rates = rs_optimal_rates_ht;
@@ -2839,7 +2851,7 @@ static void rs_initialize_lq(struct iwl_mvm *mvm,
        tbl = &(lq_sta->lq_info[active_tbl]);
        rate = &tbl->rate;
 
-       rs_get_initial_rate(mvm, sta, lq_sta, band, rate);
+       rs_get_initial_rate(mvm, sta, lq_sta, band, rate, init);
        rs_init_optimal_rate(mvm, sta, lq_sta);
 
        WARN_ONCE(rate->ant != ANT_A && rate->ant != ANT_B,
index a3f7c1bf3cc858b9166d33707fac43a5b1cf1037..580de5851fc7f6a129980337a0c6e844d63374c1 100644 (file)
@@ -71,6 +71,7 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb,
        struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
        struct ieee80211_rx_status *stats = IEEE80211_SKB_RXCB(skb);
        struct iwl_mvm_key_pn *ptk_pn;
+       int res;
        u8 tid, keyidx;
        u8 pn[IEEE80211_CCMP_PN_LEN];
        u8 *extiv;
@@ -127,12 +128,13 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb,
        pn[4] = extiv[1];
        pn[5] = extiv[0];
 
-       if (memcmp(pn, ptk_pn->q[queue].pn[tid],
-                  IEEE80211_CCMP_PN_LEN) <= 0)
+       res = memcmp(pn, ptk_pn->q[queue].pn[tid], IEEE80211_CCMP_PN_LEN);
+       if (res < 0)
+               return -1;
+       if (!res && !(stats->flag & RX_FLAG_ALLOW_SAME_PN))
                return -1;
 
-       if (!(stats->flag & RX_FLAG_AMSDU_MORE))
-               memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN);
+       memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN);
        stats->flag |= RX_FLAG_PN_VALIDATED;
 
        return 0;
@@ -314,28 +316,21 @@ static void iwl_mvm_rx_csum(struct ieee80211_sta *sta,
 }
 
 /*
- * returns true if a packet outside BA session is a duplicate and
- * should be dropped
+ * returns true if a packet is a duplicate and should be dropped.
+ * Updates AMSDU PN tracking info
  */
-static bool iwl_mvm_is_nonagg_dup(struct ieee80211_sta *sta, int queue,
-                                 struct ieee80211_rx_status *rx_status,
-                                 struct ieee80211_hdr *hdr,
-                                 struct iwl_rx_mpdu_desc *desc)
+static bool iwl_mvm_is_dup(struct ieee80211_sta *sta, int queue,
+                          struct ieee80211_rx_status *rx_status,
+                          struct ieee80211_hdr *hdr,
+                          struct iwl_rx_mpdu_desc *desc)
 {
        struct iwl_mvm_sta *mvm_sta;
        struct iwl_mvm_rxq_dup_data *dup_data;
-       u8 baid, tid, sub_frame_idx;
+       u8 tid, sub_frame_idx;
 
        if (WARN_ON(IS_ERR_OR_NULL(sta)))
                return false;
 
-       baid = (le32_to_cpu(desc->reorder_data) &
-               IWL_RX_MPDU_REORDER_BAID_MASK) >>
-               IWL_RX_MPDU_REORDER_BAID_SHIFT;
-
-       if (baid != IWL_RX_REORDER_DATA_INVALID_BAID)
-               return false;
-
        mvm_sta = iwl_mvm_sta_from_mac80211(sta);
        dup_data = &mvm_sta->dup_data[queue];
 
@@ -365,6 +360,12 @@ static bool iwl_mvm_is_nonagg_dup(struct ieee80211_sta *sta, int queue,
                     dup_data->last_sub_frame[tid] >= sub_frame_idx))
                return true;
 
+       /* Allow same PN as the first subframe for following sub frames */
+       if (dup_data->last_seq[tid] == hdr->seq_ctrl &&
+           sub_frame_idx > dup_data->last_sub_frame[tid] &&
+           desc->mac_flags2 & IWL_RX_MPDU_MFLG2_AMSDU)
+               rx_status->flag |= RX_FLAG_ALLOW_SAME_PN;
+
        dup_data->last_seq[tid] = hdr->seq_ctrl;
        dup_data->last_sub_frame[tid] = sub_frame_idx;
 
@@ -971,7 +972,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
                if (ieee80211_is_data(hdr->frame_control))
                        iwl_mvm_rx_csum(sta, skb, desc);
 
-               if (iwl_mvm_is_nonagg_dup(sta, queue, rx_status, hdr, desc)) {
+               if (iwl_mvm_is_dup(sta, queue, rx_status, hdr, desc)) {
                        kfree_skb(skb);
                        goto out;
                }
index 6b2674e0260682e6bc52df8dabedec9270c99f4d..630e23cb0ffb55f9cbbc4ef4496beba4d4bc62c2 100644 (file)
@@ -2039,7 +2039,7 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
        struct iwl_trans_txq_scd_cfg cfg = {
                .fifo = IWL_MVM_TX_FIFO_MCAST,
                .sta_id = msta->sta_id,
-               .tid = IWL_MAX_TID_COUNT,
+               .tid = 0,
                .aggregate = false,
                .frame_limit = IWL_FRAME_LIMIT,
        };
@@ -2052,6 +2052,17 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
                    vif->type != NL80211_IFTYPE_ADHOC))
                return -ENOTSUPP;
 
+       /*
+        * In IBSS, ieee80211_check_queues() sets the cab_queue to be
+        * invalid, so make sure we use the queue we want.
+        * Note that this is done here as we want to avoid making DQA
+        * changes in mac80211 layer.
+        */
+       if (vif->type == NL80211_IFTYPE_ADHOC) {
+               vif->cab_queue = IWL_MVM_DQA_GCAST_QUEUE;
+               mvmvif->cab_queue = vif->cab_queue;
+       }
+
        /*
         * While in previous FWs we had to exclude cab queue from TFD queue
         * mask, now it is needed as any other queue.
@@ -2079,24 +2090,13 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
        if (iwl_mvm_has_new_tx_api(mvm)) {
                int queue = iwl_mvm_tvqm_enable_txq(mvm, vif->cab_queue,
                                                    msta->sta_id,
-                                                   IWL_MAX_TID_COUNT,
+                                                   0,
                                                    timeout);
                mvmvif->cab_queue = queue;
        } else if (!fw_has_api(&mvm->fw->ucode_capa,
-                              IWL_UCODE_TLV_API_STA_TYPE)) {
-               /*
-                * In IBSS, ieee80211_check_queues() sets the cab_queue to be
-                * invalid, so make sure we use the queue we want.
-                * Note that this is done here as we want to avoid making DQA
-                * changes in mac80211 layer.
-                */
-               if (vif->type == NL80211_IFTYPE_ADHOC) {
-                       vif->cab_queue = IWL_MVM_DQA_GCAST_QUEUE;
-                       mvmvif->cab_queue = vif->cab_queue;
-               }
+                              IWL_UCODE_TLV_API_STA_TYPE))
                iwl_mvm_enable_txq(mvm, vif->cab_queue, vif->cab_queue, 0,
                                   &cfg, timeout);
-       }
 
        return 0;
 }
@@ -2115,7 +2115,7 @@ int iwl_mvm_rm_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
        iwl_mvm_flush_sta(mvm, &mvmvif->mcast_sta, true, 0);
 
        iwl_mvm_disable_txq(mvm, mvmvif->cab_queue, vif->cab_queue,
-                           IWL_MAX_TID_COUNT, 0);
+                           0, 0);
 
        ret = iwl_mvm_rm_sta_common(mvm, mvmvif->mcast_sta.sta_id);
        if (ret)
@@ -3170,8 +3170,9 @@ static int __iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, u8 sta_id,
        int ret, size;
        u32 status;
 
+       /* This is a valid situation for GTK removal */
        if (sta_id == IWL_MVM_INVALID_STA)
-               return -EINVAL;
+               return 0;
 
        key_flags = cpu_to_le16((keyconf->keyidx << STA_KEY_FLG_KEYID_POS) &
                                 STA_KEY_FLG_KEYID_MSK);
index 200ab50ec86b2f22a0b2eeacf102a60c1c293f1c..acb217e666dbc6c38a88e88bb84f8992f01c8082 100644 (file)
@@ -616,7 +616,7 @@ void iwl_mvm_protect_session(struct iwl_mvm *mvm,
        time_cmd.repeat = 1;
        time_cmd.policy = cpu_to_le16(TE_V2_NOTIF_HOST_EVENT_START |
                                      TE_V2_NOTIF_HOST_EVENT_END |
-                                     T2_V2_START_IMMEDIATELY);
+                                     TE_V2_START_IMMEDIATELY);
 
        if (!wait_for_notif) {
                iwl_mvm_time_event_send_add(mvm, vif, te_data, &time_cmd);
@@ -803,7 +803,7 @@ int iwl_mvm_start_p2p_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
        time_cmd.repeat = 1;
        time_cmd.policy = cpu_to_le16(TE_V2_NOTIF_HOST_EVENT_START |
                                      TE_V2_NOTIF_HOST_EVENT_END |
-                                     T2_V2_START_IMMEDIATELY);
+                                     TE_V2_START_IMMEDIATELY);
 
        return iwl_mvm_time_event_send_add(mvm, vif, te_data, &time_cmd);
 }
@@ -913,6 +913,8 @@ int iwl_mvm_schedule_csa_period(struct iwl_mvm *mvm,
        time_cmd.interval = cpu_to_le32(1);
        time_cmd.policy = cpu_to_le16(TE_V2_NOTIF_HOST_EVENT_START |
                                      TE_V2_ABSENCE);
+       if (!apply_time)
+               time_cmd.policy |= cpu_to_le16(TE_V2_START_IMMEDIATELY);
 
        return iwl_mvm_time_event_send_add(mvm, vif, te_data, &time_cmd);
 }
index dda77b327c9861d09c06aa56f5ab24b762764717..af6dfceab6b855baad6cfb2878dc431ce924f79c 100644 (file)
@@ -419,11 +419,11 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm,
 {
        struct ieee80211_key_conf *keyconf = info->control.hw_key;
        u8 *crypto_hdr = skb_frag->data + hdrlen;
+       enum iwl_tx_cmd_sec_ctrl type = TX_CMD_SEC_CCM;
        u64 pn;
 
        switch (keyconf->cipher) {
        case WLAN_CIPHER_SUITE_CCMP:
-       case WLAN_CIPHER_SUITE_CCMP_256:
                iwl_mvm_set_tx_cmd_ccmp(info, tx_cmd);
                iwl_mvm_set_tx_cmd_pn(info, crypto_hdr);
                break;
@@ -447,13 +447,16 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm,
                break;
        case WLAN_CIPHER_SUITE_GCMP:
        case WLAN_CIPHER_SUITE_GCMP_256:
+               type = TX_CMD_SEC_GCMP;
+               /* Fall through */
+       case WLAN_CIPHER_SUITE_CCMP_256:
                /* TODO: Taking the key from the table might introduce a race
                 * when PTK rekeying is done, having an old packets with a PN
                 * based on the old key but the message encrypted with a new
                 * one.
                 * Need to handle this.
                 */
-               tx_cmd->sec_ctl |= TX_CMD_SEC_GCMP | TX_CMD_SEC_KEY_FROM_TABLE;
+               tx_cmd->sec_ctl |= type | TX_CMD_SEC_KEY_FROM_TABLE;
                tx_cmd->key[0] = keyconf->hw_key_idx;
                iwl_mvm_set_tx_cmd_pn(info, crypto_hdr);
                break;
@@ -645,7 +648,11 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
                if (info.control.vif->type == NL80211_IFTYPE_P2P_DEVICE ||
                    info.control.vif->type == NL80211_IFTYPE_AP ||
                    info.control.vif->type == NL80211_IFTYPE_ADHOC) {
-                       sta_id = mvmvif->bcast_sta.sta_id;
+                       if (info.control.vif->type == NL80211_IFTYPE_P2P_DEVICE)
+                               sta_id = mvmvif->bcast_sta.sta_id;
+                       else
+                               sta_id = mvmvif->mcast_sta.sta_id;
+
                        queue = iwl_mvm_get_ctrl_vif_queue(mvm, &info,
                                                           hdr->frame_control);
                        if (queue < 0)
index 6d0a907d5ba58f8666d06e65744d1c179edf3013..fabae0f6068390c1a054deb5cc5bbf9da7f63354 100644 (file)
@@ -147,7 +147,7 @@ static void iwl_pcie_gen2_tfd_unmap(struct iwl_trans *trans,
        /* Sanity check on number of chunks */
        num_tbs = iwl_pcie_gen2_get_num_tbs(trans, tfd);
 
-       if (num_tbs >= trans_pcie->max_tbs) {
+       if (num_tbs > trans_pcie->max_tbs) {
                IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
                return;
        }
index 3f85713c41dcc9291130f25873ef97bd702335f1..1a566287993d5d9054f3aa22b4a329d405feb854 100644 (file)
@@ -378,7 +378,7 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
        /* Sanity check on number of chunks */
        num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
 
-       if (num_tbs >= trans_pcie->max_tbs) {
+       if (num_tbs > trans_pcie->max_tbs) {
                IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
                /* @todo issue fatal error, it is quite serious situation */
                return;
index 9da136049955afe3b3f4bef6b6fad42b59863cc5..e89fce1d4f272b1a638f670aa88bbd647f6ca928 100644 (file)
@@ -5,8 +5,8 @@ config WLAN_VENDOR_INTERSIL
          If you have a wireless card belonging to this class, say Y.
 
          Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         the questions about  cards. If you say Y, you will be asked for
+         kernel: saying N will just cause the configurator to skip all the
+         questions about these cards. If you say Y, you will be asked for
          your specific card in the following questions.
 
 if WLAN_VENDOR_INTERSIL
index 3c64afa161bf1727d8f015b8afa658c41b3e8ff3..100cf42db65d55f2c5ced4bd0461cb6c56bea66c 100644 (file)
@@ -253,7 +253,7 @@ static inline void hwsim_clear_chanctx_magic(struct ieee80211_chanctx_conf *c)
 
 static unsigned int hwsim_net_id;
 
-static int hwsim_netgroup;
+static struct ida hwsim_netgroup_ida = IDA_INIT;
 
 struct hwsim_net {
        int netgroup;
@@ -267,11 +267,13 @@ static inline int hwsim_net_get_netgroup(struct net *net)
        return hwsim_net->netgroup;
 }
 
-static inline void hwsim_net_set_netgroup(struct net *net)
+static inline int hwsim_net_set_netgroup(struct net *net)
 {
        struct hwsim_net *hwsim_net = net_generic(net, hwsim_net_id);
 
-       hwsim_net->netgroup = hwsim_netgroup++;
+       hwsim_net->netgroup = ida_simple_get(&hwsim_netgroup_ida,
+                                            0, 0, GFP_KERNEL);
+       return hwsim_net->netgroup >= 0 ? 0 : -ENOMEM;
 }
 
 static inline u32 hwsim_net_get_wmediumd(struct net *net)
@@ -2740,6 +2742,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
        mutex_init(&data->mutex);
 
        data->netgroup = hwsim_net_get_netgroup(net);
+       data->wmediumd = hwsim_net_get_wmediumd(net);
 
        /* Enable frame retransmissions for lossy channels */
        hw->max_rates = 4;
@@ -3507,9 +3510,7 @@ static int __init hwsim_init_netlink(void)
 
 static __net_init int hwsim_init_net(struct net *net)
 {
-       hwsim_net_set_netgroup(net);
-
-       return 0;
+       return hwsim_net_set_netgroup(net);
 }
 
 static void __net_exit hwsim_exit_net(struct net *net)
@@ -3532,6 +3533,8 @@ static void __net_exit hwsim_exit_net(struct net *net)
                queue_work(hwsim_wq, &data->destroy_work);
        }
        spin_unlock_bh(&hwsim_radio_lock);
+
+       ida_simple_remove(&hwsim_netgroup_ida, hwsim_net_get_netgroup(net));
 }
 
 static struct pernet_operations hwsim_net_ops = {
@@ -3539,6 +3542,7 @@ static struct pernet_operations hwsim_net_ops = {
        .exit = hwsim_exit_net,
        .id   = &hwsim_net_id,
        .size = sizeof(struct hwsim_net),
+       .async = true,
 };
 
 static void hwsim_exit_netlink(void)
index 4938c7ec0009c37413cf457f8336f9a1d5eb7406..27038901d3ee0d5fb639fbf6676d58c27d2e5358 100644 (file)
@@ -5,8 +5,8 @@ config WLAN_VENDOR_MARVELL
          If you have a wireless card belonging to this class, say Y.
 
          Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         the questions about  cards. If you say Y, you will be asked for
+         kernel: saying N will just cause the configurator to skip all the
+         questions about these cards. If you say Y, you will be asked for
          your specific card in the following questions.
 
 if WLAN_VENDOR_MARVELL
index 8772e39493273e8ed7a9ad34dc9d5ede49c31929..feebfdcf025ad40dbda217548a7f893a4561a8a4 100644 (file)
@@ -341,6 +341,36 @@ mwifiex_cmd_append_11n_tlv(struct mwifiex_private *priv,
                       le16_to_cpu(ht_cap->header.len));
 
                mwifiex_fill_cap_info(priv, radio_type, &ht_cap->ht_cap);
+               /* Update HT40 capability from current channel information */
+               if (bss_desc->bcn_ht_oper) {
+                       u8 ht_param = bss_desc->bcn_ht_oper->ht_param;
+                       u8 radio =
+                       mwifiex_band_to_radio_type(bss_desc->bss_band);
+                       int freq =
+                       ieee80211_channel_to_frequency(bss_desc->channel,
+                                                      radio);
+                       struct ieee80211_channel *chan =
+                       ieee80211_get_channel(priv->adapter->wiphy, freq);
+
+                       switch (ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
+                       case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
+                               if (chan->flags & IEEE80211_CHAN_NO_HT40PLUS) {
+                                       ht_cap->ht_cap.cap_info &=
+                                       ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+                                       ht_cap->ht_cap.cap_info &=
+                                       ~IEEE80211_HT_CAP_SGI_40;
+                               }
+                               break;
+                       case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
+                               if (chan->flags & IEEE80211_CHAN_NO_HT40MINUS) {
+                                       ht_cap->ht_cap.cap_info &=
+                                       ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+                                       ht_cap->ht_cap.cap_info &=
+                                       ~IEEE80211_HT_CAP_SGI_40;
+                               }
+                               break;
+                       }
+               }
 
                *buffer += sizeof(struct mwifiex_ie_types_htcap);
                ret_len += sizeof(struct mwifiex_ie_types_htcap);
index ce4432c535f0543757345d352f32a5b08972c4ba..7f7e9de2db1c52968e4aa5712e7cbf0e87e30a30 100644 (file)
@@ -95,18 +95,32 @@ u8 mwifiex_chan_type_to_sec_chan_offset(enum nl80211_channel_type chan_type)
 
 /* This function maps IEEE HT secondary channel type to NL80211 channel type
  */
-u8 mwifiex_sec_chan_offset_to_chan_type(u8 second_chan_offset)
+u8 mwifiex_get_chan_type(struct mwifiex_private *priv)
 {
-       switch (second_chan_offset) {
-       case IEEE80211_HT_PARAM_CHA_SEC_NONE:
-               return NL80211_CHAN_HT20;
-       case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
-               return NL80211_CHAN_HT40PLUS;
-       case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
-               return NL80211_CHAN_HT40MINUS;
-       default:
-               return NL80211_CHAN_HT20;
+       struct mwifiex_channel_band channel_band;
+       int ret;
+
+       ret = mwifiex_get_chan_info(priv, &channel_band);
+
+       if (!ret) {
+               switch (channel_band.band_config.chan_width) {
+               case CHAN_BW_20MHZ:
+                       if (IS_11N_ENABLED(priv))
+                               return NL80211_CHAN_HT20;
+                       else
+                               return NL80211_CHAN_NO_HT;
+               case CHAN_BW_40MHZ:
+                       if (channel_band.band_config.chan2_offset ==
+                           SEC_CHAN_ABOVE)
+                               return NL80211_CHAN_HT40PLUS;
+                       else
+                               return NL80211_CHAN_HT40MINUS;
+               default:
+                       return NL80211_CHAN_HT20;
+               }
        }
+
+       return NL80211_CHAN_HT20;
 }
 
 /*
@@ -3937,7 +3951,6 @@ static int mwifiex_cfg80211_get_channel(struct wiphy *wiphy,
        struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev);
        struct mwifiex_bssdescriptor *curr_bss;
        struct ieee80211_channel *chan;
-       u8 second_chan_offset;
        enum nl80211_channel_type chan_type;
        enum nl80211_band band;
        int freq;
@@ -3954,10 +3967,7 @@ static int mwifiex_cfg80211_get_channel(struct wiphy *wiphy,
                chan = ieee80211_get_channel(wiphy, freq);
 
                if (priv->ht_param_present) {
-                       second_chan_offset = priv->assoc_resp_ht_param &
-                                       IEEE80211_HT_PARAM_CHA_SEC_OFFSET;
-                       chan_type = mwifiex_sec_chan_offset_to_chan_type
-                                                       (second_chan_offset);
+                       chan_type = mwifiex_get_chan_type(priv);
                        cfg80211_chandef_create(chandef, chan, chan_type);
                } else {
                        cfg80211_chandef_create(chandef, chan,
index 874660052055cdc6ce3b1704115f90001e7d41c7..7014f440e6f8e86c9d4e26b46edfaff1eee86f82 100644 (file)
@@ -1529,7 +1529,8 @@ int mwifiex_ret_get_hw_spec(struct mwifiex_private *priv,
 
        adapter->fw_release_number = le32_to_cpu(hw_spec->fw_release_number);
        adapter->fw_api_ver = (adapter->fw_release_number >> 16) & 0xff;
-       adapter->number_of_antenna = le16_to_cpu(hw_spec->number_of_antenna);
+       adapter->number_of_antenna =
+                       le16_to_cpu(hw_spec->number_of_antenna) & 0xf;
 
        if (le32_to_cpu(hw_spec->dot_11ac_dev_cap)) {
                adapter->is_hw_11ac_capable = true;
index 188e4c3708363ea29915c478796c046d1ac9d15a..46696ea0b23e8260f3029c0cb1b9f0de3d5928de 100644 (file)
@@ -294,4 +294,21 @@ enum rdwr_status {
        RDWR_STATUS_DONE = 2
 };
 
+enum mwifiex_chan_width {
+       CHAN_BW_20MHZ = 0,
+       CHAN_BW_10MHZ,
+       CHAN_BW_40MHZ,
+       CHAN_BW_80MHZ,
+       CHAN_BW_8080MHZ,
+       CHAN_BW_160MHZ,
+       CHAN_BW_5MHZ,
+};
+
+enum mwifiex_chan_offset {
+       SEC_CHAN_NONE = 0,
+       SEC_CHAN_ABOVE = 1,
+       SEC_CHAN_5MHZ = 2,
+       SEC_CHAN_BELOW = 3
+};
+
 #endif /* !_MWIFIEX_DECL_H_ */
index 9c2cdef540742670fe4da7adc05d93c1dd15e435..c5dc518f768b5373bb4e111c389fdf1c7e6335df 100644 (file)
@@ -411,6 +411,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER {
 #define HostCmd_CMD_TDLS_OPER                         0x0122
 #define HostCmd_CMD_FW_DUMP_EVENT                    0x0125
 #define HostCmd_CMD_SDIO_SP_RX_AGGR_CFG               0x0223
+#define HostCmd_CMD_STA_CONFIGURE                    0x023f
 #define HostCmd_CMD_CHAN_REGION_CFG                  0x0242
 #define HostCmd_CMD_PACKET_AGGR_CTRL                 0x0251
 
@@ -2285,6 +2286,11 @@ struct host_cmd_ds_pkt_aggr_ctrl {
        __le16 tx_aggr_align;
 } __packed;
 
+struct host_cmd_ds_sta_configure {
+       __le16 action;
+       u8 tlv_buffer[0];
+} __packed;
+
 struct host_cmd_ds_command {
        __le16 command;
        __le16 size;
@@ -2361,6 +2367,7 @@ struct host_cmd_ds_command {
                struct host_cmd_ds_gtk_rekey_params rekey;
                struct host_cmd_ds_chan_region_cfg reg_cfg;
                struct host_cmd_ds_pkt_aggr_ctrl pkt_aggr_ctrl;
+               struct host_cmd_ds_sta_configure sta_cfg;
        } params;
 } __packed;
 
index 12e73995033229d8dee1ad002a4ea4c4dfd462ef..b6484582845a61c9dbb1b57381287cb67fd1a7fa 100644 (file)
@@ -943,13 +943,26 @@ int mwifiex_set_mac_address(struct mwifiex_private *priv,
                            struct net_device *dev)
 {
        int ret;
-       u64 mac_addr;
+       u64 mac_addr, old_mac_addr;
 
-       if (priv->bss_type != MWIFIEX_BSS_TYPE_P2P)
-               goto done;
+       if (priv->bss_type == MWIFIEX_BSS_TYPE_ANY)
+               return -ENOTSUPP;
 
        mac_addr = ether_addr_to_u64(priv->curr_addr);
-       mac_addr |= BIT_ULL(MWIFIEX_MAC_LOCAL_ADMIN_BIT);
+       old_mac_addr = mac_addr;
+
+       if (priv->bss_type == MWIFIEX_BSS_TYPE_P2P)
+               mac_addr |= BIT_ULL(MWIFIEX_MAC_LOCAL_ADMIN_BIT);
+
+       if (mwifiex_get_intf_num(priv->adapter, priv->bss_type) > 1) {
+               /* Set mac address based on bss_type/bss_num */
+               mac_addr ^= BIT_ULL(priv->bss_type + 8);
+               mac_addr += priv->bss_num;
+       }
+
+       if (mac_addr == old_mac_addr)
+               goto done;
+
        u64_to_ether_addr(mac_addr, priv->curr_addr);
 
        /* Send request to firmware */
@@ -957,13 +970,14 @@ int mwifiex_set_mac_address(struct mwifiex_private *priv,
                               HostCmd_ACT_GEN_SET, 0, NULL, true);
 
        if (ret) {
+               u64_to_ether_addr(old_mac_addr, priv->curr_addr);
                mwifiex_dbg(priv->adapter, ERROR,
                            "set mac address failed: ret=%d\n", ret);
                return ret;
        }
 
 done:
-       memcpy(dev->dev_addr, priv->curr_addr, ETH_ALEN);
+       ether_addr_copy(dev->dev_addr, priv->curr_addr);
        return 0;
 }
 
index 6b5539b1f4d81616e3ee951842cc4e089a9b73df..9bde181700dc25de9a6303f749b91383454fa41d 100644 (file)
@@ -517,6 +517,18 @@ enum mwifiex_iface_work_flags {
        MWIFIEX_IFACE_WORK_CARD_RESET,
 };
 
+struct mwifiex_band_config {
+       u8 chan_band:2;
+       u8 chan_width:2;
+       u8 chan2_offset:2;
+       u8 scan_mode:2;
+} __packed;
+
+struct mwifiex_channel_band {
+       struct mwifiex_band_config band_config;
+       u8 channel;
+};
+
 struct mwifiex_private {
        struct mwifiex_adapter *adapter;
        u8 bss_type;
@@ -1280,6 +1292,19 @@ mwifiex_copy_rates(u8 *dest, u32 pos, u8 *src, int len)
        return pos;
 }
 
+/* This function returns the number of interfaces with the same bss_type.
+ */
+static inline u8
+mwifiex_get_intf_num(struct mwifiex_adapter *adapter, u8 bss_type)
+{
+       u8 i, num = 0;
+
+       for (i = 0; i < adapter->priv_num; i++)
+               if (adapter->priv[i] && adapter->priv[i]->bss_type == bss_type)
+                       num++;
+       return num;
+}
+
 /*
  * This function returns the correct private structure pointer based
  * upon the BSS type and BSS number.
@@ -1544,7 +1569,7 @@ int mwifiex_check_network_compatibility(struct mwifiex_private *priv,
                                        struct mwifiex_bssdescriptor *bss_desc);
 
 u8 mwifiex_chan_type_to_sec_chan_offset(enum nl80211_channel_type chan_type);
-u8 mwifiex_sec_chan_offset_to_chan_type(u8 second_chan_offset);
+u8 mwifiex_get_chan_type(struct mwifiex_private *priv);
 
 struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy,
                                              const char *name,
@@ -1670,6 +1695,8 @@ void mwifiex_queue_main_work(struct mwifiex_adapter *adapter);
 int mwifiex_get_wakeup_reason(struct mwifiex_private *priv, u16 action,
                              int cmd_type,
                              struct mwifiex_ds_wakeup_reason *wakeup_reason);
+int mwifiex_get_chan_info(struct mwifiex_private *priv,
+                         struct mwifiex_channel_band *channel_band);
 int mwifiex_ret_wakeup_reason(struct mwifiex_private *priv,
                              struct host_cmd_ds_command *resp,
                              struct host_cmd_ds_wakeup_reason *wakeup_reason);
index 211e47d8b3181ddab78d90f293953eaf714b3d2b..4ed10cf82f9a4c7459e6429087eca637470afd40 100644 (file)
@@ -1898,6 +1898,25 @@ static int mwifiex_cmd_get_wakeup_reason(struct mwifiex_private *priv,
        return 0;
 }
 
+static int mwifiex_cmd_get_chan_info(struct host_cmd_ds_command *cmd,
+                                    u16 cmd_action)
+{
+       struct host_cmd_ds_sta_configure *sta_cfg_cmd = &cmd->params.sta_cfg;
+       struct host_cmd_tlv_channel_band *tlv_band_channel =
+       (struct host_cmd_tlv_channel_band *)sta_cfg_cmd->tlv_buffer;
+
+       cmd->command = cpu_to_le16(HostCmd_CMD_STA_CONFIGURE);
+       cmd->size = cpu_to_le16(sizeof(*sta_cfg_cmd) +
+                               sizeof(*tlv_band_channel) + S_DS_GEN);
+       sta_cfg_cmd->action = cpu_to_le16(cmd_action);
+       memset(tlv_band_channel, 0, sizeof(*tlv_band_channel));
+       tlv_band_channel->header.type = cpu_to_le16(TLV_TYPE_CHANNELBANDLIST);
+       tlv_band_channel->header.len  = cpu_to_le16(sizeof(*tlv_band_channel) -
+                                       sizeof(struct mwifiex_ie_types_header));
+
+       return 0;
+}
+
 /* This function check if the command is supported by firmware */
 static int mwifiex_is_cmd_supported(struct mwifiex_private *priv, u16 cmd_no)
 {
@@ -2210,6 +2229,9 @@ int mwifiex_sta_prepare_cmd(struct mwifiex_private *priv, uint16_t cmd_no,
                cmd_ptr->command = cpu_to_le16(cmd_no);
                cmd_ptr->size = cpu_to_le16(S_DS_GEN);
                break;
+       case HostCmd_CMD_STA_CONFIGURE:
+               ret = mwifiex_cmd_get_chan_info(cmd_ptr, cmd_action);
+               break;
        default:
                mwifiex_dbg(priv->adapter, ERROR,
                            "PREP_CMD: unknown cmd- %#x\n", cmd_no);
index 1bd4e13b8449a51ce0a6e789b5b02a8761334322..69e3b624adbb9432fe49de863de2d0e95418c739 100644 (file)
@@ -1170,6 +1170,22 @@ static int mwifiex_ret_pkt_aggr_ctrl(struct mwifiex_private *priv,
        return 0;
 }
 
+static int mwifiex_ret_get_chan_info(struct mwifiex_private *priv,
+                                    struct host_cmd_ds_command *resp,
+                                    struct mwifiex_channel_band *channel_band)
+{
+       struct host_cmd_ds_sta_configure *sta_cfg_cmd = &resp->params.sta_cfg;
+       struct host_cmd_tlv_channel_band *tlv_band_channel;
+
+       tlv_band_channel =
+       (struct host_cmd_tlv_channel_band *)sta_cfg_cmd->tlv_buffer;
+       memcpy(&channel_band->band_config, &tlv_band_channel->band_config,
+              sizeof(struct mwifiex_band_config));
+       channel_band->channel = tlv_band_channel->channel;
+
+       return 0;
+}
+
 /*
  * This function handles the command responses.
  *
@@ -1393,6 +1409,9 @@ int mwifiex_process_sta_cmdresp(struct mwifiex_private *priv, u16 cmdresp_no,
        case HostCmd_CMD_CHAN_REGION_CFG:
                ret = mwifiex_ret_chan_region_cfg(priv, resp);
                break;
+       case HostCmd_CMD_STA_CONFIGURE:
+               ret = mwifiex_ret_get_chan_info(priv, resp, data_buf);
+               break;
        default:
                mwifiex_dbg(adapter, ERROR,
                            "CMD_RESP: unknown cmd response %#x\n",
index a6077ab3efc3244d80c99d74e5e4c68fa74ab67a..5414b755cf8225829e86b0bb26390e7635fadf73 100644 (file)
@@ -146,7 +146,6 @@ int mwifiex_fill_new_bss_desc(struct mwifiex_private *priv,
        size_t beacon_ie_len;
        struct mwifiex_bss_priv *bss_priv = (void *)bss->priv;
        const struct cfg80211_bss_ies *ies;
-       int ret;
 
        rcu_read_lock();
        ies = rcu_dereference(bss->ies);
@@ -190,48 +189,7 @@ int mwifiex_fill_new_bss_desc(struct mwifiex_private *priv,
        if (bss_desc->cap_info_bitmap & WLAN_CAPABILITY_SPECTRUM_MGMT)
                bss_desc->sensed_11h = true;
 
-       ret = mwifiex_update_bss_desc_with_ie(priv->adapter, bss_desc);
-       if (ret)
-               return ret;
-
-       /* Update HT40 capability based on current channel information */
-       if (bss_desc->bcn_ht_oper && bss_desc->bcn_ht_cap) {
-               u8 ht_param = bss_desc->bcn_ht_oper->ht_param;
-               u8 radio = mwifiex_band_to_radio_type(bss_desc->bss_band);
-               struct ieee80211_supported_band *sband =
-                                               priv->wdev.wiphy->bands[radio];
-               int freq = ieee80211_channel_to_frequency(bss_desc->channel,
-                                                         radio);
-               struct ieee80211_channel *chan =
-                       ieee80211_get_channel(priv->adapter->wiphy, freq);
-
-               switch (ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
-               case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
-                       if (chan->flags & IEEE80211_CHAN_NO_HT40PLUS) {
-                               sband->ht_cap.cap &=
-                                       ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
-                               sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SGI_40;
-                       } else {
-                               sband->ht_cap.cap |=
-                                       IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
-                                       IEEE80211_HT_CAP_SGI_40;
-                       }
-                       break;
-               case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
-                       if (chan->flags & IEEE80211_CHAN_NO_HT40MINUS) {
-                               sband->ht_cap.cap &=
-                                       ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
-                               sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SGI_40;
-                       } else {
-                               sband->ht_cap.cap |=
-                                       IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
-                                       IEEE80211_HT_CAP_SGI_40;
-                       }
-                       break;
-               }
-       }
-
-       return 0;
+       return mwifiex_update_bss_desc_with_ie(priv->adapter, bss_desc);
 }
 
 void mwifiex_dnld_txpwr_table(struct mwifiex_private *priv)
@@ -1523,3 +1481,15 @@ int mwifiex_get_wakeup_reason(struct mwifiex_private *priv, u16 action,
 
        return status;
 }
+
+int mwifiex_get_chan_info(struct mwifiex_private *priv,
+                         struct mwifiex_channel_band *channel_band)
+{
+       int status = 0;
+
+       status = mwifiex_send_cmd(priv, HostCmd_CMD_STA_CONFIGURE,
+                                 HostCmd_ACT_GEN_GET, 0, channel_band,
+                                 MWIFIEX_SYNC_CMD);
+
+       return status;
+}
index 92ce4062f307768e740b5124741bde16aa9f6d29..ff5fc8987b0a03fc0509257214636334a1eca3b4 100644 (file)
@@ -5,8 +5,8 @@ config WLAN_VENDOR_MEDIATEK
          If you have a wireless card belonging to this class, say Y.
 
          Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         the questions about  cards. If you say Y, you will be asked for
+         kernel: saying N will just cause the configurator to skip all the
+         questions about these cards. If you say Y, you will be asked for
          your specific card in the following questions.
 
 if WLAN_VENDOR_MEDIATEK
index 85f8d324ebf82c3d3f2fc15676d123da396274f7..4f30cdcd2b5379ba0e0b9610ad8175983e567c58 100644 (file)
@@ -119,6 +119,52 @@ static int mt76_led_init(struct mt76_dev *dev)
        return devm_led_classdev_register(dev->dev, &dev->led_cdev);
 }
 
+static void mt76_init_stream_cap(struct mt76_dev *dev,
+                                struct ieee80211_supported_band *sband,
+                                bool vht)
+{
+       struct ieee80211_sta_ht_cap *ht_cap = &sband->ht_cap;
+       int i, nstream = __sw_hweight8(dev->antenna_mask);
+       struct ieee80211_sta_vht_cap *vht_cap;
+       u16 mcs_map = 0;
+
+       if (nstream > 1)
+               ht_cap->cap |= IEEE80211_HT_CAP_TX_STBC;
+       else
+               ht_cap->cap &= ~IEEE80211_HT_CAP_TX_STBC;
+
+       for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++)
+               ht_cap->mcs.rx_mask[i] = i < nstream ? 0xff : 0;
+
+       if (!vht)
+               return;
+
+       vht_cap = &sband->vht_cap;
+       if (nstream > 1)
+               vht_cap->cap |= IEEE80211_VHT_CAP_TXSTBC;
+       else
+               vht_cap->cap &= ~IEEE80211_VHT_CAP_TXSTBC;
+
+       for (i = 0; i < 8; i++) {
+               if (i < nstream)
+                       mcs_map |= (IEEE80211_VHT_MCS_SUPPORT_0_9 << (i * 2));
+               else
+                       mcs_map |=
+                               (IEEE80211_VHT_MCS_NOT_SUPPORTED << (i * 2));
+       }
+       vht_cap->vht_mcs.rx_mcs_map = cpu_to_le16(mcs_map);
+       vht_cap->vht_mcs.tx_mcs_map = cpu_to_le16(mcs_map);
+}
+
+void mt76_set_stream_caps(struct mt76_dev *dev, bool vht)
+{
+       if (dev->cap.has_2ghz)
+               mt76_init_stream_cap(dev, &dev->sband_2g.sband, false);
+       if (dev->cap.has_5ghz)
+               mt76_init_stream_cap(dev, &dev->sband_5g.sband, vht);
+}
+EXPORT_SYMBOL_GPL(mt76_set_stream_caps);
+
 static int
 mt76_init_sband(struct mt76_dev *dev, struct mt76_sband *msband,
                const struct ieee80211_channel *chan, int n_chan,
@@ -128,7 +174,6 @@ mt76_init_sband(struct mt76_dev *dev, struct mt76_sband *msband,
        struct ieee80211_sta_ht_cap *ht_cap;
        struct ieee80211_sta_vht_cap *vht_cap;
        void *chanlist;
-       u16 mcs_map;
        int size;
 
        size = n_chan * sizeof(*chan);
@@ -153,34 +198,20 @@ mt76_init_sband(struct mt76_dev *dev, struct mt76_sband *msband,
                       IEEE80211_HT_CAP_GRN_FLD |
                       IEEE80211_HT_CAP_SGI_20 |
                       IEEE80211_HT_CAP_SGI_40 |
-                      IEEE80211_HT_CAP_TX_STBC |
                       (1 << IEEE80211_HT_CAP_RX_STBC_SHIFT);
 
-       ht_cap->mcs.rx_mask[0] = 0xff;
-       ht_cap->mcs.rx_mask[1] = 0xff;
        ht_cap->mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED;
        ht_cap->ampdu_factor = IEEE80211_HT_MAX_AMPDU_64K;
        ht_cap->ampdu_density = IEEE80211_HT_MPDU_DENSITY_4;
 
+       mt76_init_stream_cap(dev, sband, vht);
+
        if (!vht)
                return 0;
 
        vht_cap = &sband->vht_cap;
        vht_cap->vht_supported = true;
-
-       mcs_map = (IEEE80211_VHT_MCS_SUPPORT_0_9 << (0 * 2)) |
-                 (IEEE80211_VHT_MCS_SUPPORT_0_9 << (1 * 2)) |
-                 (IEEE80211_VHT_MCS_NOT_SUPPORTED << (2 * 2)) |
-                 (IEEE80211_VHT_MCS_NOT_SUPPORTED << (3 * 2)) |
-                 (IEEE80211_VHT_MCS_NOT_SUPPORTED << (4 * 2)) |
-                 (IEEE80211_VHT_MCS_NOT_SUPPORTED << (5 * 2)) |
-                 (IEEE80211_VHT_MCS_NOT_SUPPORTED << (6 * 2)) |
-                 (IEEE80211_VHT_MCS_NOT_SUPPORTED << (7 * 2));
-
-       vht_cap->vht_mcs.rx_mcs_map = cpu_to_le16(mcs_map);
-       vht_cap->vht_mcs.tx_mcs_map = cpu_to_le16(mcs_map);
        vht_cap->cap |= IEEE80211_VHT_CAP_RXLDPC |
-                       IEEE80211_VHT_CAP_TXSTBC |
                        IEEE80211_VHT_CAP_RXSTBC_1 |
                        IEEE80211_VHT_CAP_SHORT_GI_80;
 
@@ -262,6 +293,9 @@ int mt76_register_device(struct mt76_dev *dev, bool vht,
 
        wiphy->features |= NL80211_FEATURE_ACTIVE_MONITOR;
 
+       wiphy->available_antennas_tx = dev->antenna_mask;
+       wiphy->available_antennas_rx = dev->antenna_mask;
+
        hw->txq_data_size = sizeof(struct mt76_txq);
        hw->max_tx_fragments = 16;
 
index d2ce15093eddd14cfdfb8bf6a63309433709d495..065ff78059c38948a389d8c9434354b53dcb7e5c 100644 (file)
@@ -253,6 +253,8 @@ struct mt76_dev {
        u32 rev;
        unsigned long state;
 
+       u8 antenna_mask;
+
        struct mt76_sband sband_2g;
        struct mt76_sband sband_5g;
        struct debugfs_blob_wrapper eeprom;
@@ -423,6 +425,7 @@ void mt76_release_buffered_frames(struct ieee80211_hw *hw,
 void mt76_set_channel(struct mt76_dev *dev);
 int mt76_get_survey(struct ieee80211_hw *hw, int idx,
                    struct survey_info *survey);
+void mt76_set_stream_caps(struct mt76_dev *dev, bool vht);
 
 int mt76_rx_aggr_start(struct mt76_dev *dev, struct mt76_wcid *wcid, u8 tid,
                       u16 ssn, u8 size);
index e62131b88102099c97ca581ad85655a4f36889a7..783b8122ec3c9b0121029bf5f294ae45f794ac70 100644 (file)
@@ -180,6 +180,7 @@ int mt76x2_eeprom_init(struct mt76x2_dev *dev);
 int mt76x2_apply_calibration_data(struct mt76x2_dev *dev, int channel);
 void mt76x2_set_tx_ackto(struct mt76x2_dev *dev);
 
+void mt76x2_phy_set_antenna(struct mt76x2_dev *dev);
 int mt76x2_phy_start(struct mt76x2_dev *dev);
 int mt76x2_phy_set_channel(struct mt76x2_dev *dev,
                         struct cfg80211_chan_def *chandef);
index 9c9bf3e785ba9633f80a56af8608a92b99adb81e..5bb50027c1e8368b222501026eba965c13371b5f 100644 (file)
@@ -222,11 +222,10 @@ static int
 mt76x2_eeprom_load(struct mt76x2_dev *dev)
 {
        void *efuse;
-       int len = MT7662_EEPROM_SIZE;
        bool found;
        int ret;
 
-       ret = mt76_eeprom_init(&dev->mt76, len);
+       ret = mt76_eeprom_init(&dev->mt76, MT7662_EEPROM_SIZE);
        if (ret < 0)
                return ret;
 
@@ -234,14 +233,15 @@ mt76x2_eeprom_load(struct mt76x2_dev *dev)
        if (found)
                found = !mt76x2_check_eeprom(dev);
 
-       dev->mt76.otp.data = devm_kzalloc(dev->mt76.dev, len, GFP_KERNEL);
-       dev->mt76.otp.size = len;
+       dev->mt76.otp.data = devm_kzalloc(dev->mt76.dev, MT7662_EEPROM_SIZE,
+                                         GFP_KERNEL);
+       dev->mt76.otp.size = MT7662_EEPROM_SIZE;
        if (!dev->mt76.otp.data)
                return -ENOMEM;
 
        efuse = dev->mt76.otp.data;
 
-       if (mt76x2_get_efuse_data(dev, efuse, len))
+       if (mt76x2_get_efuse_data(dev, efuse, MT7662_EEPROM_SIZE))
                goto out;
 
        if (found) {
@@ -249,7 +249,7 @@ mt76x2_eeprom_load(struct mt76x2_dev *dev)
        } else {
                /* FIXME: check if efuse data is complete */
                found = true;
-               memcpy(dev->mt76.eeprom.data, efuse, len);
+               memcpy(dev->mt76.eeprom.data, efuse, MT7662_EEPROM_SIZE);
        }
 
 out:
index 9dbf94947324e3ed2532aef574350783f65f6d76..934c331d995e9c3611ee9f470177de1c4b231461 100644 (file)
@@ -857,6 +857,9 @@ int mt76x2_register_device(struct mt76x2_dev *dev)
        dev->mt76.led_cdev.brightness_set = mt76x2_led_set_brightness;
        dev->mt76.led_cdev.blink_set = mt76x2_led_set_blink;
 
+       /* init antenna configuration */
+       dev->mt76.antenna_mask = 3;
+
        ret = mt76_register_device(&dev->mt76, true, mt76x2_rates,
                                   ARRAY_SIZE(mt76x2_rates));
        if (ret)
index 7ea3d841918e92393ebfd7810b4ec61e365edb84..d183156525837bb7448090c451197e67a11fc158 100644 (file)
@@ -198,8 +198,8 @@ void mt76x2_mac_write_txwi(struct mt76x2_dev *dev, struct mt76x2_txwi *txwi,
                ccmp_pn[5] = pn >> 24;
                ccmp_pn[6] = pn >> 32;
                ccmp_pn[7] = pn >> 40;
-               txwi->iv = *((u32 *) &ccmp_pn[0]);
-               txwi->eiv = *((u32 *) &ccmp_pn[1]);
+               txwi->iv = *((__le32 *)&ccmp_pn[0]);
+               txwi->eiv = *((__le32 *)&ccmp_pn[1]);
        }
 
        spin_lock_bh(&dev->mt76.lock);
index 205043b470b208e6a4a04f1bb6c618c62929ff90..25f4cebef26da18c0d4611b275edfe75c7ebc1af 100644 (file)
@@ -549,6 +549,40 @@ mt76x2_set_tim(struct ieee80211_hw *hw, struct ieee80211_sta *sta, bool set)
        return 0;
 }
 
+static int mt76x2_set_antenna(struct ieee80211_hw *hw, u32 tx_ant,
+                             u32 rx_ant)
+{
+       struct mt76x2_dev *dev = hw->priv;
+
+       if (!tx_ant || tx_ant > 3 || tx_ant != rx_ant)
+               return -EINVAL;
+
+       mutex_lock(&dev->mutex);
+
+       dev->chainmask = (tx_ant == 3) ? 0x202 : 0x101;
+       dev->mt76.antenna_mask = tx_ant;
+
+       mt76_set_stream_caps(&dev->mt76, true);
+       mt76x2_phy_set_antenna(dev);
+
+       mutex_unlock(&dev->mutex);
+
+       return 0;
+}
+
+static int mt76x2_get_antenna(struct ieee80211_hw *hw, u32 *tx_ant,
+                             u32 *rx_ant)
+{
+       struct mt76x2_dev *dev = hw->priv;
+
+       mutex_lock(&dev->mutex);
+       *tx_ant = dev->mt76.antenna_mask;
+       *rx_ant = dev->mt76.antenna_mask;
+       mutex_unlock(&dev->mutex);
+
+       return 0;
+}
+
 const struct ieee80211_ops mt76x2_ops = {
        .tx = mt76x2_tx,
        .start = mt76x2_start,
@@ -573,5 +607,7 @@ const struct ieee80211_ops mt76x2_ops = {
        .set_coverage_class = mt76x2_set_coverage_class,
        .get_survey = mt76_get_survey,
        .set_tim = mt76x2_set_tim,
+       .set_antenna = mt76x2_set_antenna,
+       .get_antenna = mt76x2_get_antenna,
 };
 
index 5b742749d5dea8476738b5fd175841c926880a1b..fcc37eb7ce0b41e9bedbe1bd0e4592f006043ba2 100644 (file)
@@ -361,29 +361,52 @@ mt76x2_phy_set_band(struct mt76x2_dev *dev, int band, bool primary_upper)
                       primary_upper);
 }
 
-static void
-mt76x2_set_rx_chains(struct mt76x2_dev *dev)
+void mt76x2_phy_set_antenna(struct mt76x2_dev *dev)
 {
        u32 val;
 
        val = mt76_rr(dev, MT_BBP(AGC, 0));
-       val &= ~(BIT(3) | BIT(4));
+       val &= ~(BIT(4) | BIT(1));
+       switch (dev->mt76.antenna_mask) {
+       case 1:
+               /* disable mac DAC control */
+               mt76_clear(dev, MT_BBP(IBI, 9), BIT(11));
+               mt76_clear(dev, MT_BBP(TXBE, 5), 3);
+               mt76_rmw_field(dev, MT_TX_PIN_CFG, MT_TX_PIN_CFG_TXANT, 0x3);
+               mt76_rmw_field(dev, MT_BBP(CORE, 32), GENMASK(21, 20), 2);
+               /* disable DAC 1 */
+               mt76_rmw_field(dev, MT_BBP(CORE, 33), GENMASK(12, 9), 4);
 
-       if (dev->chainmask & BIT(1))
-               val |= BIT(3);
+               val &= ~(BIT(3) | BIT(0));
+               break;
+       case 2:
+               /* disable mac DAC control */
+               mt76_clear(dev, MT_BBP(IBI, 9), BIT(11));
+               mt76_rmw_field(dev, MT_BBP(TXBE, 5), 3, 1);
+               mt76_rmw_field(dev, MT_TX_PIN_CFG, MT_TX_PIN_CFG_TXANT, 0xc);
+               mt76_rmw_field(dev, MT_BBP(CORE, 32), GENMASK(21, 20), 1);
+               /* disable DAC 0 */
+               mt76_rmw_field(dev, MT_BBP(CORE, 33), GENMASK(12, 9), 1);
+
+               val &= ~BIT(3);
+               val |= BIT(0);
+               break;
+       case 3:
+       default:
+               /* enable mac DAC control */
+               mt76_set(dev, MT_BBP(IBI, 9), BIT(11));
+               mt76_set(dev, MT_BBP(TXBE, 5), 3);
+               mt76_rmw_field(dev, MT_TX_PIN_CFG, MT_TX_PIN_CFG_TXANT, 0xf);
+               mt76_clear(dev, MT_BBP(CORE, 32), GENMASK(21, 20));
+               mt76_clear(dev, MT_BBP(CORE, 33), GENMASK(12, 9));
 
+               val &= ~BIT(0);
+               val |= BIT(3);
+               break;
+       }
        mt76_wr(dev, MT_BBP(AGC, 0), val);
 }
 
-static void
-mt76x2_set_tx_dac(struct mt76x2_dev *dev)
-{
-       if (dev->chainmask & BIT(1))
-               mt76_set(dev, MT_BBP(TXBE, 5), 3);
-       else
-               mt76_clear(dev, MT_BBP(TXBE, 5), 3);
-}
-
 static void
 mt76x2_get_agc_gain(struct mt76x2_dev *dev, u8 *dest)
 {
@@ -585,10 +608,8 @@ int mt76x2_phy_set_channel(struct mt76x2_dev *dev,
        mt76x2_configure_tx_delay(dev, band, bw);
        mt76x2_phy_set_txpower(dev);
 
-       mt76x2_set_rx_chains(dev);
        mt76x2_phy_set_band(dev, chan->band, ch_group_index & 1);
        mt76x2_phy_set_bw(dev, chandef->width, ch_group_index);
-       mt76x2_set_tx_dac(dev);
 
        mt76_rmw(dev, MT_EXT_CCA_CFG,
                 (MT_EXT_CCA_CFG_CCA0 |
@@ -604,6 +625,8 @@ int mt76x2_phy_set_channel(struct mt76x2_dev *dev,
 
        mt76x2_mcu_init_gain(dev, channel, dev->cal.rx.mcu_gain, true);
 
+       mt76x2_phy_set_antenna(dev);
+
        /* Enable LDPC Rx */
        if (mt76xx_rev(dev) >= MT76XX_REV_E3)
                mt76_set(dev, MT_BBP(RXO, 13), BIT(10));
index ce3ab85c8b0f5507ebe06f0b9e2eccd93b71a0a8..b9c334d9e5b81b1fe4ae0605664c1d040da3aed7 100644 (file)
 #define MT_TX_PWR_CFG_2                        0x131c
 #define MT_TX_PWR_CFG_3                        0x1320
 #define MT_TX_PWR_CFG_4                        0x1324
+#define MT_TX_PIN_CFG                  0x1328
+#define MT_TX_PIN_CFG_TXANT            GENMASK(3, 0)
 
 #define MT_TX_BAND_CFG                 0x132c
 #define MT_TX_BAND_CFG_UPPER_40M       BIT(0)
index da6faea092d6ba96f93d851e81001f7e59a52fec..76117b40288050eb2f4ab71625aae37bec9f9053 100644 (file)
@@ -19,6 +19,7 @@
 #include <asm/unaligned.h>
 #include "mt7601u.h"
 #include "eeprom.h"
+#include "mac.h"
 
 static bool
 field_valid(u8 val)
@@ -74,7 +75,7 @@ static int
 mt7601u_efuse_physical_size_check(struct mt7601u_dev *dev)
 {
        const int map_reads = DIV_ROUND_UP(MT_EFUSE_USAGE_MAP_SIZE, 16);
-       u8 data[map_reads * 16];
+       u8 data[round_up(MT_EFUSE_USAGE_MAP_SIZE, 16)];
        int ret, i;
        u32 start = 0, end = 0, cnt_free;
 
@@ -134,27 +135,6 @@ mt7601u_set_chip_cap(struct mt7601u_dev *dev, u8 *eeprom)
                        "Error: device has more than 1 RX/TX stream!\n");
 }
 
-static int
-mt7601u_set_macaddr(struct mt7601u_dev *dev, const u8 *eeprom)
-{
-       const void *src = eeprom + MT_EE_MAC_ADDR;
-
-       ether_addr_copy(dev->macaddr, src);
-
-       if (!is_valid_ether_addr(dev->macaddr)) {
-               eth_random_addr(dev->macaddr);
-               dev_info(dev->dev,
-                        "Invalid MAC address, using random address %pM\n",
-                        dev->macaddr);
-       }
-
-       mt76_wr(dev, MT_MAC_ADDR_DW0, get_unaligned_le32(dev->macaddr));
-       mt76_wr(dev, MT_MAC_ADDR_DW1, get_unaligned_le16(dev->macaddr + 4) |
-               FIELD_PREP(MT_MAC_ADDR_DW1_U2ME_MASK, 0xff));
-
-       return 0;
-}
-
 static void mt7601u_set_channel_target_power(struct mt7601u_dev *dev,
                                             u8 *eeprom, u8 max_pwr)
 {
@@ -400,7 +380,7 @@ mt7601u_eeprom_init(struct mt7601u_dev *dev)
        dev_info(dev->dev, "EEPROM ver:%02hhx fae:%02hhx\n",
                 eeprom[MT_EE_VERSION_EE], eeprom[MT_EE_VERSION_FAE]);
 
-       mt7601u_set_macaddr(dev, eeprom);
+       mt7601u_set_macaddr(dev, eeprom + MT_EE_MAC_ADDR);
        mt7601u_set_chip_cap(dev, eeprom);
        mt7601u_set_channel_power(dev, eeprom);
        mt7601u_set_country_reg(dev, eeprom);
index ec11ff66969d4414d19d3bc3ca735fdfa9c28c9b..2dc6b68e7fb9bfa4dfc2a71b4a4633452e69864f 100644 (file)
@@ -139,6 +139,7 @@ static const struct mt76_reg_pair mac_common_vals[] = {
        { MT_TXOP_HLDR_ET,              0x00000002 },
        { MT_XIFS_TIME_CFG,             0x33a41010 },
        { MT_PWR_PIN_CFG,               0x00000000 },
+       { MT_PN_PAD_MODE,               0x00000001 },
 };
 
 static const struct mt76_reg_pair mac_chip_vals[] = {
index d6dc59bb00df42e86dbd212793e118eb66d962fc..d55d7040a56d3bd01710f4ca89e9524baa479fb3 100644 (file)
 #include "trace.h"
 #include <linux/etherdevice.h>
 
+void mt7601u_set_macaddr(struct mt7601u_dev *dev, const u8 *addr)
+{
+       ether_addr_copy(dev->macaddr, addr);
+
+       if (!is_valid_ether_addr(dev->macaddr)) {
+               eth_random_addr(dev->macaddr);
+               dev_info(dev->dev,
+                        "Invalid MAC address, using random address %pM\n",
+                        dev->macaddr);
+       }
+
+       mt76_wr(dev, MT_MAC_ADDR_DW0, get_unaligned_le32(dev->macaddr));
+       mt76_wr(dev, MT_MAC_ADDR_DW1, get_unaligned_le16(dev->macaddr + 4) |
+               FIELD_PREP(MT_MAC_ADDR_DW1_U2ME_MASK, 0xff));
+}
+
 static void
 mt76_mac_process_tx_rate(struct ieee80211_tx_rate *txrate, u16 rate)
 {
@@ -464,8 +480,16 @@ u32 mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb,
 
        if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_DECRYPT)) {
                status->flag |= RX_FLAG_DECRYPTED;
-               status->flag |= RX_FLAG_IV_STRIPPED | RX_FLAG_MMIC_STRIPPED;
+               status->flag |= RX_FLAG_MMIC_STRIPPED;
+               status->flag |= RX_FLAG_MIC_STRIPPED;
+               status->flag |= RX_FLAG_ICV_STRIPPED;
+               status->flag |= RX_FLAG_IV_STRIPPED;
        }
+       /* let mac80211 take care of PN validation since apparently
+        * the hardware does not support it
+        */
+       if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_PN_LEN))
+               status->flag &= ~RX_FLAG_IV_STRIPPED;
 
        status->chains = BIT(0);
        rssi = mt7601u_phy_get_rssi(dev, rxwi, rate);
index 2c22d63c63a22454fa3bb40b13e37c7c8654f2aa..b7aa24656d0e8bf28eec3906e96912dee05f52fb 100644 (file)
@@ -174,5 +174,6 @@ u16 mt76_mac_tx_rate_val(struct mt7601u_dev *dev,
 struct mt76_tx_status
 mt7601u_mac_fetch_tx_status(struct mt7601u_dev *dev);
 void mt76_send_tx_status(struct mt7601u_dev *dev, struct mt76_tx_status *stat);
+void mt7601u_set_macaddr(struct mt7601u_dev *dev, const u8 *addr);
 
 #endif
index 43ebd460ba861ab0054963f66641f4290abf2491..3c9ea40d9584e63e43fc7e95286021bdbcdb3447 100644 (file)
@@ -64,6 +64,9 @@ static int mt7601u_add_interface(struct ieee80211_hw *hw,
         */
        mvif->idx = idx;
 
+       if (!ether_addr_equal(dev->macaddr, vif->addr))
+               mt7601u_set_macaddr(dev, vif->addr);
+
        if (dev->wcid_mask[wcid / BITS_PER_LONG] & BIT(wcid % BITS_PER_LONG))
                return -ENOSPC;
        dev->wcid_mask[wcid / BITS_PER_LONG] |= BIT(wcid % BITS_PER_LONG);
index 65a8004418ea45b3f21cf2d746443c891470f881..d9d6fd7eff5ec3c58199af58197be83f7fc07163 100644 (file)
@@ -58,8 +58,7 @@ static inline void trace_mt_mcu_msg_send_cs(struct mt7601u_dev *dev,
        trace_mt_mcu_msg_send(dev, skb, csum, need_resp);
 }
 
-static struct sk_buff *
-mt7601u_mcu_msg_alloc(struct mt7601u_dev *dev, const void *data, int len)
+static struct sk_buff *mt7601u_mcu_msg_alloc(const void *data, int len)
 {
        struct sk_buff *skb;
 
@@ -171,7 +170,7 @@ static int mt7601u_mcu_function_select(struct mt7601u_dev *dev,
                .value = cpu_to_le32(val),
        };
 
-       skb = mt7601u_mcu_msg_alloc(dev, &msg, sizeof(msg));
+       skb = mt7601u_mcu_msg_alloc(&msg, sizeof(msg));
        if (!skb)
                return -ENOMEM;
        return mt7601u_mcu_msg_send(dev, skb, CMD_FUN_SET_OP, func == 5);
@@ -208,7 +207,7 @@ mt7601u_mcu_calibrate(struct mt7601u_dev *dev, enum mcu_calibrate cal, u32 val)
                .value = cpu_to_le32(val),
        };
 
-       skb = mt7601u_mcu_msg_alloc(dev, &msg, sizeof(msg));
+       skb = mt7601u_mcu_msg_alloc(&msg, sizeof(msg));
        if (!skb)
                return -ENOMEM;
        return mt7601u_mcu_msg_send(dev, skb, CMD_CALIBRATION_OP, true);
index c7ec40475a5f3a52e485f333ea86f7b0ad8ea9ed..9233744451a936162e1b069b9877d382eb930851 100644 (file)
@@ -147,7 +147,8 @@ enum {
  * @rx_lock:           protects @rx_q.
  * @con_mon_lock:      protects @ap_bssid, @bcn_*, @avg_rssi.
  * @mutex:             ensures exclusive access from mac80211 callbacks.
- * @vendor_req_mutex:  protects @vend_buf, ensures atomicity of split writes.
+ * @vendor_req_mutex:  protects @vend_buf, ensures atomicity of read/write
+ *                     accesses
  * @reg_atomic_mutex:  ensures atomicity of indirect register accesses
  *                     (accesses to RF and BBP).
  * @hw_atomic_mutex:   ensures exclusive access to HW during critical
index b9e4f679313852736ff2f5a2b103cf7ab494d711..d8b7863f79261a3275b6641ffdf7607e23fdd206 100644 (file)
@@ -129,15 +129,14 @@ void mt7601u_vendor_reset(struct mt7601u_dev *dev)
                               MT_VEND_DEV_MODE_RESET, 0, NULL, 0);
 }
 
-u32 mt7601u_rr(struct mt7601u_dev *dev, u32 offset)
+/* should be called with vendor_req_mutex held */
+static u32 __mt7601u_rr(struct mt7601u_dev *dev, u32 offset)
 {
        int ret;
        u32 val = ~0;
 
        WARN_ONCE(offset > USHRT_MAX, "read high off:%08x", offset);
 
-       mutex_lock(&dev->vendor_req_mutex);
-
        ret = mt7601u_vendor_request(dev, MT_VEND_MULTI_READ, USB_DIR_IN,
                                     0, offset, dev->vend_buf, MT_VEND_BUF);
        if (ret == MT_VEND_BUF)
@@ -146,25 +145,41 @@ u32 mt7601u_rr(struct mt7601u_dev *dev, u32 offset)
                dev_err(dev->dev, "Error: wrong size read:%d off:%08x\n",
                        ret, offset);
 
-       mutex_unlock(&dev->vendor_req_mutex);
-
        trace_reg_read(dev, offset, val);
        return val;
 }
 
-int mt7601u_vendor_single_wr(struct mt7601u_dev *dev, const u8 req,
-                            const u16 offset, const u32 val)
+u32 mt7601u_rr(struct mt7601u_dev *dev, u32 offset)
 {
-       int ret;
+       u32 ret;
 
        mutex_lock(&dev->vendor_req_mutex);
+       ret = __mt7601u_rr(dev, offset);
+       mutex_unlock(&dev->vendor_req_mutex);
 
-       ret = mt7601u_vendor_request(dev, req, USB_DIR_OUT,
-                                    val & 0xffff, offset, NULL, 0);
+       return ret;
+}
+
+/* should be called with vendor_req_mutex held */
+static int __mt7601u_vendor_single_wr(struct mt7601u_dev *dev, const u8 req,
+                                     const u16 offset, const u32 val)
+{
+       int ret = mt7601u_vendor_request(dev, req, USB_DIR_OUT,
+                                        val & 0xffff, offset, NULL, 0);
        if (!ret)
                ret = mt7601u_vendor_request(dev, req, USB_DIR_OUT,
                                             val >> 16, offset + 2, NULL, 0);
+       trace_reg_write(dev, offset, val);
+       return ret;
+}
+
+int mt7601u_vendor_single_wr(struct mt7601u_dev *dev, const u8 req,
+                            const u16 offset, const u32 val)
+{
+       int ret;
 
+       mutex_lock(&dev->vendor_req_mutex);
+       ret = __mt7601u_vendor_single_wr(dev, req, offset, val);
        mutex_unlock(&dev->vendor_req_mutex);
 
        return ret;
@@ -175,23 +190,30 @@ void mt7601u_wr(struct mt7601u_dev *dev, u32 offset, u32 val)
        WARN_ONCE(offset > USHRT_MAX, "write high off:%08x", offset);
 
        mt7601u_vendor_single_wr(dev, MT_VEND_WRITE, offset, val);
-       trace_reg_write(dev, offset, val);
 }
 
 u32 mt7601u_rmw(struct mt7601u_dev *dev, u32 offset, u32 mask, u32 val)
 {
-       val |= mt7601u_rr(dev, offset) & ~mask;
-       mt7601u_wr(dev, offset, val);
+       mutex_lock(&dev->vendor_req_mutex);
+       val |= __mt7601u_rr(dev, offset) & ~mask;
+       __mt7601u_vendor_single_wr(dev, MT_VEND_WRITE, offset, val);
+       mutex_unlock(&dev->vendor_req_mutex);
+
        return val;
 }
 
 u32 mt7601u_rmc(struct mt7601u_dev *dev, u32 offset, u32 mask, u32 val)
 {
-       u32 reg = mt7601u_rr(dev, offset);
+       u32 reg;
 
+       mutex_lock(&dev->vendor_req_mutex);
+       reg = __mt7601u_rr(dev, offset);
        val |= reg & ~mask;
        if (reg != val)
-               mt7601u_wr(dev, offset, val);
+               __mt7601u_vendor_single_wr(dev, MT_VEND_WRITE,
+                                          offset, val);
+       mutex_unlock(&dev->vendor_req_mutex);
+
        return val;
 }
 
index 30943656e9898cbdd41b4b111c8bb7ef526c6aa0..de84ce125c2673fc6416a898acf2799b8a56eced 100644 (file)
@@ -5,8 +5,8 @@ config WLAN_VENDOR_QUANTENNA
          If you have a wireless card belonging to this class, say Y.
 
          Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         the questions about  cards. If you say Y, you will be asked for
+         kernel: saying N will just cause the configurator to skip all the
+         questions about these cards. If you say Y, you will be asked for
          your specific card in the following questions.
 
 if WLAN_VENDOR_QUANTENNA
index 56e5fed92a2a60190b3e7fa427de4d1a9bd3337b..0a1604683babefa1a7ac6e9c6f38a7806fe47442 100644 (file)
@@ -59,8 +59,9 @@ struct qtnf_bus {
        char fwname[32];
        struct napi_struct mux_napi;
        struct net_device mux_dev;
-       struct completion request_firmware_complete;
+       struct completion firmware_init_complete;
        struct workqueue_struct *workqueue;
+       struct work_struct fw_work;
        struct work_struct event_work;
        struct mutex bus_lock; /* lock during command/event processing */
        struct dentry *dbg_dir;
index 6f619096432046744ba86ee2a26aa88848ce9e41..f117904d9120564035dff4e27cc242f72fe95fc9 100644 (file)
@@ -127,7 +127,7 @@ static inline void qtnf_dis_txdone_irq(struct qtnf_pcie_bus_priv *priv)
        spin_unlock_irqrestore(&priv->irq_lock, flags);
 }
 
-static int qtnf_pcie_init_irq(struct qtnf_pcie_bus_priv *priv)
+static void qtnf_pcie_init_irq(struct qtnf_pcie_bus_priv *priv)
 {
        struct pci_dev *pdev = priv->pdev;
 
@@ -148,8 +148,6 @@ static int qtnf_pcie_init_irq(struct qtnf_pcie_bus_priv *priv)
                pr_warn("legacy PCIE interrupts enabled\n");
                pci_intx(pdev, 1);
        }
-
-       return 0;
 }
 
 static void qtnf_deassert_intx(struct qtnf_pcie_bus_priv *priv)
@@ -162,6 +160,17 @@ static void qtnf_deassert_intx(struct qtnf_pcie_bus_priv *priv)
        qtnf_non_posted_write(cfg, reg);
 }
 
+static void qtnf_reset_card(struct qtnf_pcie_bus_priv *priv)
+{
+       const u32 data = QTN_PEARL_IPC_IRQ_WORD(QTN_PEARL_LHOST_EP_RESET);
+       void __iomem *reg = priv->sysctl_bar +
+                           QTN_PEARL_SYSCTL_LHOST_IRQ_OFFSET;
+
+       qtnf_non_posted_write(data, reg);
+       msleep(QTN_EP_RESET_WAIT_MS);
+       pci_restore_state(priv->pdev);
+}
+
 static void qtnf_ipc_gen_ep_int(void *arg)
 {
        const struct qtnf_pcie_bus_priv *priv = arg;
@@ -478,10 +487,11 @@ static int alloc_rx_buffers(struct qtnf_pcie_bus_priv *priv)
 }
 
 /* all rx/tx activity should have ceased before calling this function */
-static void free_xfer_buffers(void *data)
+static void qtnf_free_xfer_buffers(struct qtnf_pcie_bus_priv *priv)
 {
-       struct qtnf_pcie_bus_priv *priv = (struct qtnf_pcie_bus_priv *)data;
+       struct qtnf_tx_bd *txbd;
        struct qtnf_rx_bd *rxbd;
+       struct sk_buff *skb;
        dma_addr_t paddr;
        int i;
 
@@ -489,19 +499,26 @@ static void free_xfer_buffers(void *data)
        for (i = 0; i < priv->rx_bd_num; i++) {
                if (priv->rx_skb && priv->rx_skb[i]) {
                        rxbd = &priv->rx_bd_vbase[i];
+                       skb = priv->rx_skb[i];
                        paddr = QTN_HOST_ADDR(le32_to_cpu(rxbd->addr_h),
                                              le32_to_cpu(rxbd->addr));
                        pci_unmap_single(priv->pdev, paddr, SKB_BUF_SIZE,
                                         PCI_DMA_FROMDEVICE);
-
-                       dev_kfree_skb_any(priv->rx_skb[i]);
+                       dev_kfree_skb_any(skb);
+                       priv->rx_skb[i] = NULL;
                }
        }
 
        /* free tx buffers */
        for (i = 0; i < priv->tx_bd_num; i++) {
                if (priv->tx_skb && priv->tx_skb[i]) {
-                       dev_kfree_skb_any(priv->tx_skb[i]);
+                       txbd = &priv->tx_bd_vbase[i];
+                       skb = priv->tx_skb[i];
+                       paddr = QTN_HOST_ADDR(le32_to_cpu(txbd->addr_h),
+                                             le32_to_cpu(txbd->addr));
+                       pci_unmap_single(priv->pdev, paddr, skb->len,
+                                        PCI_DMA_TODEVICE);
+                       dev_kfree_skb_any(skb);
                        priv->tx_skb[i] = NULL;
                }
        }
@@ -937,6 +954,98 @@ static const struct qtnf_bus_ops qtnf_pcie_bus_ops = {
        .data_rx_stop           = qtnf_pcie_data_rx_stop,
 };
 
+static int qtnf_dbg_mps_show(struct seq_file *s, void *data)
+{
+       struct qtnf_bus *bus = dev_get_drvdata(s->private);
+       struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+
+       seq_printf(s, "%d\n", priv->mps);
+
+       return 0;
+}
+
+static int qtnf_dbg_msi_show(struct seq_file *s, void *data)
+{
+       struct qtnf_bus *bus = dev_get_drvdata(s->private);
+       struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+
+       seq_printf(s, "%u\n", priv->msi_enabled);
+
+       return 0;
+}
+
+static int qtnf_dbg_irq_stats(struct seq_file *s, void *data)
+{
+       struct qtnf_bus *bus = dev_get_drvdata(s->private);
+       struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+       u32 reg = readl(PCIE_HDP_INT_EN(priv->pcie_reg_base));
+       u32 status;
+
+       seq_printf(s, "pcie_irq_count(%u)\n", priv->pcie_irq_count);
+       seq_printf(s, "pcie_irq_tx_count(%u)\n", priv->pcie_irq_tx_count);
+       status = reg &  PCIE_HDP_INT_TX_BITS;
+       seq_printf(s, "pcie_irq_tx_status(%s)\n",
+                  (status == PCIE_HDP_INT_TX_BITS) ? "EN" : "DIS");
+       seq_printf(s, "pcie_irq_rx_count(%u)\n", priv->pcie_irq_rx_count);
+       status = reg &  PCIE_HDP_INT_RX_BITS;
+       seq_printf(s, "pcie_irq_rx_status(%s)\n",
+                  (status == PCIE_HDP_INT_RX_BITS) ? "EN" : "DIS");
+       seq_printf(s, "pcie_irq_uf_count(%u)\n", priv->pcie_irq_uf_count);
+       status = reg &  PCIE_HDP_INT_HHBM_UF;
+       seq_printf(s, "pcie_irq_hhbm_uf_status(%s)\n",
+                  (status == PCIE_HDP_INT_HHBM_UF) ? "EN" : "DIS");
+
+       return 0;
+}
+
+static int qtnf_dbg_hdp_stats(struct seq_file *s, void *data)
+{
+       struct qtnf_bus *bus = dev_get_drvdata(s->private);
+       struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+
+       seq_printf(s, "tx_full_count(%u)\n", priv->tx_full_count);
+       seq_printf(s, "tx_done_count(%u)\n", priv->tx_done_count);
+       seq_printf(s, "tx_reclaim_done(%u)\n", priv->tx_reclaim_done);
+       seq_printf(s, "tx_reclaim_req(%u)\n", priv->tx_reclaim_req);
+
+       seq_printf(s, "tx_bd_r_index(%u)\n", priv->tx_bd_r_index);
+       seq_printf(s, "tx_bd_p_index(%u)\n",
+                  readl(PCIE_HDP_RX0DMA_CNT(priv->pcie_reg_base))
+                       & (priv->tx_bd_num - 1));
+       seq_printf(s, "tx_bd_w_index(%u)\n", priv->tx_bd_w_index);
+       seq_printf(s, "tx queue len(%u)\n",
+                  CIRC_CNT(priv->tx_bd_w_index, priv->tx_bd_r_index,
+                           priv->tx_bd_num));
+
+       seq_printf(s, "rx_bd_r_index(%u)\n", priv->rx_bd_r_index);
+       seq_printf(s, "rx_bd_p_index(%u)\n",
+                  readl(PCIE_HDP_TX0DMA_CNT(priv->pcie_reg_base))
+                       & (priv->rx_bd_num - 1));
+       seq_printf(s, "rx_bd_w_index(%u)\n", priv->rx_bd_w_index);
+       seq_printf(s, "rx alloc queue len(%u)\n",
+                  CIRC_SPACE(priv->rx_bd_w_index, priv->rx_bd_r_index,
+                             priv->rx_bd_num));
+
+       return 0;
+}
+
+static int qtnf_dbg_shm_stats(struct seq_file *s, void *data)
+{
+       struct qtnf_bus *bus = dev_get_drvdata(s->private);
+       struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+
+       seq_printf(s, "shm_ipc_ep_in.tx_packet_count(%zu)\n",
+                  priv->shm_ipc_ep_in.tx_packet_count);
+       seq_printf(s, "shm_ipc_ep_in.rx_packet_count(%zu)\n",
+                  priv->shm_ipc_ep_in.rx_packet_count);
+       seq_printf(s, "shm_ipc_ep_out.tx_packet_count(%zu)\n",
+                  priv->shm_ipc_ep_out.tx_timeout_count);
+       seq_printf(s, "shm_ipc_ep_out.rx_packet_count(%zu)\n",
+                  priv->shm_ipc_ep_out.rx_packet_count);
+
+       return 0;
+}
+
 static int qtnf_ep_fw_send(struct qtnf_pcie_bus_priv *priv, uint32_t size,
                           int blk, const u8 *pblk, const u8 *fw)
 {
@@ -1052,181 +1161,102 @@ qtnf_ep_fw_load(struct qtnf_pcie_bus_priv *priv, const u8 *fw, u32 fw_size)
        return 0;
 }
 
-static void qtnf_firmware_load(const struct firmware *fw, void *context)
-{
-       struct qtnf_pcie_bus_priv *priv = (void *)context;
-       struct pci_dev *pdev = priv->pdev;
-       struct qtnf_bus *bus = pci_get_drvdata(pdev);
-       int ret;
-
-       if (!fw) {
-               pr_err("failed to get firmware %s\n", bus->fwname);
-               goto fw_load_err;
-       }
-
-       ret = qtnf_ep_fw_load(priv, fw->data, fw->size);
-       if (ret) {
-               pr_err("FW upload error\n");
-               goto fw_load_err;
-       }
-
-       if (qtnf_poll_state(&priv->bda->bda_ep_state, QTN_EP_FW_DONE,
-                           QTN_FW_DL_TIMEOUT_MS)) {
-               pr_err("FW bringup timed out\n");
-               goto fw_load_err;
-       }
-
-       bus->fw_state = QTNF_FW_STATE_FW_DNLD_DONE;
-       pr_info("firmware is up and running\n");
-
-fw_load_err:
-
-       if (fw)
-               release_firmware(fw);
-
-       complete(&bus->request_firmware_complete);
-}
-
-static int qtnf_bringup_fw(struct qtnf_bus *bus)
+static void qtnf_fw_work_handler(struct work_struct *work)
 {
+       struct qtnf_bus *bus = container_of(work, struct qtnf_bus, fw_work);
        struct qtnf_pcie_bus_priv *priv = (void *)get_bus_priv(bus);
        struct pci_dev *pdev = priv->pdev;
+       const struct firmware *fw;
        int ret;
        u32 state = QTN_RC_FW_LOADRDY | QTN_RC_FW_QLINK;
 
-       if (flashboot)
+       if (flashboot) {
                state |= QTN_RC_FW_FLASHBOOT;
+       } else {
+               ret = request_firmware(&fw, bus->fwname, &pdev->dev);
+               if (ret < 0) {
+                       pr_err("failed to get firmware %s\n", bus->fwname);
+                       goto fw_load_fail;
+               }
+       }
 
        qtnf_set_state(&priv->bda->bda_rc_state, state);
 
        if (qtnf_poll_state(&priv->bda->bda_ep_state, QTN_EP_FW_LOADRDY,
                            QTN_FW_DL_TIMEOUT_MS)) {
                pr_err("card is not ready\n");
-               return -ETIMEDOUT;
+               goto fw_load_fail;
        }
 
        qtnf_clear_state(&priv->bda->bda_ep_state, QTN_EP_FW_LOADRDY);
 
        if (flashboot) {
-               pr_info("Booting FW from flash\n");
-
-               if (!qtnf_poll_state(&priv->bda->bda_ep_state, QTN_EP_FW_DONE,
-                                    QTN_FW_DL_TIMEOUT_MS))
-                       bus->fw_state = QTNF_FW_STATE_FW_DNLD_DONE;
+               pr_info("booting firmware from flash\n");
+       } else {
+               pr_info("starting firmware upload: %s\n", bus->fwname);
 
-               return 0;
+               ret = qtnf_ep_fw_load(priv, fw->data, fw->size);
+               release_firmware(fw);
+               if (ret) {
+                       pr_err("firmware upload error\n");
+                       goto fw_load_fail;
+               }
        }
 
-       pr_info("starting firmware upload: %s\n", bus->fwname);
-
-       ret = request_firmware_nowait(THIS_MODULE, 1, bus->fwname, &pdev->dev,
-                                     GFP_KERNEL, priv, qtnf_firmware_load);
-       if (ret < 0)
-               pr_err("request_firmware_nowait error %d\n", ret);
-       else
-               ret = 1;
-
-       return ret;
-}
-
-static void qtnf_reclaim_tasklet_fn(unsigned long data)
-{
-       struct qtnf_pcie_bus_priv *priv = (void *)data;
-
-       qtnf_pcie_data_tx_reclaim(priv);
-       qtnf_en_txdone_irq(priv);
-}
-
-static int qtnf_dbg_mps_show(struct seq_file *s, void *data)
-{
-       struct qtnf_bus *bus = dev_get_drvdata(s->private);
-       struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+       if (qtnf_poll_state(&priv->bda->bda_ep_state, QTN_EP_FW_DONE,
+                           QTN_FW_DL_TIMEOUT_MS)) {
+               pr_err("firmware bringup timed out\n");
+               goto fw_load_fail;
+       }
 
-       seq_printf(s, "%d\n", priv->mps);
+       bus->fw_state = QTNF_FW_STATE_FW_DNLD_DONE;
+       pr_info("firmware is up and running\n");
 
-       return 0;
-}
+       if (qtnf_poll_state(&priv->bda->bda_ep_state,
+                           QTN_EP_FW_QLINK_DONE, QTN_FW_QLINK_TIMEOUT_MS)) {
+               pr_err("firmware runtime failure\n");
+               goto fw_load_fail;
+       }
 
-static int qtnf_dbg_msi_show(struct seq_file *s, void *data)
-{
-       struct qtnf_bus *bus = dev_get_drvdata(s->private);
-       struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+       ret = qtnf_core_attach(bus);
+       if (ret) {
+               pr_err("failed to attach core\n");
+               goto fw_load_fail;
+       }
 
-       seq_printf(s, "%u\n", priv->msi_enabled);
+       qtnf_debugfs_init(bus, DRV_NAME);
+       qtnf_debugfs_add_entry(bus, "mps", qtnf_dbg_mps_show);
+       qtnf_debugfs_add_entry(bus, "msi_enabled", qtnf_dbg_msi_show);
+       qtnf_debugfs_add_entry(bus, "hdp_stats", qtnf_dbg_hdp_stats);
+       qtnf_debugfs_add_entry(bus, "irq_stats", qtnf_dbg_irq_stats);
+       qtnf_debugfs_add_entry(bus, "shm_stats", qtnf_dbg_shm_stats);
 
-       return 0;
-}
+       goto fw_load_exit;
 
-static int qtnf_dbg_irq_stats(struct seq_file *s, void *data)
-{
-       struct qtnf_bus *bus = dev_get_drvdata(s->private);
-       struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
-       u32 reg = readl(PCIE_HDP_INT_EN(priv->pcie_reg_base));
-       u32 status;
+fw_load_fail:
+       bus->fw_state = QTNF_FW_STATE_DEAD;
 
-       seq_printf(s, "pcie_irq_count(%u)\n", priv->pcie_irq_count);
-       seq_printf(s, "pcie_irq_tx_count(%u)\n", priv->pcie_irq_tx_count);
-       status = reg &  PCIE_HDP_INT_TX_BITS;
-       seq_printf(s, "pcie_irq_tx_status(%s)\n",
-                  (status == PCIE_HDP_INT_TX_BITS) ? "EN" : "DIS");
-       seq_printf(s, "pcie_irq_rx_count(%u)\n", priv->pcie_irq_rx_count);
-       status = reg &  PCIE_HDP_INT_RX_BITS;
-       seq_printf(s, "pcie_irq_rx_status(%s)\n",
-                  (status == PCIE_HDP_INT_RX_BITS) ? "EN" : "DIS");
-       seq_printf(s, "pcie_irq_uf_count(%u)\n", priv->pcie_irq_uf_count);
-       status = reg &  PCIE_HDP_INT_HHBM_UF;
-       seq_printf(s, "pcie_irq_hhbm_uf_status(%s)\n",
-                  (status == PCIE_HDP_INT_HHBM_UF) ? "EN" : "DIS");
-
-       return 0;
+fw_load_exit:
+       complete(&bus->firmware_init_complete);
+       put_device(&pdev->dev);
 }
 
-static int qtnf_dbg_hdp_stats(struct seq_file *s, void *data)
+static void qtnf_bringup_fw_async(struct qtnf_bus *bus)
 {
-       struct qtnf_bus *bus = dev_get_drvdata(s->private);
-       struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
-
-       seq_printf(s, "tx_full_count(%u)\n", priv->tx_full_count);
-       seq_printf(s, "tx_done_count(%u)\n", priv->tx_done_count);
-       seq_printf(s, "tx_reclaim_done(%u)\n", priv->tx_reclaim_done);
-       seq_printf(s, "tx_reclaim_req(%u)\n", priv->tx_reclaim_req);
-
-       seq_printf(s, "tx_bd_r_index(%u)\n", priv->tx_bd_r_index);
-       seq_printf(s, "tx_bd_p_index(%u)\n",
-                  readl(PCIE_HDP_RX0DMA_CNT(priv->pcie_reg_base))
-                       & (priv->tx_bd_num - 1));
-       seq_printf(s, "tx_bd_w_index(%u)\n", priv->tx_bd_w_index);
-       seq_printf(s, "tx queue len(%u)\n",
-                  CIRC_CNT(priv->tx_bd_w_index, priv->tx_bd_r_index,
-                           priv->tx_bd_num));
-
-       seq_printf(s, "rx_bd_r_index(%u)\n", priv->rx_bd_r_index);
-       seq_printf(s, "rx_bd_p_index(%u)\n",
-                  readl(PCIE_HDP_TX0DMA_CNT(priv->pcie_reg_base))
-                       & (priv->rx_bd_num - 1));
-       seq_printf(s, "rx_bd_w_index(%u)\n", priv->rx_bd_w_index);
-       seq_printf(s, "rx alloc queue len(%u)\n",
-                  CIRC_SPACE(priv->rx_bd_w_index, priv->rx_bd_r_index,
-                             priv->rx_bd_num));
+       struct qtnf_pcie_bus_priv *priv = (void *)get_bus_priv(bus);
+       struct pci_dev *pdev = priv->pdev;
 
-       return 0;
+       get_device(&pdev->dev);
+       INIT_WORK(&bus->fw_work, qtnf_fw_work_handler);
+       schedule_work(&bus->fw_work);
 }
 
-static int qtnf_dbg_shm_stats(struct seq_file *s, void *data)
+static void qtnf_reclaim_tasklet_fn(unsigned long data)
 {
-       struct qtnf_bus *bus = dev_get_drvdata(s->private);
-       struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
-
-       seq_printf(s, "shm_ipc_ep_in.tx_packet_count(%zu)\n",
-                  priv->shm_ipc_ep_in.tx_packet_count);
-       seq_printf(s, "shm_ipc_ep_in.rx_packet_count(%zu)\n",
-                  priv->shm_ipc_ep_in.rx_packet_count);
-       seq_printf(s, "shm_ipc_ep_out.tx_packet_count(%zu)\n",
-                  priv->shm_ipc_ep_out.tx_timeout_count);
-       seq_printf(s, "shm_ipc_ep_out.rx_packet_count(%zu)\n",
-                  priv->shm_ipc_ep_out.rx_packet_count);
+       struct qtnf_pcie_bus_priv *priv = (void *)data;
 
-       return 0;
+       qtnf_pcie_data_tx_reclaim(priv);
+       qtnf_en_txdone_irq(priv);
 }
 
 static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
@@ -1237,10 +1267,8 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
        bus = devm_kzalloc(&pdev->dev,
                           sizeof(*bus) + sizeof(*pcie_priv), GFP_KERNEL);
-       if (!bus) {
-               ret = -ENOMEM;
-               goto err_init;
-       }
+       if (!bus)
+               return -ENOMEM;
 
        pcie_priv = get_bus_priv(bus);
 
@@ -1251,7 +1279,7 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        pcie_priv->pdev = pdev;
 
        strcpy(bus->fwname, QTN_PCI_PEARL_FW_NAME);
-       init_completion(&bus->request_firmware_complete);
+       init_completion(&bus->firmware_init_complete);
        mutex_init(&bus->bus_lock);
        spin_lock_init(&pcie_priv->tx0_lock);
        spin_lock_init(&pcie_priv->irq_lock);
@@ -1267,11 +1295,18 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        pcie_priv->tx_reclaim_done = 0;
        pcie_priv->tx_reclaim_req = 0;
 
+       tasklet_init(&pcie_priv->reclaim_tq, qtnf_reclaim_tasklet_fn,
+                    (unsigned long)pcie_priv);
+
+       init_dummy_netdev(&bus->mux_dev);
+       netif_napi_add(&bus->mux_dev, &bus->mux_napi,
+                      qtnf_rx_poll, 10);
+
        pcie_priv->workqueue = create_singlethread_workqueue("QTNF_PEARL_PCIE");
        if (!pcie_priv->workqueue) {
                pr_err("failed to alloc bus workqueue\n");
                ret = -ENODEV;
-               goto err_priv;
+               goto err_init;
        }
 
        if (!pci_is_pcie(pdev)) {
@@ -1300,14 +1335,8 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                goto err_base;
        }
 
-       pcim_pin_device(pdev);
        pci_set_master(pdev);
-
-       ret = qtnf_pcie_init_irq(pcie_priv);
-       if (ret < 0) {
-               pr_err("irq init failed\n");
-               goto err_base;
-       }
+       qtnf_pcie_init_irq(pcie_priv);
 
        ret = qtnf_pcie_init_memory(pcie_priv);
        if (ret < 0) {
@@ -1315,22 +1344,18 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                goto err_base;
        }
 
+       pci_save_state(pdev);
+
        ret = qtnf_pcie_init_shm_ipc(pcie_priv);
        if (ret < 0) {
                pr_err("PCIE SHM IPC init failed\n");
                goto err_base;
        }
 
-       ret = devm_add_action(&pdev->dev, free_xfer_buffers, (void *)pcie_priv);
-       if (ret) {
-               pr_err("custom release callback init failed\n");
-               goto err_base;
-       }
-
        ret = qtnf_pcie_init_xfer(pcie_priv);
        if (ret) {
                pr_err("PCIE xfer init failed\n");
-               goto err_base;
+               goto err_ipc;
        }
 
        /* init default irq settings */
@@ -1343,58 +1368,28 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                               "qtnf_pcie_irq", (void *)bus);
        if (ret) {
                pr_err("failed to request pcie irq %d\n", pdev->irq);
-               goto err_base;
-       }
-
-       tasklet_init(&pcie_priv->reclaim_tq, qtnf_reclaim_tasklet_fn,
-                    (unsigned long)pcie_priv);
-       init_dummy_netdev(&bus->mux_dev);
-       netif_napi_add(&bus->mux_dev, &bus->mux_napi,
-                      qtnf_rx_poll, 10);
-
-       ret = qtnf_bringup_fw(bus);
-       if (ret < 0)
-               goto err_bringup_fw;
-       else if (ret)
-               wait_for_completion(&bus->request_firmware_complete);
-
-       if (bus->fw_state != QTNF_FW_STATE_FW_DNLD_DONE) {
-               pr_err("failed to start FW\n");
-               goto err_bringup_fw;
-       }
-
-       if (qtnf_poll_state(&pcie_priv->bda->bda_ep_state, QTN_EP_FW_QLINK_DONE,
-                           QTN_FW_QLINK_TIMEOUT_MS)) {
-               pr_err("FW runtime failure\n");
-               goto err_bringup_fw;
+               goto err_xfer;
        }
 
-       ret = qtnf_core_attach(bus);
-       if (ret) {
-               pr_err("failed to attach core\n");
-               goto err_bringup_fw;
-       }
-
-       qtnf_debugfs_init(bus, DRV_NAME);
-       qtnf_debugfs_add_entry(bus, "mps", qtnf_dbg_mps_show);
-       qtnf_debugfs_add_entry(bus, "msi_enabled", qtnf_dbg_msi_show);
-       qtnf_debugfs_add_entry(bus, "hdp_stats", qtnf_dbg_hdp_stats);
-       qtnf_debugfs_add_entry(bus, "irq_stats", qtnf_dbg_irq_stats);
-       qtnf_debugfs_add_entry(bus, "shm_stats", qtnf_dbg_shm_stats);
+       qtnf_bringup_fw_async(bus);
 
        return 0;
 
-err_bringup_fw:
-       netif_napi_del(&bus->mux_napi);
+err_xfer:
+       qtnf_free_xfer_buffers(pcie_priv);
+
+err_ipc:
+       qtnf_pcie_free_shm_ipc(pcie_priv);
 
 err_base:
        flush_workqueue(pcie_priv->workqueue);
        destroy_workqueue(pcie_priv->workqueue);
+       netif_napi_del(&bus->mux_napi);
 
-err_priv:
+err_init:
+       tasklet_kill(&pcie_priv->reclaim_tq);
        pci_set_drvdata(pdev, NULL);
 
-err_init:
        return ret;
 }
 
@@ -1407,18 +1402,23 @@ static void qtnf_pcie_remove(struct pci_dev *pdev)
        if (!bus)
                return;
 
+       wait_for_completion(&bus->firmware_init_complete);
+
+       if (bus->fw_state == QTNF_FW_STATE_ACTIVE)
+               qtnf_core_detach(bus);
+
        priv = get_bus_priv(bus);
 
-       qtnf_core_detach(bus);
        netif_napi_del(&bus->mux_napi);
-
        flush_workqueue(priv->workqueue);
        destroy_workqueue(priv->workqueue);
        tasklet_kill(&priv->reclaim_tq);
 
+       qtnf_free_xfer_buffers(priv);
        qtnf_debugfs_remove(bus);
 
        qtnf_pcie_free_shm_ipc(priv);
+       qtnf_reset_card(priv);
 }
 
 #ifdef CONFIG_PM_SLEEP
index c5a4e46d26efe55f9bebeec930e732cdcbe2ab6e..00bb21a1c47ae601c027afb106e971fdadd1065c 100644 (file)
@@ -46,6 +46,7 @@
 /* state transition timeouts */
 #define QTN_FW_DL_TIMEOUT_MS   3000
 #define QTN_FW_QLINK_TIMEOUT_MS        30000
+#define QTN_EP_RESET_WAIT_MS   1000
 
 #define PCIE_HDP_INT_RX_BITS (0                \
        | PCIE_HDP_INT_EP_TXDMA         \
index 5b48b425fa7f95eb4e590c6d81c17f5ba12eb90a..0bfe285b6b48bb3d62a436a6a8e32e65f98280d1 100644 (file)
 
 #define QTN_PEARL_IPC_IRQ_WORD(irq)    (BIT(irq) | BIT(irq + 16))
 #define QTN_PEARL_LHOST_IPC_IRQ                (6)
+#define QTN_PEARL_LHOST_EP_RESET       (7)
 
 #endif /* __PEARL_PCIE_H */
index 41dbf3130e2b8670c503b3d1062ade1290fd6f2a..9b79e59ee97b3784723d5de8c76be8da574ca96d 100644 (file)
@@ -5,8 +5,8 @@ config WLAN_VENDOR_RALINK
          If you have a wireless card belonging to this class, say Y.
 
          Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         the questions about  cards. If you say Y, you will be asked for
+         kernel: saying N will just cause the configurator to skip all the
+         questions about these cards. If you say Y, you will be asked for
          your specific card in the following questions.
 
 if WLAN_VENDOR_RALINK
index 8a8ba200396400f9b5ac4906be76845a7d7a31d0..3db988e689d788642eb8c5f65375c2fc4ed5414a 100644 (file)
@@ -5,8 +5,8 @@ config WLAN_VENDOR_REALTEK
          If you have a wireless card belonging to this class, say Y.
 
          Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         the questions about  cards. If you say Y, you will be asked for
+         kernel: saying N will just cause the configurator to skip all the
+         questions about these cards. If you say Y, you will be asked for
          your specific card in the following questions.
 
 if WLAN_VENDOR_REALTEK
index 121b94f09714858a49c04dca090192bf057730f0..9a1d15b3ce4535540184d34a5600be131ba329dd 100644 (file)
@@ -1450,6 +1450,7 @@ static int rtl8187_probe(struct usb_interface *intf,
                goto err_free_dev;
        }
        mutex_init(&priv->io_mutex);
+       mutex_init(&priv->conf_mutex);
 
        SET_IEEE80211_DEV(dev, &intf->dev);
        usb_set_intfdata(intf, dev);
@@ -1625,7 +1626,6 @@ static int rtl8187_probe(struct usb_interface *intf,
                printk(KERN_ERR "rtl8187: Cannot register device\n");
                goto err_free_dmabuf;
        }
-       mutex_init(&priv->conf_mutex);
        skb_queue_head_init(&priv->b_tx_status.queue);
 
        wiphy_info(dev->wiphy, "hwaddr %pM, %s V%d + %s, rfkill mask %d\n",
index d6c03bd5cc65e2aa96ef6b2fd25b5d97b20a9652..6db3389e2ceded11a0311dde6c8d52c3b070cda3 100644 (file)
@@ -244,6 +244,9 @@ static void _rtl_init_hw_vht_capab(struct ieee80211_hw *hw,
        struct rtl_priv *rtlpriv = rtl_priv(hw);
        struct rtl_hal *rtlhal = rtl_hal(rtlpriv);
 
+       if (!(rtlpriv->cfg->spec_ver & RTL_SPEC_SUPPORT_VHT))
+               return;
+
        if (rtlhal->hw_type == HARDWARE_TYPE_RTL8812AE ||
            rtlhal->hw_type == HARDWARE_TYPE_RTL8822BE) {
                u16 mcs_map;
@@ -397,6 +400,7 @@ static void _rtl_init_mac80211(struct ieee80211_hw *hw)
        ieee80211_hw_set(hw, MFP_CAPABLE);
        ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS);
        ieee80211_hw_set(hw, SUPPORTS_AMSDU_IN_AMPDU);
+       ieee80211_hw_set(hw, SUPPORT_FAST_XMIT);
 
        /* swlps or hwlps has been set in diff chip in init_sw_vars */
        if (rtlpriv->psc.swctrl_lps) {
@@ -886,8 +890,7 @@ static void _rtl_query_bandwidth_mode(struct ieee80211_hw *hw,
 
        tcb_desc->packet_bw = HT_CHANNEL_WIDTH_20_40;
 
-       if (rtlpriv->rtlhal.hw_type == HARDWARE_TYPE_RTL8812AE ||
-           rtlpriv->rtlhal.hw_type == HARDWARE_TYPE_RTL8821AE) {
+       if (rtlpriv->cfg->spec_ver & RTL_SPEC_SUPPORT_VHT) {
                if (mac->opmode == NL80211_IFTYPE_AP ||
                    mac->opmode == NL80211_IFTYPE_ADHOC ||
                    mac->opmode == NL80211_IFTYPE_MESH_POINT) {
@@ -1594,7 +1597,11 @@ static u16 rtl_get_tx_report_sn(struct ieee80211_hw *hw)
        struct rtl_tx_report *tx_report = &rtlpriv->tx_report;
        u16 sn;
 
-       sn = atomic_inc_return(&tx_report->sn) & 0x0FFF;
+       /* SW_DEFINE[11:8] are reserved (driver fills zeros)
+        * SW_DEFINE[7:2] are used by driver
+        * SW_DEFINE[1:0] are reserved for firmware (driver fills zeros)
+        */
+       sn = (atomic_inc_return(&tx_report->sn) & 0x003F) << 2;
 
        tx_report->last_sent_sn = sn;
        tx_report->last_sent_time = jiffies;
@@ -1622,14 +1629,23 @@ void rtl_tx_report_handler(struct ieee80211_hw *hw, u8 *tmp_buf, u8 c2h_cmd_len)
        struct rtl_priv *rtlpriv = rtl_priv(hw);
        struct rtl_tx_report *tx_report = &rtlpriv->tx_report;
        u16 sn;
+       u8 st, retry;
 
-       sn = ((tmp_buf[7] & 0x0F) << 8) | tmp_buf[6];
+       if (rtlpriv->cfg->spec_ver & RTL_SPEC_EXT_C2H) {
+               sn = GET_TX_REPORT_SN_V2(tmp_buf);
+               st = GET_TX_REPORT_ST_V2(tmp_buf);
+               retry = GET_TX_REPORT_RETRY_V2(tmp_buf);
+       } else {
+               sn = GET_TX_REPORT_SN_V1(tmp_buf);
+               st = GET_TX_REPORT_ST_V1(tmp_buf);
+               retry = GET_TX_REPORT_RETRY_V1(tmp_buf);
+       }
 
        tx_report->last_recv_sn = sn;
 
        RT_TRACE(rtlpriv, COMP_TX_REPORT, DBG_DMESG,
                 "Recv TX-Report st=0x%02X sn=0x%X retry=0x%X\n",
-                tmp_buf[0], sn, tmp_buf[2]);
+                st, sn, retry);
 }
 EXPORT_SYMBOL_GPL(rtl_tx_report_handler);
 
@@ -1643,7 +1659,8 @@ bool rtl_check_tx_report_acked(struct ieee80211_hw *hw)
 
        if (time_before(tx_report->last_sent_time + 3 * HZ, jiffies)) {
                RT_TRACE(rtlpriv, COMP_TX_REPORT, DBG_WARNING,
-                        "Check TX-Report timeout!!\n");
+                        "Check TX-Report timeout!! s_sn=0x%X r_sn=0x%X\n",
+                        tx_report->last_sent_sn, tx_report->last_recv_sn);
                return true;    /* 3 sec. (timeout) seen as acked */
        }
 
@@ -2629,6 +2646,11 @@ EXPORT_SYMBOL_GPL(rtl_global_var);
 
 static int __init rtl_core_module_init(void)
 {
+       BUILD_BUG_ON(TX_PWR_BY_RATE_NUM_RATE < TX_PWR_BY_RATE_NUM_SECTION);
+       BUILD_BUG_ON(MAX_RATE_SECTION_NUM != MAX_RATE_SECTION);
+       BUILD_BUG_ON(MAX_BASE_NUM_IN_PHY_REG_PG_24G != MAX_RATE_SECTION);
+       BUILD_BUG_ON(MAX_BASE_NUM_IN_PHY_REG_PG_5G != (MAX_RATE_SECTION - 1));
+
        if (rtl_rate_control_register())
                pr_err("rtl: Unable to register rtl_rc, use default RC !!\n");
 
index fd3b1fb35dff26121ecbd0bbb0724f8d93e0e6db..05beb16f0a0ab5f018ed5038021648b5412575db 100644 (file)
@@ -1104,7 +1104,7 @@ static void halbtc8723b1ant_ps_tdma(struct btc_coexist *btcoexist,
        }
 
        if ((type == 1) || (type == 2) || (type == 9) || (type == 11) ||
-           (type == 101) || (type == 102) || (type == 109) || (type == 101)) {
+           (type == 101) || (type == 102) || (type == 109) || (type == 111)) {
                if (!coex_sta->force_lps_on) {
                        /* Native power save TDMA, only for A2DP-only case
                         * 1/2/9/11 while wifi noisy threshold > 30
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.c
new file mode 100644 (file)
index 0000000..951b8c1
--- /dev/null
@@ -0,0 +1,55 @@
+/******************************************************************************
+ *
+ * Copyright(c) 2016-2017  Realtek Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ *****************************************************************************/
+#include "halbt_precomp.h"
+
+/*
+ * Apply the fixed wifi-only hardware configuration for the 8822B (per the
+ * function name). Every step is a masked baseband register write issued
+ * through halwifionly_phy_set_bb_reg() as (register offset, bit mask, value).
+ * The per-write labels below are the original author's; writes without a
+ * label are not documented in this file.
+ */
+void ex_hal8822b_wifi_only_hw_config(struct wifi_only_cfg *wifionlycfg)
+{
+       /* BB control */
+       halwifionly_phy_set_bb_reg(wifionlycfg, 0x4c, 0x01800000, 0x2);
+       /* SW control */
+       halwifionly_phy_set_bb_reg(wifionlycfg, 0xcb4, 0xff, 0x77);
+       /* antenna mux switch */
+       halwifionly_phy_set_bb_reg(wifionlycfg, 0x974, 0x300, 0x3);
+
+       halwifionly_phy_set_bb_reg(wifionlycfg, 0x1990, 0x300, 0x0);
+
+       halwifionly_phy_set_bb_reg(wifionlycfg, 0xcbc, 0x80000, 0x0);
+       /* switch to WL side controller and gnt_wl gnt_bt debug signal */
+       halwifionly_phy_set_bb_reg(wifionlycfg, 0x70, 0xff000000, 0x0e);
+       /* gnt_wl=1, gnt_bt=0 */
+       halwifionly_phy_set_bb_reg(wifionlycfg, 0x1704, 0xffffffff, 0x7700);
+       halwifionly_phy_set_bb_reg(wifionlycfg, 0x1700, 0xffffffff, 0xc00f0038);
+}
+
+/*
+ * Scan notification hook: forwards to hal8822b_wifi_only_switch_antenna()
+ * with the caller's @is_5g flag unchanged.
+ */
+void ex_hal8822b_wifi_only_scannotify(struct wifi_only_cfg *wifionlycfg,
+                                     u8 is_5g)
+{
+       hal8822b_wifi_only_switch_antenna(wifionlycfg, is_5g);
+}
+
+/*
+ * Band-switch notification hook: forwards to
+ * hal8822b_wifi_only_switch_antenna() with the caller's @is_5g flag unchanged.
+ */
+void ex_hal8822b_wifi_only_switchbandnotify(struct wifi_only_cfg *wifionlycfg,
+                                           u8 is_5g)
+{
+       hal8822b_wifi_only_switch_antenna(wifionlycfg, is_5g);
+}
+
+/*
+ * Write the band-dependent value into baseband register 0xcbc under mask
+ * 0x300: 0x1 when @is_5g is non-zero, 0x2 otherwise.
+ */
+void hal8822b_wifi_only_switch_antenna(struct wifi_only_cfg *wifionlycfg,
+                                      u8 is_5g)
+{
+       u32 ant_sel = is_5g ? 0x1 : 0x2;
+
+       halwifionly_phy_set_bb_reg(wifionlycfg, 0xcbc, 0x300, ant_sel);
+}
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.h
new file mode 100644 (file)
index 0000000..6ec3565
--- /dev/null
@@ -0,0 +1,25 @@
+/******************************************************************************
+ *
+ * Copyright(c) 2016-2017  Realtek Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ *****************************************************************************/
+#ifndef __INC_HAL8822BWIFIONLYHWCFG_H
+#define __INC_HAL8822BWIFIONLYHWCFG_H
+
+void ex_hal8822b_wifi_only_hw_config(struct wifi_only_cfg *wifionlycfg);
+void ex_hal8822b_wifi_only_scannotify(struct wifi_only_cfg *wifionlycfg,
+                                     u8 is_5g);
+void ex_hal8822b_wifi_only_switchbandnotify(struct wifi_only_cfg *wifionlycfg,
+                                           u8 is_5g);
+void hal8822b_wifi_only_switch_antenna(struct wifi_only_cfg *wifionlycfg,
+                                      u8 is_5g);
+#endif
index 1404729441a28f13592199c948f1e01a07d8297a..823694cb4fdb82fbe20f2ed7841be51326deae76 100644 (file)
@@ -1039,6 +1039,28 @@ static void halbtc_fill_h2c_cmd(void *bt_context, u8 element_id,
                                        cmd_len, cmd_buf);
 }
 
+/*
+ * Send the H2C_BT_PORT_ID command with a single zero payload byte through
+ * the HAL's fill_h2c_cmd() hook.
+ */
+void halbtc_send_wifi_port_id_cmd(void *bt_context)
+{
+       struct btc_coexist *coex = bt_context;
+       struct rtl_priv *priv = coex->adapter;
+       struct ieee80211_hw *hw = priv->mac80211.hw;
+       u8 port_id = 0;         /* port id [2:0] = 0 */
+
+       priv->cfg->ops->fill_h2c_cmd(hw, H2C_BT_PORT_ID, sizeof(port_id),
+                                    &port_id);
+}
+
+/*
+ * Invoke the HAL's optional set_default_port_id_cmd() hook; does nothing
+ * when the hook is not provided.
+ */
+void halbtc_set_default_port_id_cmd(void *bt_context)
+{
+       struct btc_coexist *coex = bt_context;
+       struct rtl_priv *priv = coex->adapter;
+       void (*set_port_id)(struct ieee80211_hw *hw);
+
+       set_port_id = priv->cfg->ops->set_default_port_id_cmd;
+       if (set_port_id)
+               set_port_id(priv->mac80211.hw);
+}
+
 static
 void halbtc_set_bt_reg(void *btc_context, u8 reg_type, u32 offset, u32 set_val)
 {
index 8ed2176565396e3cf2cdadec895e9dce41a60982..f5d8159a88eb4d7d8168195ba66d61f6d8521c2e 100644 (file)
@@ -691,6 +691,8 @@ void exhalbtc_lps_leave(struct btc_coexist *btcoexist);
 void exhalbtc_low_wifi_traffic_notify(struct btc_coexist *btcoexist);
 void exhalbtc_set_single_ant_path(struct btc_coexist *btcoexist,
                                  u8 single_ant_path);
+void halbtc_send_wifi_port_id_cmd(void *bt_context);
+void halbtc_set_default_port_id_cmd(void *bt_context);
 
 /* The following are used by wifi_only case */
 enum wifionly_chip_interface {
index 35b50be633f1cfdc7af283b33541a2465cbf10a5..fd13d4ef53b80fa173d6a1c1d8c2fd95ee4da533 100644 (file)
@@ -50,6 +50,11 @@ static const struct efuse_map RTL8712_SDIO_EFUSE_TABLE[] = {
        {11, 0, 0, 28}
 };
 
+/*
+ * Efuse accessor table: one-byte reads go to efuse_one_byte_read(), logical
+ * map reads go to efuse_shadow_read().
+ */
+static const struct rtl_efuse_ops efuse_ops = {
+       .efuse_onebyte_read = efuse_one_byte_read,
+       .efuse_logical_map_read = efuse_shadow_read,
+};
+
 static void efuse_shadow_read_1byte(struct ieee80211_hw *hw, u16 offset,
                                    u8 *value);
 static void efuse_shadow_read_2byte(struct ieee80211_hw *hw, u16 offset,
@@ -1364,3 +1369,11 @@ void rtl_fill_dummy(u8 *pfwbuf, u32 *pfwlen)
        *pfwlen = fwlen;
 }
 EXPORT_SYMBOL_GPL(rtl_fill_dummy);
+
+/* Point the efuse_ops member of @hw's rtl_priv at this file's efuse_ops. */
+void rtl_efuse_ops_init(struct ieee80211_hw *hw)
+{
+       struct rtl_priv *rtlpriv = rtl_priv(hw);
+
+       rtlpriv->efuse.efuse_ops = &efuse_ops;
+}
+EXPORT_SYMBOL_GPL(rtl_efuse_ops_init);
index 952fdc288f0e6f0d2e74551840456e7f83160192..dfa31c13fc7a04dea3d695a6448e2dda8ae5fabf 100644 (file)
@@ -116,5 +116,5 @@ void rtl_fill_dummy(u8 *pfwbuf, u32 *pfwlen);
 void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, const u8 *buffer,
                       u32 size);
 void rtl_fw_block_write(struct ieee80211_hw *hw, const u8 *buffer, u32 size);
-
+void rtl_efuse_ops_init(struct ieee80211_hw *hw);
 #endif
index 01ccf88848315de213b5de7a319e457195bd9398..2437422625bf506d1d43197a311a7b9848bbfc70 100644 (file)
@@ -2238,6 +2238,7 @@ int rtl_pci_probe(struct pci_dev *pdev,
        rtlpriv->cfg = (struct rtl_hal_cfg *)(id->driver_data);
        rtlpriv->intf_ops = &rtl_pci_ops;
        rtlpriv->glb_var = &rtl_global_var;
+       rtl_efuse_ops_init(hw);
 
        /* MEM map */
        err = pci_request_regions(pdev, KBUILD_MODNAME);
index d1cb7d405618f8795ff26442c4ae6337b23bf4eb..6c78c6dabbdfed7a4b3eb8397d5a243beb3600ac 100644 (file)
@@ -42,6 +42,23 @@ static u8 _rtl_rc_get_highest_rix(struct rtl_priv *rtlpriv,
        struct rtl_phy *rtlphy = &(rtlpriv->phy);
        struct rtl_sta_info *sta_entry = NULL;
        u16 wireless_mode = 0;
+       u8 nss;
+       struct ieee80211_tx_rate rate;
+
+       switch (get_rf_type(rtlphy)) {
+       case RF_4T4R:
+               nss = 4;
+               break;
+       case RF_3T3R:
+               nss = 3;
+               break;
+       case RF_2T2R:
+               nss = 2;
+               break;
+       default:
+               nss = 1;
+               break;
+       }
 
        /*
         *this rate is no use for true rate, firmware
@@ -66,28 +83,51 @@ static u8 _rtl_rc_get_highest_rix(struct rtl_priv *rtlpriv,
                        } else if (wireless_mode == WIRELESS_MODE_G) {
                                return G_MODE_MAX_RIX;
                        } else if (wireless_mode == WIRELESS_MODE_N_24G) {
-                               if (get_rf_type(rtlphy) != RF_2T2R)
+                               if (nss == 1)
                                        return N_MODE_MCS7_RIX;
                                else
                                        return N_MODE_MCS15_RIX;
                        } else if (wireless_mode == WIRELESS_MODE_AC_24G) {
-                               return AC_MODE_MCS9_RIX;
+                               if (sta->bandwidth == IEEE80211_STA_RX_BW_20) {
+                                       ieee80211_rate_set_vht(&rate,
+                                                              AC_MODE_MCS8_RIX,
+                                                              nss);
+                                       goto out;
+                               } else {
+                                       ieee80211_rate_set_vht(&rate,
+                                                              AC_MODE_MCS9_RIX,
+                                                              nss);
+                                       goto out;
+                               }
                        }
                        return 0;
                } else {
                        if (wireless_mode == WIRELESS_MODE_A) {
                                return A_MODE_MAX_RIX;
                        } else if (wireless_mode == WIRELESS_MODE_N_5G) {
-                               if (get_rf_type(rtlphy) != RF_2T2R)
+                               if (nss == 1)
                                        return N_MODE_MCS7_RIX;
                                else
                                        return N_MODE_MCS15_RIX;
                        } else if (wireless_mode == WIRELESS_MODE_AC_5G) {
-                               return AC_MODE_MCS9_RIX;
+                               if (sta->bandwidth == IEEE80211_STA_RX_BW_20) {
+                                       ieee80211_rate_set_vht(&rate,
+                                                              AC_MODE_MCS8_RIX,
+                                                              nss);
+                                       goto out;
+                               } else {
+                                       ieee80211_rate_set_vht(&rate,
+                                                              AC_MODE_MCS9_RIX,
+                                                              nss);
+                                       goto out;
+                               }
                        }
                        return 0;
                }
        }
+
+out:
+       return rate.idx;
 }
 
 static void _rtl_rc_rate_set_series(struct rtl_priv *rtlpriv,
@@ -111,9 +151,6 @@ static void _rtl_rc_rate_set_series(struct rtl_priv *rtlpriv,
        }
        rate->count = tries;
        rate->idx = rix >= 0x00 ? rix : 0x00;
-       if (rtlpriv->rtlhal.hw_type == HARDWARE_TYPE_RTL8812AE &&
-           wireless_mode == WIRELESS_MODE_AC_5G)
-               rate->idx += 0x10;/*2NSS for 8812AE*/
 
        if (!not_data) {
                if (txrc->short_preamble)
@@ -126,10 +163,10 @@ static void _rtl_rc_rate_set_series(struct rtl_priv *rtlpriv,
                        if (sta && sta->vht_cap.vht_supported)
                                rate->flags |= IEEE80211_TX_RC_80_MHZ_WIDTH;
                } else {
-                       if (mac->bw_40)
-                               rate->flags |= IEEE80211_TX_RC_40_MHZ_WIDTH;
                        if (mac->bw_80)
                                rate->flags |= IEEE80211_TX_RC_80_MHZ_WIDTH;
+                       else if (mac->bw_40)
+                               rate->flags |= IEEE80211_TX_RC_40_MHZ_WIDTH;
                }
 
                if (sgi_20 || sgi_40 || sgi_80)
index 9cff6bc4049c993a78ab7db6b512110a213834a0..cf551785eb089d1a695148c825ddd9b33d252dee 100644 (file)
@@ -299,9 +299,6 @@ static void _rtl92c_get_txpower_writeval_by_regulatory(struct ieee80211_hw *hw,
                        writeVal = 0x00000000;
                if (rtlpriv->dm.dynamic_txhighpower_lvl == TXHIGHPWRLEVEL_BT1)
                        writeVal = writeVal - 0x06060606;
-               else if (rtlpriv->dm.dynamic_txhighpower_lvl ==
-                        TXHIGHPWRLEVEL_BT2)
-                       writeVal = writeVal;
                *(p_outwriteval + rf) = writeVal;
        }
 }
index ac4a82de40c7b0c2f1163f655b828042e44b8a62..9ab56827124ec0ed7be44ccf4a259bfa1b512785 100644 (file)
@@ -427,7 +427,6 @@ static void _rtl_rx_process(struct ieee80211_hw *hw, struct sk_buff *skb)
                 (u32)hdr->addr1[0], (u32)hdr->addr1[1],
                 (u32)hdr->addr1[2], (u32)hdr->addr1[3],
                 (u32)hdr->addr1[4], (u32)hdr->addr1[5]);
-       memcpy(IEEE80211_SKB_RXCB(skb), rx_status, sizeof(*rx_status));
        ieee80211_rx(hw, skb);
 }
 
index f9ccd13c79f94e230bfaaa3b40d23e78dc09226c..e7bbbc95cdb1f6ef5611c93f4bf97e269624bdc3 100644 (file)
@@ -1125,7 +1125,8 @@ static void _rtl8723be_enable_aspm_back_door(struct ieee80211_hw *hw)
 
        /* Configuration Space offset 0x70f BIT7 is used to control L0S */
        tmp8 = _rtl8723be_dbi_read(rtlpriv, 0x70f);
-       _rtl8723be_dbi_write(rtlpriv, 0x70f, tmp8 | BIT(7));
+       _rtl8723be_dbi_write(rtlpriv, 0x70f, tmp8 | BIT(7) |
+                            ASPM_L1_LATENCY << 3);
 
        /* Configuration Space offset 0x719 Bit3 is for L1
         * BIT4 is for clock request
index ab5d462b1a3a1224c47e47ec7d09b0ee642b8499..9bb3d9dfce791d7d9dcb73ac9c6c41567cbec41f 100644 (file)
@@ -328,6 +328,7 @@ static const struct rtl_hal_cfg rtl8821ae_hal_cfg = {
        .alt_fw_name = "rtlwifi/rtl8821aefw.bin",
        .ops = &rtl8821ae_hal_ops,
        .mod_params = &rtl8821ae_mod_params,
+       .spec_ver = RTL_SPEC_SUPPORT_VHT,
        .maps[SYS_ISO_CTRL] = REG_SYS_ISO_CTRL,
        .maps[SYS_FUNC_EN] = REG_SYS_FUNC_EN,
        .maps[SYS_CLK] = REG_SYS_CLKR,
index 46dcb7fef19541b73e67db2172d6c96d90da4365..4f48b934ec01842c9d2f4c9ce37828f0f145df77 100644 (file)
@@ -154,10 +154,21 @@ enum rtl8192c_h2c_cmd {
        MAX_H2CCMD
 };
 
+enum {
+       H2C_BT_PORT_ID = 0x71,
+};
+
+/*
+ * Byte-field accessors for a TX report held in a C2H buffer (V1 and V2
+ * layouts). The macro argument is parenthesized so that pointer expressions
+ * such as (buf + 1) index the intended byte.
+ */
+#define GET_TX_REPORT_SN_V1(c2h)       ((c2h)[6])
+#define GET_TX_REPORT_ST_V1(c2h)       ((c2h)[0] & 0xC0)
+#define GET_TX_REPORT_RETRY_V1(c2h)    ((c2h)[2] & 0x3F)
+#define GET_TX_REPORT_SN_V2(c2h)       ((c2h)[6])
+#define GET_TX_REPORT_ST_V2(c2h)       ((c2h)[7] & 0xC0)
+#define GET_TX_REPORT_RETRY_V2(c2h)    ((c2h)[8] & 0x3F)
+
 #define MAX_TX_COUNT                   4
 #define MAX_REGULATION_NUM             4
 #define MAX_RF_PATH_NUM                        4
-#define MAX_RATE_SECTION_NUM           6
+#define MAX_RATE_SECTION_NUM           6       /* = MAX_RATE_SECTION */
 #define MAX_2_4G_BANDWIDTH_NUM         4
 #define MAX_5G_BANDWIDTH_NUM           4
 #define        MAX_RF_PATH                     4
@@ -167,8 +178,9 @@ enum rtl8192c_h2c_cmd {
 #define TX_PWR_BY_RATE_NUM_BAND                2
 #define TX_PWR_BY_RATE_NUM_RF          4
 #define TX_PWR_BY_RATE_NUM_SECTION     12
-#define MAX_BASE_NUM_IN_PHY_REG_PG_24G  6
-#define MAX_BASE_NUM_IN_PHY_REG_PG_5G  5
+#define TX_PWR_BY_RATE_NUM_RATE                84 /* >= TX_PWR_BY_RATE_NUM_SECTION */
+#define MAX_BASE_NUM_IN_PHY_REG_PG_24G 6  /* MAX_RATE_SECTION */
+#define MAX_BASE_NUM_IN_PHY_REG_PG_5G  5  /* MAX_RATE_SECTION -1 */
 
 #define BUFDESC_SEG_NUM                1 /* 0:2 seg, 1: 4 seg, 2: 8 seg */
 
@@ -264,6 +276,7 @@ enum rate_section {
        HT_MCS8_MCS15,
        VHT_1SSMCS0_1SSMCS9,
        VHT_2SSMCS0_2SSMCS9,
+       MAX_RATE_SECTION,
 };
 
 enum intf_type {
@@ -278,6 +291,13 @@ enum radio_path {
        RF90_PATH_D = 3,
 };
 
+/*
+ * Single-bit flags RF_MASK_A..RF_MASK_D occupying bits 0..3.
+ * NOTE(review): bit n presumably corresponds to enum radio_path value n
+ * (RF90_PATH_A..RF90_PATH_D) -- confirm against the users of these masks.
+ */
+enum radio_mask {
+       RF_MASK_A = BIT(0),
+       RF_MASK_B = BIT(1),
+       RF_MASK_C = BIT(2),
+       RF_MASK_D = BIT(3),
+};
+
 enum regulation_txpwr_lmt {
        TXPWR_LMT_FCC = 0,
        TXPWR_LMT_MKK = 1,
@@ -571,6 +591,7 @@ enum ht_channel_width {
        HT_CHANNEL_WIDTH_20 = 0,
        HT_CHANNEL_WIDTH_20_40 = 1,
        HT_CHANNEL_WIDTH_80 = 2,
+       HT_CHANNEL_WIDTH_MAX,
 };
 
 /* Ref: 802.11i sepc D10.0 7.3.2.25.1
@@ -952,6 +973,8 @@ enum package_type {
 
 enum rtl_spec_ver {
        RTL_SPEC_NEW_RATEID = BIT(0),   /* use ratr_table_mode_new */
+       RTL_SPEC_SUPPORT_VHT = BIT(1),  /* support VHT */
+       RTL_SPEC_EXT_C2H = BIT(2),      /* extend FW C2H (e.g. TX REPORT) */
 };
 
 struct octet_string {
@@ -1277,7 +1300,7 @@ struct rtl_phy {
        u32 tx_power_by_rate_offset[TX_PWR_BY_RATE_NUM_BAND]
                                   [TX_PWR_BY_RATE_NUM_RF]
                                   [TX_PWR_BY_RATE_NUM_RF]
-                                  [TX_PWR_BY_RATE_NUM_SECTION];
+                                  [TX_PWR_BY_RATE_NUM_RATE];
        u8 txpwr_by_rate_base_24g[TX_PWR_BY_RATE_NUM_RF]
                                 [TX_PWR_BY_RATE_NUM_RF]
                                 [MAX_BASE_NUM_IN_PHY_REG_PG_24G];
@@ -1794,6 +1817,7 @@ struct rtl_dm {
 #define        EFUSE_MAX_LOGICAL_SIZE                  512
 
 struct rtl_efuse {
+       const struct rtl_efuse_ops *efuse_ops;
        bool autoLoad_ok;
        bool bootfromefuse;
        u16 max_physical_size;
@@ -1899,6 +1923,12 @@ struct rtl_efuse {
        u8 channel_plan;
 };
 
+/*
+ * Callback table for efuse access, referenced from struct rtl_efuse through
+ * its efuse_ops pointer.
+ * NOTE(review): the return value and argument semantics of both callbacks
+ * are defined by their implementations, which are not visible here.
+ */
+struct rtl_efuse_ops {
+       int (*efuse_onebyte_read)(struct ieee80211_hw *hw, u16 addr, u8 *data);
+       void (*efuse_logical_map_read)(struct ieee80211_hw *hw, u8 type,
+                                      u16 offset, u32 *value);
+};
+
 struct rtl_tx_report {
        atomic_t sn;
        u16 last_sent_sn;
@@ -2231,6 +2261,7 @@ struct rtl_hal_ops {
        void (*bt_coex_off_before_lps) (struct ieee80211_hw *hw);
        void (*fill_h2c_cmd) (struct ieee80211_hw *hw, u8 element_id,
                              u32 cmd_len, u8 *p_cmdbuffer);
+       void (*set_default_port_id_cmd)(struct ieee80211_hw *hw);
        bool (*get_btc_status) (void);
        bool (*is_fw_header)(struct rtlwifi_firmware_header *hdr);
        u32 (*rx_command_packet)(struct ieee80211_hw *hw,
index 7c5e4ca4e3d02053c69329085f7407f0e57c3e3c..f004be33fcfa38d48af3d4073130452c817ad9b1 100644 (file)
@@ -5,8 +5,8 @@ config WLAN_VENDOR_RSI
          If you have a wireless card belonging to this class, say Y.
 
          Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         the questions about  cards. If you say Y, you will be asked for
+         kernel: saying N will just cause the configurator to skip all the
+         questions about these cards. If you say Y, you will be asked for
          your specific card in the following questions.
 
 if WLAN_VENDOR_RSI
@@ -42,4 +42,13 @@ config RSI_USB
          This option enables the USB bus support in rsi drivers.
          Select M (recommended), if you have a RSI 1x1 wireless module.
 
+config RSI_COEX
+       bool "Redpine Signals WLAN BT Coexistence support"
+       depends on BT_HCIRSI && RSI_91X
+       default y
+       ---help---
+         This option enables the WLAN BT coex support in rsi drivers.
+         Select Y (recommended), if you want to use this feature
+         and you have an RS9113 module.
+
 endif # WLAN_VENDOR_RSI
index 47c45908d8941251819da15045f0b024d4d5e52f..ff87121a592840057a6e7852f09cc95f4d9bb030 100644 (file)
@@ -5,6 +5,7 @@ rsi_91x-y                       += rsi_91x_mac80211.o
 rsi_91x-y                      += rsi_91x_mgmt.o
 rsi_91x-y                      += rsi_91x_hal.o
 rsi_91x-y                      += rsi_91x_ps.o
+rsi_91x-$(CONFIG_RSI_COEX)     += rsi_91x_coex.o
 rsi_91x-$(CONFIG_RSI_DEBUGFS)  += rsi_91x_debugfs.o
 
 rsi_usb-y                      += rsi_91x_usb.o rsi_91x_usb_ops.o
diff --git a/drivers/net/wireless/rsi/rsi_91x_coex.c b/drivers/net/wireless/rsi/rsi_91x_coex.c
new file mode 100644 (file)
index 0000000..d055099
--- /dev/null
@@ -0,0 +1,179 @@
+/**
+ * Copyright (c) 2018 Redpine Signals Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "rsi_main.h"
+#include "rsi_coex.h"
+#include "rsi_mgmt.h"
+#include "rsi_hal.h"
+
+/*
+ * Pick the coex TX queue to service next. When several queues hold packets
+ * the order of preference is WLAN, then BT, then COMMON; returns
+ * RSI_COEX_Q_INVALID when all three are empty.
+ */
+static enum rsi_coex_queues rsi_coex_determine_coex_q
+                       (struct rsi_coex_ctrl_block *coex_cb)
+{
+       struct sk_buff_head *txq = coex_cb->coex_tx_qs;
+
+       if (skb_queue_len(&txq[RSI_COEX_Q_WLAN]) > 0)
+               return RSI_COEX_Q_WLAN;
+       if (skb_queue_len(&txq[RSI_COEX_Q_BT]) > 0)
+               return RSI_COEX_Q_BT;
+       if (skb_queue_len(&txq[RSI_COEX_Q_COMMON]) > 0)
+               return RSI_COEX_Q_COMMON;
+
+       return RSI_COEX_Q_INVALID;
+}
+
+/*
+ * Drain pending coex TX work: repeatedly ask rsi_coex_determine_coex_q() for
+ * a queue and, when it is the BT queue, dequeue one skb and hand it to
+ * rsi_send_bt_pkt(). Stops once no queue is reported.
+ *
+ * NOTE(review): only the BT queue is dequeued here. If the COMMON or WLAN
+ * queue ever held a packet, the selector would keep returning it and this
+ * loop would not terminate. rsi_coex_send_pkt() in this file never enqueues
+ * onto COMMON/WLAN -- confirm that no other producer exists.
+ */
+static void rsi_coex_sched_tx_pkts(struct rsi_coex_ctrl_block *coex_cb)
+{
+       enum rsi_coex_queues coex_q = RSI_COEX_Q_INVALID;
+       struct sk_buff *skb;
+
+       do {
+               coex_q = rsi_coex_determine_coex_q(coex_cb);
+               rsi_dbg(INFO_ZONE, "queue = %d\n", coex_q);
+
+               if (coex_q == RSI_COEX_Q_BT) {
+                       skb = skb_dequeue(&coex_cb->coex_tx_qs[RSI_COEX_Q_BT]);
+                       rsi_send_bt_pkt(coex_cb->priv, skb);
+               }
+       } while (coex_q != RSI_COEX_Q_INVALID);
+}
+
+/*
+ * Body of the coex TX thread: wait on the thread's event, reset it, run
+ * rsi_coex_sched_tx_pkts(), and repeat until thread_done reads non-zero;
+ * then signal the thread's completion and exit.
+ */
+static void rsi_coex_scheduler_thread(struct rsi_common *common)
+{
+       struct rsi_coex_ctrl_block *coex_cb =
+               (struct rsi_coex_ctrl_block *)common->coex_cb;
+       u32 timeout = EVENT_WAIT_FOREVER;
+
+       do {
+               /* The event is set by rsi_coex_send_pkt() after it queues an skb */
+               rsi_wait_event(&coex_cb->coex_tx_thread.event, timeout);
+               rsi_reset_event(&coex_cb->coex_tx_thread.event);
+
+               rsi_coex_sched_tx_pkts(coex_cb);
+       } while (atomic_read(&coex_cb->coex_tx_thread.thread_done) == 0);
+
+       complete_and_exit(&coex_cb->coex_tx_thread.completion, 0);
+}
+
+/*
+ * Dispatch a received coex frame by the message-type byte found at
+ * RSI_RX_DESC_MSG_TYPE_OFFSET. Unrecognised types are ignored.
+ *
+ * Return: always 0.
+ */
+int rsi_coex_recv_pkt(struct rsi_common *common, u8 *msg)
+{
+       const u8 type = msg[RSI_RX_DESC_MSG_TYPE_OFFSET];
+
+       if (type == COMMON_CARD_READY_IND) {
+               rsi_dbg(INFO_ZONE, "common card ready received\n");
+               rsi_handle_card_ready(common, msg);
+       } else if (type == SLEEP_NOTIFY_IND) {
+               rsi_dbg(INFO_ZONE, "sleep notify received\n");
+               rsi_mgmt_pkt_recv(common, msg);
+       }
+
+       return 0;
+}
+
+/*
+ * Translate a HAL queue id into the matching coex queue index, or
+ * RSI_COEX_Q_INVALID when the id is not one of the three known queues.
+ */
+static inline int rsi_map_coex_q(u8 hal_queue)
+{
+       int coex_q;
+
+       switch (hal_queue) {
+       case RSI_COEX_Q:
+               coex_q = RSI_COEX_Q_COMMON;
+               break;
+       case RSI_WLAN_Q:
+               coex_q = RSI_COEX_Q_WLAN;
+               break;
+       case RSI_BT_Q:
+               coex_q = RSI_COEX_Q_BT;
+               break;
+       default:
+               coex_q = RSI_COEX_Q_INVALID;
+               break;
+       }
+
+       return coex_q;
+}
+
+/*
+ * Route an outgoing skb according to the coex queue that @hal_queue maps to.
+ * Packets for queues other than COMMON/WLAN are appended to the coex TX
+ * queue and the coex TX thread is woken. COMMON/WLAN packets are sent
+ * directly as management or data frames, chosen by skb->priority.
+ *
+ * Return: -EINVAL for an unknown queue; 0 when the skb was queued, or when
+ * the interface is down and the skb was completed with -EINVAL tx status;
+ * otherwise the status of rsi_send_mgmt_pkt() / rsi_send_data_pkt().
+ */
+int rsi_coex_send_pkt(void *priv, struct sk_buff *skb, u8 hal_queue)
+{
+       struct rsi_common *common = priv;
+       struct rsi_coex_ctrl_block *coex_cb = common->coex_cb;
+       enum rsi_coex_queues coex_q = rsi_map_coex_q(hal_queue);
+
+       if (coex_q == RSI_COEX_Q_INVALID) {
+               rsi_dbg(ERR_ZONE, "Invalid coex queue\n");
+               return -EINVAL;
+       }
+
+       /* Anything that is neither COMMON nor WLAN is left to the TX thread */
+       if (!(coex_q == RSI_COEX_Q_COMMON || coex_q == RSI_COEX_Q_WLAN)) {
+               skb_queue_tail(&coex_cb->coex_tx_qs[coex_q], skb);
+               rsi_set_event(&coex_cb->coex_tx_thread.event);
+               return 0;
+       }
+
+       /* With the interface down only internal management frames go out */
+       if (common->iface_down) {
+               struct skb_info *info =
+                       (struct skb_info *)&IEEE80211_SKB_CB(skb)->driver_data;
+
+               if (!(info->flags & INTERNAL_MGMT_PKT)) {
+                       rsi_indicate_tx_status(common->priv, skb, -EINVAL);
+                       return 0;
+               }
+       }
+
+       /* Send packet to hal */
+       return skb->priority == MGMT_SOFT_Q ? rsi_send_mgmt_pkt(common, skb) :
+                                             rsi_send_data_pkt(common, skb);
+}
+
+/*
+ * Allocate the coex control block, attach it to @common, initialise the coex
+ * TX queues and event, and start the coex TX thread.
+ *
+ * Return: 0 on success, -ENOMEM if the control block cannot be allocated,
+ * -EINVAL if the thread cannot be created. On thread-creation failure the
+ * control block is freed and common->coex_cb is cleared, so nothing is
+ * leaked and no dangling pointer is left behind.
+ */
+int rsi_coex_attach(struct rsi_common *common)
+{
+       struct rsi_coex_ctrl_block *coex_cb;
+       int cnt;
+
+       coex_cb = kzalloc(sizeof(*coex_cb), GFP_KERNEL);
+       if (!coex_cb)
+               return -ENOMEM;
+
+       /* Must be set before the thread starts: the thread reads it */
+       common->coex_cb = (void *)coex_cb;
+       coex_cb->priv = common;
+
+       /* Initialize co-ex queues */
+       for (cnt = 0; cnt < NUM_COEX_TX_QUEUES; cnt++)
+               skb_queue_head_init(&coex_cb->coex_tx_qs[cnt]);
+       rsi_init_event(&coex_cb->coex_tx_thread.event);
+
+       /* Initialize co-ex thread */
+       if (rsi_create_kthread(common,
+                              &coex_cb->coex_tx_thread,
+                              rsi_coex_scheduler_thread,
+                              "Coex-Tx-Thread")) {
+               rsi_dbg(ERR_ZONE, "%s: Unable to init tx thrd\n", __func__);
+               /* Thread was not created: free the block instead of leaking */
+               common->coex_cb = NULL;
+               kfree(coex_cb);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+/*
+ * Tear down the coex state held in common->coex_cb: stop the coex TX
+ * thread, purge every coex TX queue and free the control block.
+ */
+void rsi_coex_detach(struct rsi_common *common)
+{
+       struct rsi_coex_ctrl_block *cb = common->coex_cb;
+       struct sk_buff_head *txq = cb->coex_tx_qs;
+       int q;
+
+       rsi_kill_thread(&cb->coex_tx_thread);
+
+       for (q = 0; q < NUM_COEX_TX_QUEUES; q++)
+               skb_queue_purge(&txq[q]);
+
+       kfree(cb);
+}
index d0d2201830e8bbeaf8fd13cf2dd86d03d151f6fb..5dafd2e1306cbb76998a2a7f12fc890542129efc 100644 (file)
@@ -17,6 +17,7 @@
 #include "rsi_mgmt.h"
 #include "rsi_common.h"
 #include "rsi_hal.h"
+#include "rsi_coex.h"
 
 /**
  * rsi_determine_min_weight_queue() - This function determines the queue with
@@ -301,14 +302,23 @@ void rsi_core_qos_processor(struct rsi_common *common)
                        mutex_unlock(&common->tx_lock);
                        break;
                }
-
-               if (q_num == MGMT_SOFT_Q) {
-                       status = rsi_send_mgmt_pkt(common, skb);
-               } else if (q_num == MGMT_BEACON_Q) {
+               if (q_num == MGMT_BEACON_Q) {
                        status = rsi_send_pkt_to_bus(common, skb);
                        dev_kfree_skb(skb);
                } else {
-                       status = rsi_send_data_pkt(common, skb);
+#ifdef CONFIG_RSI_COEX
+                       if (common->coex_mode > 1) {
+                               status = rsi_coex_send_pkt(common, skb,
+                                                          RSI_WLAN_Q);
+                       } else {
+#endif
+                               if (q_num == MGMT_SOFT_Q)
+                                       status = rsi_send_mgmt_pkt(common, skb);
+                               else
+                                       status = rsi_send_data_pkt(common, skb);
+#ifdef CONFIG_RSI_COEX
+                       }
+#endif
                }
 
                if (status) {
index 1176de64694297e5ec26600dbc831a570f43bd45..de608ae365a45fa4fe20cb17c6c5fddaa2669621 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include <linux/firmware.h>
+#include <net/bluetooth/bluetooth.h>
 #include "rsi_mgmt.h"
 #include "rsi_hal.h"
 #include "rsi_sdio.h"
@@ -24,6 +25,7 @@
 static struct ta_metadata metadata_flash_content[] = {
        {"flash_content", 0x00010000},
        {"rsi/rs9113_wlan_qspi.rps", 0x00010000},
+       {"rsi/rs9113_wlan_bt_dual_mode.rps", 0x00010000},
 };
 
 int rsi_send_pkt_to_bus(struct rsi_common *common, struct sk_buff *skb)
@@ -31,8 +33,15 @@ int rsi_send_pkt_to_bus(struct rsi_common *common, struct sk_buff *skb)
        struct rsi_hw *adapter = common->priv;
        int status;
 
+       if (common->coex_mode > 1)
+               mutex_lock(&common->tx_bus_mutex);
+
        status = adapter->host_intf_ops->write_pkt(common->priv,
                                                   skb->data, skb->len);
+
+       if (common->coex_mode > 1)
+               mutex_unlock(&common->tx_bus_mutex);
+
        return status;
 }
 
@@ -296,8 +305,7 @@ int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb)
        if (status)
                goto err;
 
-       status = adapter->host_intf_ops->write_pkt(common->priv, skb->data,
-                                                  skb->len);
+       status = rsi_send_pkt_to_bus(common, skb);
        if (status)
                rsi_dbg(ERR_ZONE, "%s: Failed to write pkt\n", __func__);
 
@@ -342,8 +350,7 @@ int rsi_send_mgmt_pkt(struct rsi_common *common,
                goto err;
 
        rsi_prepare_mgmt_desc(common, skb);
-       status = adapter->host_intf_ops->write_pkt(common->priv,
-                                                  (u8 *)skb->data, skb->len);
+       status = rsi_send_pkt_to_bus(common, skb);
        if (status)
                rsi_dbg(ERR_ZONE, "%s: Failed to write the packet\n", __func__);
 
@@ -352,6 +359,43 @@ int rsi_send_mgmt_pkt(struct rsi_common *common,
        return status;
 }
 
+/*
+ * Send one BT packet to the bus. The queue number is read from the upper
+ * nibble of skb->data[1]: RSI_BT_MGMT_Q frames are written as they are;
+ * anything else gets a zeroed FRAME_DESC_SZ-byte descriptor pushed in front,
+ * filled with the payload length, RSI_BT_DATA_Q and the packet type taken
+ * from bt_cb(skb). The skb is freed on every path.
+ *
+ * Return: the status of rsi_send_pkt_to_bus() (non-zero is logged as a
+ * failure), or -ENOSPC when the skb lacks headroom for the descriptor.
+ */
+int rsi_send_bt_pkt(struct rsi_common *common, struct sk_buff *skb)
+{
+       int status = -EINVAL;
+       u8 header_size = 0;
+       struct rsi_bt_desc *bt_desc;
+       u8 queueno = ((skb->data[1] >> 4) & 0xf);
+
+       /* Management frames are passed through without a new descriptor */
+       if (queueno == RSI_BT_MGMT_Q) {
+               status = rsi_send_pkt_to_bus(common, skb);
+               if (status)
+                       rsi_dbg(ERR_ZONE, "%s: Failed to write bt mgmt pkt\n",
+                               __func__);
+               goto out;
+       }
+       header_size = FRAME_DESC_SZ;
+       if (header_size > skb_headroom(skb)) {
+               rsi_dbg(ERR_ZONE, "%s: Not enough headroom\n", __func__);
+               status = -ENOSPC;
+               goto out;
+       }
+       /* Prepend and clear the descriptor, then fill in its fields */
+       skb_push(skb, header_size);
+       memset(skb->data, 0, header_size);
+       bt_desc = (struct rsi_bt_desc *)skb->data;
+
+       /* skb->len now includes the descriptor, so subtract it again */
+       rsi_set_len_qno(&bt_desc->len_qno, (skb->len - FRAME_DESC_SZ),
+                       RSI_BT_DATA_Q);
+       bt_desc->bt_pkt_type = cpu_to_le16(bt_cb(skb)->pkt_type);
+
+       status = rsi_send_pkt_to_bus(common, skb);
+       if (status)
+               rsi_dbg(ERR_ZONE, "%s: Failed to write bt pkt\n", __func__);
+
+out:
+       dev_kfree_skb(skb);
+       return status;
+}
+
 int rsi_prepare_beacon(struct rsi_common *common, struct sk_buff *skb)
 {
        struct rsi_hw *adapter = (struct rsi_hw *)common->priv;
@@ -926,10 +970,6 @@ int rsi_hal_device_init(struct rsi_hw *adapter)
 {
        struct rsi_common *common = adapter->priv;
 
-       common->coex_mode = RSI_DEV_COEX_MODE_WIFI_ALONE;
-       common->oper_mode = RSI_DEV_OPMODE_WIFI_ALONE;
-       adapter->device_model = RSI_DEV_9113;
-
        switch (adapter->device_model) {
        case RSI_DEV_9113:
                if (rsi_load_firmware(adapter)) {
index 0cb8e68bab58010bf8be8a34f48fbbebece2538d..1485a0c89df2440a4bf49070ba09d1686483292e 100644 (file)
 
 #include <linux/module.h>
 #include <linux/firmware.h>
+#include <net/rsi_91x.h>
 #include "rsi_mgmt.h"
 #include "rsi_common.h"
+#include "rsi_coex.h"
 #include "rsi_hal.h"
 
 u32 rsi_zone_enabled = /* INFO_ZONE |
@@ -34,6 +36,14 @@ u32 rsi_zone_enabled = /* INFO_ZONE |
                        0;
 EXPORT_SYMBOL_GPL(rsi_zone_enabled);
 
+#ifdef CONFIG_RSI_COEX
+/* Callback table handed to rsi_bt_ops.attach() from rsi_read_pkt() */
+static struct rsi_proto_ops g_proto_ops = {
+       .coex_send_pkt = rsi_coex_send_pkt,
+       .get_host_intf = rsi_get_host_intf,
+       .set_bt_context = rsi_set_bt_context,
+};
+#endif
+
 /**
  * rsi_dbg() - This function outputs informational messages.
  * @zone: Zone of interest for output message.
@@ -60,8 +70,24 @@ EXPORT_SYMBOL_GPL(rsi_dbg);
 static char *opmode_str(int oper_mode)
 {
        switch (oper_mode) {
-       case RSI_DEV_OPMODE_WIFI_ALONE:
+       case DEV_OPMODE_WIFI_ALONE:
                return "Wi-Fi alone";
+       case DEV_OPMODE_BT_ALONE:
+               return "BT EDR alone";
+       case DEV_OPMODE_BT_LE_ALONE:
+               return "BT LE alone";
+       case DEV_OPMODE_BT_DUAL:
+               return "BT Dual";
+       case DEV_OPMODE_STA_BT:
+               return "Wi-Fi STA + BT EDR";
+       case DEV_OPMODE_STA_BT_LE:
+               return "Wi-Fi STA + BT LE";
+       case DEV_OPMODE_STA_BT_DUAL:
+               return "Wi-Fi STA + BT DUAL";
+       case DEV_OPMODE_AP_BT:
+               return "Wi-Fi AP + BT EDR";
+       case DEV_OPMODE_AP_BT_DUAL:
+               return "Wi-Fi AP + BT DUAL";
        }
 
        return "Unknown";
@@ -137,16 +163,19 @@ static struct sk_buff *rsi_prepare_skb(struct rsi_common *common,
  *
  * Return: 0 on success, -1 on failure.
  */
-int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len)
+int rsi_read_pkt(struct rsi_common *common, u8 *rx_pkt, s32 rcv_pkt_len)
 {
        u8 *frame_desc = NULL, extended_desc = 0;
        u32 index, length = 0, queueno = 0;
        u16 actual_length = 0, offset;
        struct sk_buff *skb = NULL;
+#ifdef CONFIG_RSI_COEX
+       u8 bt_pkt_type;
+#endif
 
        index = 0;
        do {
-               frame_desc = &common->rx_data_pkt[index];
+               frame_desc = &rx_pkt[index];
                actual_length = *(u16 *)&frame_desc[0];
                offset = *(u16 *)&frame_desc[2];
 
@@ -160,8 +189,15 @@ int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len)
 
                switch (queueno) {
                case RSI_COEX_Q:
-                       rsi_mgmt_pkt_recv(common, (frame_desc + offset));
+#ifdef CONFIG_RSI_COEX
+                       if (common->coex_mode > 1)
+                               rsi_coex_recv_pkt(common, frame_desc + offset);
+                       else
+#endif
+                               rsi_mgmt_pkt_recv(common,
+                                                 (frame_desc + offset));
                        break;
+
                case RSI_WIFI_DATA_Q:
                        skb = rsi_prepare_skb(common,
                                              (frame_desc + offset),
@@ -177,6 +213,25 @@ int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len)
                        rsi_mgmt_pkt_recv(common, (frame_desc + offset));
                        break;
 
+#ifdef CONFIG_RSI_COEX
+               case RSI_BT_MGMT_Q:
+               case RSI_BT_DATA_Q:
+#define BT_RX_PKT_TYPE_OFST    14
+#define BT_CARD_READY_IND      0x89
+                       bt_pkt_type = frame_desc[offset + BT_RX_PKT_TYPE_OFST];
+                       if (bt_pkt_type == BT_CARD_READY_IND) {
+                               rsi_dbg(INFO_ZONE, "BT Card ready recvd\n");
+                               if (rsi_bt_ops.attach(common, &g_proto_ops))
+                                       rsi_dbg(ERR_ZONE,
+                                               "Failed to attach BT module\n");
+                       } else {
+                               if (common->bt_adapter)
+                                       rsi_bt_ops.recv_pkt(common->bt_adapter,
+                                                       frame_desc + offset);
+                       }
+                       break;
+#endif
+
                default:
                        rsi_dbg(ERR_ZONE, "%s: pkt from invalid queue: %d\n",
                                __func__,   queueno);
@@ -217,13 +272,29 @@ static void rsi_tx_scheduler_thread(struct rsi_common *common)
        complete_and_exit(&common->tx_thread.completion, 0);
 }
 
+#ifdef CONFIG_RSI_COEX
+/**
+ * rsi_get_host_intf() - This function returns the host interface type
+ *                      recorded in the adapter.
+ * @priv: Opaque pointer; it is used here as a struct rsi_common pointer.
+ *
+ * Return: The rsi_host_intf value of the adapter behind @priv.
+ */
+enum rsi_host_intf rsi_get_host_intf(void *priv)
+{
+       struct rsi_common *common = (struct rsi_common *)priv;
+
+       return common->priv->rsi_host_intf;
+}
+
+/**
+ * rsi_set_bt_context() - This function stores the BT context pointer in
+ *                       the common structure.
+ * @priv: Opaque pointer; it is used here as a struct rsi_common pointer.
+ * @bt_context: Pointer saved in common->bt_adapter.
+ *
+ * Return: None.
+ */
+void rsi_set_bt_context(void *priv, void *bt_context)
+{
+       struct rsi_common *common = (struct rsi_common *)priv;
+
+       common->bt_adapter = bt_context;
+}
+#endif
+
 /**
  * rsi_91x_init() - This function initializes os interface operations.
  * @void: Void.
  *
  * Return: Pointer to the adapter structure on success, NULL on failure .
  */
-struct rsi_hw *rsi_91x_init(void)
+struct rsi_hw *rsi_91x_init(u16 oper_mode)
 {
        struct rsi_hw *adapter = NULL;
        struct rsi_common *common = NULL;
@@ -251,6 +322,7 @@ struct rsi_hw *rsi_91x_init(void)
        mutex_init(&common->mutex);
        mutex_init(&common->tx_lock);
        mutex_init(&common->rx_lock);
+       mutex_init(&common->tx_bus_mutex);
 
        if (rsi_create_kthread(common,
                               &common->tx_thread,
@@ -265,6 +337,43 @@ struct rsi_hw *rsi_91x_init(void)
        timer_setup(&common->roc_timer, rsi_roc_timeout, 0);
        init_completion(&common->wlan_init_completion);
        common->init_done = true;
+       adapter->device_model = RSI_DEV_9113;
+       common->oper_mode = oper_mode;
+
+       /* Determine coex mode */
+       switch (common->oper_mode) {
+       case DEV_OPMODE_STA_BT_DUAL:
+       case DEV_OPMODE_STA_BT:
+       case DEV_OPMODE_STA_BT_LE:
+       case DEV_OPMODE_BT_ALONE:
+       case DEV_OPMODE_BT_LE_ALONE:
+       case DEV_OPMODE_BT_DUAL:
+               common->coex_mode = 2;
+               break;
+       case DEV_OPMODE_AP_BT_DUAL:
+       case DEV_OPMODE_AP_BT:
+               common->coex_mode = 4;
+               break;
+       case DEV_OPMODE_WIFI_ALONE:
+               common->coex_mode = 1;
+               break;
+       default:
+               common->oper_mode = 1;
+               common->coex_mode = 1;
+       }
+       rsi_dbg(INFO_ZONE, "%s: oper_mode = %d, coex_mode = %d\n",
+               __func__, common->oper_mode, common->coex_mode);
+
+       adapter->device_model = RSI_DEV_9113;
+#ifdef CONFIG_RSI_COEX
+       if (common->coex_mode > 1) {
+               if (rsi_coex_attach(common)) {
+                       rsi_dbg(ERR_ZONE, "Failed to init coex module\n");
+                       goto err;
+               }
+       }
+#endif
+
        return adapter;
 
 err:
@@ -292,6 +401,16 @@ void rsi_91x_deinit(struct rsi_hw *adapter)
        for (ii = 0; ii < NUM_SOFT_QUEUES; ii++)
                skb_queue_purge(&common->tx_queue[ii]);
 
+#ifdef CONFIG_RSI_COEX
+       if (common->coex_mode > 1) {
+               if (common->bt_adapter) {
+                       rsi_bt_ops.detach(common->bt_adapter);
+                       common->bt_adapter = NULL;
+               }
+               rsi_coex_detach(common);
+       }
+#endif
+
        common->init_done = false;
 
        kfree(common);
index 46c9d5470dfb599fb8746c38a942a0096af65025..c21fca750fd471f5efbcc0f12fd32b0502681b72 100644 (file)
@@ -1791,7 +1791,7 @@ static int rsi_handle_ta_confirm_type(struct rsi_common *common,
        return -EINVAL;
 }
 
-static int rsi_handle_card_ready(struct rsi_common *common, u8 *msg)
+int rsi_handle_card_ready(struct rsi_common *common, u8 *msg)
 {
        switch (common->fsm_state) {
        case FSM_CARD_NOT_READY:
index b0cf41195051d4d872187c0bbdce94569c4bfe6c..98c7d1dae18e12d65b8e4095dce4bdbda4f1611e 100644 (file)
 #include <linux/module.h>
 #include "rsi_sdio.h"
 #include "rsi_common.h"
+#include "rsi_coex.h"
 #include "rsi_hal.h"
 
+/* Default operating mode is Wi-Fi STA + BT classic + BT LE (dual mode) */
+static u16 dev_oper_mode = DEV_OPMODE_STA_BT_DUAL;
+module_param(dev_oper_mode, ushort, 0444);
+MODULE_PARM_DESC(dev_oper_mode,
+                "1[Wi-Fi], 4[BT], 8[BT LE], 5[Wi-Fi STA + BT classic]\n"
+                "9[Wi-Fi STA + BT LE], 13[Wi-Fi STA + BT classic + BT LE]\n"
+                "6[AP + BT classic], 14[AP + BT classic + BT LE]");
+
 /**
  * rsi_sdio_set_cmd52_arg() - This function prepares cmd 52 read/write arg.
  * @rw: Read/write
@@ -754,6 +763,8 @@ static int rsi_sdio_host_intf_write_pkt(struct rsi_hw *adapter,
        int status;
 
        queueno = ((pkt[1] >> 4) & 0xf);
+       if (queueno == RSI_BT_MGMT_Q || queueno == RSI_BT_DATA_Q)
+               queueno = RSI_BT_Q;
 
        num_blocks = len / block_size;
 
@@ -922,14 +933,16 @@ static int rsi_probe(struct sdio_func *pfunction,
                     const struct sdio_device_id *id)
 {
        struct rsi_hw *adapter;
+       struct rsi_91x_sdiodev *sdev;
+       int status;
 
        rsi_dbg(INIT_ZONE, "%s: Init function called\n", __func__);
 
-       adapter = rsi_91x_init();
+       adapter = rsi_91x_init(dev_oper_mode);
        if (!adapter) {
                rsi_dbg(ERR_ZONE, "%s: Failed to init os intf ops\n",
                        __func__);
-               return 1;
+               return -EINVAL;
        }
        adapter->rsi_host_intf = RSI_HOST_INTF_SDIO;
        adapter->host_intf_ops = &sdio_host_intf_ops;
@@ -937,39 +950,59 @@ static int rsi_probe(struct sdio_func *pfunction,
        if (rsi_init_sdio_interface(adapter, pfunction)) {
                rsi_dbg(ERR_ZONE, "%s: Failed to init sdio interface\n",
                        __func__);
-               goto fail;
+               status = -EIO;
+               goto fail_free_adapter;
+       }
+       sdev = (struct rsi_91x_sdiodev *)adapter->rsi_dev;
+       rsi_init_event(&sdev->rx_thread.event);
+       status = rsi_create_kthread(adapter->priv, &sdev->rx_thread,
+                                   rsi_sdio_rx_thread, "SDIO-RX-Thread");
+       if (status) {
+               rsi_dbg(ERR_ZONE, "%s: Unable to init rx thrd\n", __func__);
+               goto fail_free_adapter;
        }
+       skb_queue_head_init(&sdev->rx_q.head);
+       sdev->rx_q.num_rx_pkts = 0;
+
        sdio_claim_host(pfunction);
        if (sdio_claim_irq(pfunction, rsi_handle_interrupt)) {
                rsi_dbg(ERR_ZONE, "%s: Failed to request IRQ\n", __func__);
                sdio_release_host(pfunction);
-               goto fail;
+               status = -EIO;
+               goto fail_kill_thread;
        }
        sdio_release_host(pfunction);
        rsi_dbg(INIT_ZONE, "%s: Registered Interrupt handler\n", __func__);
 
        if (rsi_hal_device_init(adapter)) {
                rsi_dbg(ERR_ZONE, "%s: Failed in device init\n", __func__);
-               sdio_claim_host(pfunction);
-               sdio_release_irq(pfunction);
-               sdio_disable_func(pfunction);
-               sdio_release_host(pfunction);
-               goto fail;
+               /* The IRQ is claimed by now; fail_dev_init releases it */
+               status = -EINVAL;
+               goto fail_dev_init;
        }
        rsi_dbg(INFO_ZONE, "===> RSI Device Init Done <===\n");
 
        if (rsi_sdio_master_access_msword(adapter, MISC_CFG_BASE_ADDR)) {
                rsi_dbg(ERR_ZONE, "%s: Unable to set ms word reg\n", __func__);
-               return -EIO;
+               status = -EIO;
+               goto fail_dev_init;
        }
 
        adapter->priv->hibernate_resume = false;
        adapter->priv->reinit_hw = false;
        return 0;
-fail:
+
+fail_dev_init:
+       sdio_claim_host(pfunction);
+       sdio_release_irq(pfunction);
+       sdio_disable_func(pfunction);
+       sdio_release_host(pfunction);
+fail_kill_thread:
+       rsi_kill_thread(&sdev->rx_thread);
+fail_free_adapter:
        rsi_91x_deinit(adapter);
        rsi_dbg(ERR_ZONE, "%s: Failed in probe...Exiting\n", __func__);
-       return 1;
+       return status;
 }
 
 static void ulp_read_write(struct rsi_hw *adapter, u16 addr, u32 data,
@@ -1065,6 +1097,8 @@ static void rsi_disconnect(struct sdio_func *pfunction)
                return;
 
        dev = (struct rsi_91x_sdiodev *)adapter->rsi_dev;
+
+       rsi_kill_thread(&dev->rx_thread);
        sdio_claim_host(pfunction);
        sdio_release_irq(pfunction);
        sdio_release_host(pfunction);
index 8e2a95c486b06f990ee80338ea5b8ac0b9a8d08a..612c211e21a153a370e80f1070a3399d450de71c 100644 (file)
@@ -16,6 +16,7 @@
  */
 
 #include <linux/firmware.h>
+#include <net/rsi_91x.h>
 #include "rsi_sdio.h"
 #include "rsi_common.h"
 
@@ -59,6 +60,43 @@ int rsi_sdio_master_access_msword(struct rsi_hw *adapter, u16 ms_word)
        return status;
 }
 
+/**
+ * rsi_sdio_rx_thread() - This function dequeues the buffers queued on the
+ *                       SDIO rx queue and passes them to rsi_read_pkt().
+ * @common: Pointer to the rsi_common structure.
+ *
+ * Return: None.
+ */
+void rsi_sdio_rx_thread(struct rsi_common *common)
+{
+       struct rsi_hw *adapter = common->priv;
+       struct rsi_91x_sdiodev *sdev = adapter->rsi_dev;
+       struct sk_buff *skb;
+       int status;
+
+       do {
+               rsi_wait_event(&sdev->rx_thread.event, EVENT_WAIT_FOREVER);
+               rsi_reset_event(&sdev->rx_thread.event);
+
+               /* Drain the queue; thread_done is re-checked per packet */
+               while (true) {
+                       if (atomic_read(&sdev->rx_thread.thread_done))
+                               goto out;
+
+                       skb = skb_dequeue(&sdev->rx_q.head);
+                       if (!skb)
+                               break;
+                       /* Counter is kept alongside the queue, never below 0 */
+                       if (sdev->rx_q.num_rx_pkts > 0)
+                               sdev->rx_q.num_rx_pkts--;
+                       status = rsi_read_pkt(common, skb->data, skb->len);
+                       if (status) {
+                               /* Stop draining until the next event */
+                               rsi_dbg(ERR_ZONE, "Failed to read the packet\n");
+                               dev_kfree_skb(skb);
+                               break;
+                       }
+                       dev_kfree_skb(skb);
+               }
+       } while (1);
+
+out:
+       rsi_dbg(INFO_ZONE, "%s: Terminated SDIO RX thread\n", __func__);
+       /* Drop anything still queued before the thread exits */
+       skb_queue_purge(&sdev->rx_q.head);
+       atomic_inc(&sdev->rx_thread.thread_done);
+       complete_and_exit(&sdev->rx_thread.completion, 0);
+}
+
 /**
  * rsi_process_pkt() - This Function reads rx_blocks register and figures out
  *                    the size of the rx pkt.
@@ -75,6 +113,10 @@ static int rsi_process_pkt(struct rsi_common *common)
        u32 rcv_pkt_len = 0;
        int status = 0;
        u8 value = 0;
+       struct sk_buff *skb;
+
+       if (dev->rx_q.num_rx_pkts >= RSI_MAX_RX_PKTS)
+               return 0;
 
        num_blks = ((adapter->interrupt_status & 1) |
                        ((adapter->interrupt_status >> RECV_NUM_BLOCKS) << 1));
@@ -102,27 +144,24 @@ static int rsi_process_pkt(struct rsi_common *common)
 
        rcv_pkt_len = (num_blks * 256);
 
-       common->rx_data_pkt = kmalloc(rcv_pkt_len, GFP_KERNEL);
-       if (!common->rx_data_pkt) {
-               rsi_dbg(ERR_ZONE, "%s: Failed in memory allocation\n",
-                       __func__);
+       skb = dev_alloc_skb(rcv_pkt_len);
+       if (!skb)
                return -ENOMEM;
-       }
 
-       status = rsi_sdio_host_intf_read_pkt(adapter,
-                                            common->rx_data_pkt,
-                                            rcv_pkt_len);
+       status = rsi_sdio_host_intf_read_pkt(adapter, skb->data, rcv_pkt_len);
        if (status) {
                rsi_dbg(ERR_ZONE, "%s: Failed to read packet from card\n",
                        __func__);
-               goto fail;
+               dev_kfree_skb(skb);
+               return status;
        }
+       skb_put(skb, rcv_pkt_len);
+       skb_queue_tail(&dev->rx_q.head, skb);
+       dev->rx_q.num_rx_pkts++;
 
-       status = rsi_read_pkt(common, rcv_pkt_len);
+       rsi_set_event(&dev->rx_thread.event);
 
-fail:
-       kfree(common->rx_data_pkt);
-       return status;
+       return 0;
 }
 
 /**
index 8f84438333482c8c51ee32648f7aa37be5f5dc06..be8236f404b51406ec1268adb9b08648a5b5aad2 100644 (file)
  */
 
 #include <linux/module.h>
+#include <net/rsi_91x.h>
 #include "rsi_usb.h"
 #include "rsi_hal.h"
+#include "rsi_coex.h"
+
+/* Default operating mode is Wi-Fi STA + BT classic + BT LE (dual mode) */
+static u16 dev_oper_mode = DEV_OPMODE_STA_BT_DUAL;
+module_param(dev_oper_mode, ushort, 0444);
+MODULE_PARM_DESC(dev_oper_mode,
+                "1[Wi-Fi], 4[BT], 8[BT LE], 5[Wi-Fi STA + BT classic]\n"
+                "9[Wi-Fi STA + BT LE], 13[Wi-Fi STA + BT classic + BT LE]\n"
+                "6[AP + BT classic], 14[AP + BT classic + BT LE]");
+
+static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num);
 
 /**
  * rsi_usb_card_write() - This function writes to the USB Card.
@@ -103,41 +115,42 @@ static int rsi_find_bulk_in_and_out_endpoints(struct usb_interface *interface,
        struct usb_host_interface *iface_desc;
        struct usb_endpoint_descriptor *endpoint;
        __le16 buffer_size;
-       int ii, bep_found = 0;
+       int ii, bin_found = 0, bout_found = 0;
 
        iface_desc = &(interface->altsetting[0]);
 
        for (ii = 0; ii < iface_desc->desc.bNumEndpoints; ++ii) {
                endpoint = &(iface_desc->endpoint[ii].desc);
 
-               if ((!(dev->bulkin_endpoint_addr)) &&
+               if (!dev->bulkin_endpoint_addr[bin_found] &&
                    (endpoint->bEndpointAddress & USB_DIR_IN) &&
-                   ((endpoint->bmAttributes &
-                   USB_ENDPOINT_XFERTYPE_MASK) ==
+                   ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) ==
                    USB_ENDPOINT_XFER_BULK)) {
                        buffer_size = endpoint->wMaxPacketSize;
-                       dev->bulkin_size = buffer_size;
-                       dev->bulkin_endpoint_addr =
+                       dev->bulkin_size[bin_found] = buffer_size;
+                       dev->bulkin_endpoint_addr[bin_found] =
                                endpoint->bEndpointAddress;
+                       bin_found++;
                }
 
-               if (!dev->bulkout_endpoint_addr[bep_found] &&
+               if (!dev->bulkout_endpoint_addr[bout_found] &&
                    !(endpoint->bEndpointAddress & USB_DIR_IN) &&
                    ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) ==
-                     USB_ENDPOINT_XFER_BULK)) {
-                       dev->bulkout_endpoint_addr[bep_found] =
+                   USB_ENDPOINT_XFER_BULK)) {
+                       buffer_size = endpoint->wMaxPacketSize;
+                       dev->bulkout_endpoint_addr[bout_found] =
                                endpoint->bEndpointAddress;
                        buffer_size = endpoint->wMaxPacketSize;
-                       dev->bulkout_size[bep_found] = buffer_size;
-                       bep_found++;
+                       dev->bulkout_size[bout_found] = buffer_size;
+                       bout_found++;
                }
 
-               if (bep_found >= MAX_BULK_EP)
+               if (bin_found >= MAX_BULK_EP || bout_found >= MAX_BULK_EP)
                        break;
        }
 
-       if (!(dev->bulkin_endpoint_addr) &&
-           (dev->bulkout_endpoint_addr[0]))
+       if (!(dev->bulkin_endpoint_addr[0]) &&
+           dev->bulkout_endpoint_addr[0])
                return -EINVAL;
 
        return 0;
@@ -247,13 +260,33 @@ static int rsi_usb_reg_write(struct usb_device *usbdev,
  */
 static void rsi_rx_done_handler(struct urb *urb)
 {
-       struct rsi_hw *adapter = urb->context;
-       struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
+       struct rx_usb_ctrl_block *rx_cb = urb->context;
+       struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)rx_cb->data;
+       int status = -EINVAL;
 
        if (urb->status)
-               return;
+               goto out;
+
+       if (urb->actual_length <= 0) {
+               rsi_dbg(INFO_ZONE, "%s: Zero length packet\n", __func__);
+               goto out;
+       }
+       if (skb_queue_len(&dev->rx_q) >= RSI_MAX_RX_PKTS) {
+               rsi_dbg(INFO_ZONE, "Max RX packets reached\n");
+               goto out;
+       }
+       skb_put(rx_cb->rx_skb, urb->actual_length);
+       skb_queue_tail(&dev->rx_q, rx_cb->rx_skb);
 
        rsi_set_event(&dev->rx_thread.event);
+       status = 0;
+
+out:
+       if (rsi_rx_urb_submit(dev->priv, rx_cb->ep_num))
+               rsi_dbg(ERR_ZONE, "%s: Failed in urb submission", __func__);
+
+       if (status)
+               dev_kfree_skb(rx_cb->rx_skb);
 }
 
 /**
@@ -262,20 +295,34 @@ static void rsi_rx_done_handler(struct urb *urb)
  *
  * Return: 0 on success, a negative error code on failure.
  */
-static int rsi_rx_urb_submit(struct rsi_hw *adapter)
+static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num)
 {
        struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
-       struct urb *urb = dev->rx_usb_urb[0];
+       struct rx_usb_ctrl_block *rx_cb = &dev->rx_cb[ep_num - 1];
+       struct urb *urb = rx_cb->rx_urb;
        int status;
+       struct sk_buff *skb;
+       u8 dword_align_bytes = 0;
+
+#define RSI_MAX_RX_USB_PKT_SIZE        3000
+       skb = dev_alloc_skb(RSI_MAX_RX_USB_PKT_SIZE);
+       if (!skb)
+               return -ENOMEM;
+       skb_reserve(skb, MAX_DWORD_ALIGN_BYTES);
+       dword_align_bytes = (unsigned long)skb->data & 0x3f;
+       if (dword_align_bytes > 0)
+               skb_push(skb, dword_align_bytes);
+       urb->transfer_buffer = skb->data;
+       rx_cb->rx_skb = skb;
 
        usb_fill_bulk_urb(urb,
                          dev->usbdev,
                          usb_rcvbulkpipe(dev->usbdev,
-                               dev->bulkin_endpoint_addr),
+                         dev->bulkin_endpoint_addr[ep_num - 1]),
                          urb->transfer_buffer,
-                         3000,
+                         RSI_MAX_RX_USB_PKT_SIZE,
                          rsi_rx_done_handler,
-                         adapter);
+                         rx_cb);
 
        status = usb_submit_urb(urb, GFP_KERNEL);
        if (status)
@@ -487,11 +534,51 @@ static void rsi_deinit_usb_interface(struct rsi_hw *adapter)
        struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
 
        rsi_kill_thread(&dev->rx_thread);
-       usb_free_urb(dev->rx_usb_urb[0]);
-       kfree(adapter->priv->rx_data_pkt);
+
+       usb_free_urb(dev->rx_cb[0].rx_urb);
+       if (adapter->priv->coex_mode > 1)
+               usb_free_urb(dev->rx_cb[1].rx_urb);
+
        kfree(dev->tx_buffer);
 }
 
+/**
+ * rsi_usb_init_rx() - This function allocates the rx URB(s), initializes
+ *                    the rx queue and starts the USB rx thread.
+ * @adapter: Pointer to the adapter structure.
+ *
+ * Return: 0 on success, -1 on failure.
+ */
+static int rsi_usb_init_rx(struct rsi_hw *adapter)
+{
+       struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
+       struct rx_usb_ctrl_block *rx_cb;
+       u8 idx, num_rx_cb;
+
+       /* Two rx control blocks when coex_mode > 1, otherwise one */
+       num_rx_cb = (adapter->priv->coex_mode > 1 ? 2 : 1);
+
+       for (idx = 0; idx < num_rx_cb; idx++) {
+               rx_cb = &dev->rx_cb[idx];
+
+               rx_cb->rx_urb = usb_alloc_urb(0, GFP_KERNEL);
+               if (!rx_cb->rx_urb) {
+                       rsi_dbg(ERR_ZONE, "Failed alloc rx urb[%d]\n", idx);
+                       goto err;
+               }
+               /* ep_num is 1-based: rsi_rx_urb_submit() uses ep_num - 1 */
+               rx_cb->ep_num = idx + 1;
+               rx_cb->data = (void *)dev;
+       }
+       skb_queue_head_init(&dev->rx_q);
+       rsi_init_event(&dev->rx_thread.event);
+       if (rsi_create_kthread(adapter->priv, &dev->rx_thread,
+                              rsi_usb_rx_thread, "RX-Thread")) {
+               rsi_dbg(ERR_ZONE, "%s: Unable to init rx thrd\n", __func__);
+               goto err;
+       }
+
+       return 0;
+
+err:
+       /*
+        * NOTE(review): an entry that was never allocated is freed here too;
+        * this relies on the caller having zero-initialized *dev - confirm.
+        */
+       usb_free_urb(dev->rx_cb[0].rx_urb);
+       if (adapter->priv->coex_mode > 1)
+               usb_free_urb(dev->rx_cb[1].rx_urb);
+
+       return -1;
+}
+
 /**
  * rsi_init_usb_interface() - This function initializes the usb interface.
  * @adapter: Pointer to the adapter structure.
@@ -503,7 +590,6 @@ static int rsi_init_usb_interface(struct rsi_hw *adapter,
                                  struct usb_interface *pfunction)
 {
        struct rsi_91x_usbdev *rsi_dev;
-       struct rsi_common *common = adapter->priv;
        int status;
 
        rsi_dev = kzalloc(sizeof(*rsi_dev), GFP_KERNEL);
@@ -512,49 +598,37 @@ static int rsi_init_usb_interface(struct rsi_hw *adapter,
 
        adapter->rsi_dev = rsi_dev;
        rsi_dev->usbdev = interface_to_usbdev(pfunction);
+       rsi_dev->priv = (void *)adapter;
 
-       if (rsi_find_bulk_in_and_out_endpoints(pfunction, adapter))
-               return -EINVAL;
+       if (rsi_find_bulk_in_and_out_endpoints(pfunction, adapter)) {
+               status = -EINVAL;
+               goto fail_eps;
+       }
 
        adapter->device = &pfunction->dev;
        usb_set_intfdata(pfunction, adapter);
 
-       common->rx_data_pkt = kmalloc(2048, GFP_KERNEL);
-       if (!common->rx_data_pkt) {
-               rsi_dbg(ERR_ZONE, "%s: Failed to allocate memory\n",
-                       __func__);
-               return -ENOMEM;
-       }
-
        rsi_dev->tx_buffer = kmalloc(2048, GFP_KERNEL);
        if (!rsi_dev->tx_buffer) {
                status = -ENOMEM;
-               goto fail_tx;
+               goto fail_eps;
        }
-       rsi_dev->rx_usb_urb[0] = usb_alloc_urb(0, GFP_KERNEL);
-       if (!rsi_dev->rx_usb_urb[0]) {
+
+       if (rsi_usb_init_rx(adapter)) {
+               rsi_dbg(ERR_ZONE, "Failed to init RX handle\n");
                status = -ENOMEM;
                goto fail_rx;
        }
-       rsi_dev->rx_usb_urb[0]->transfer_buffer = adapter->priv->rx_data_pkt;
+
        rsi_dev->tx_blk_size = 252;
        adapter->block_size = rsi_dev->tx_blk_size;
 
        /* Initializing function callbacks */
-       adapter->rx_urb_submit = rsi_rx_urb_submit;
        adapter->check_hw_queue_status = rsi_usb_check_queue_status;
        adapter->determine_event_timeout = rsi_usb_event_timeout;
        adapter->rsi_host_intf = RSI_HOST_INTF_USB;
        adapter->host_intf_ops = &usb_host_intf_ops;
 
-       rsi_init_event(&rsi_dev->rx_thread.event);
-       status = rsi_create_kthread(common, &rsi_dev->rx_thread,
-                                   rsi_usb_rx_thread, "RX-Thread");
-       if (status) {
-               rsi_dbg(ERR_ZONE, "%s: Unable to init rx thrd\n", __func__);
-               goto fail_thread;
-       }
-
 #ifdef CONFIG_RSI_DEBUGFS
        /* In USB, one less than the MAX_DEBUGFS_ENTRIES entries is required */
        adapter->num_debugfs_entries = (MAX_DEBUGFS_ENTRIES - 1);
@@ -563,12 +637,12 @@ static int rsi_init_usb_interface(struct rsi_hw *adapter,
        rsi_dbg(INIT_ZONE, "%s: Enabled the interface\n", __func__);
        return 0;
 
-fail_thread:
-       usb_free_urb(rsi_dev->rx_usb_urb[0]);
 fail_rx:
        kfree(rsi_dev->tx_buffer);
-fail_tx:
-       kfree(common->rx_data_pkt);
+
+fail_eps:
+       kfree(rsi_dev);
+
        return status;
 }
 
@@ -662,7 +736,7 @@ static int rsi_probe(struct usb_interface *pfunction,
 
        rsi_dbg(INIT_ZONE, "%s: Init function called\n", __func__);
 
-       adapter = rsi_91x_init();
+       adapter = rsi_91x_init(dev_oper_mode);
        if (!adapter) {
                rsi_dbg(ERR_ZONE, "%s: Failed to init os intf ops\n",
                        __func__);
@@ -698,10 +772,16 @@ static int rsi_probe(struct usb_interface *pfunction,
                rsi_dbg(INIT_ZONE, "%s: Device Init Done\n", __func__);
        }
 
-       status = rsi_rx_urb_submit(adapter);
+       status = rsi_rx_urb_submit(adapter, WLAN_EP);
        if (status)
                goto err1;
 
+       if (adapter->priv->coex_mode > 1) {
+               status = rsi_rx_urb_submit(adapter, BT_EP);
+               if (status)
+                       goto err1;
+       }
+
        return 0;
 err1:
        rsi_deinit_usb_interface(adapter);
index 465692b3c3514e022379654e74032b344c18c49c..b1687d22f73f58c14c2e204bda3d97a737fa4b54 100644 (file)
@@ -30,31 +30,33 @@ void rsi_usb_rx_thread(struct rsi_common *common)
        struct rsi_hw *adapter = common->priv;
        struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
        int status;
+       struct sk_buff *skb;
 
        do {
                rsi_wait_event(&dev->rx_thread.event, EVENT_WAIT_FOREVER);
+               rsi_reset_event(&dev->rx_thread.event);
 
-               if (atomic_read(&dev->rx_thread.thread_done))
-                       goto out;
+               while (true) {
+                       if (atomic_read(&dev->rx_thread.thread_done))
+                               goto out;
 
-               mutex_lock(&common->rx_lock);
-               status = rsi_read_pkt(common, 0);
-               if (status) {
-                       rsi_dbg(ERR_ZONE, "%s: Failed To read data", __func__);
-                       mutex_unlock(&common->rx_lock);
-                       return;
-               }
-               mutex_unlock(&common->rx_lock);
-               rsi_reset_event(&dev->rx_thread.event);
-               if (adapter->rx_urb_submit(adapter)) {
-                       rsi_dbg(ERR_ZONE,
-                               "%s: Failed in urb submission", __func__);
-                       return;
+                       skb = skb_dequeue(&dev->rx_q);
+                       if (!skb)
+                               break;
+                       status = rsi_read_pkt(common, skb->data, 0);
+                       /* The skb is consumed here on success and on error */
+                       dev_kfree_skb(skb);
+                       if (status) {
+                               rsi_dbg(ERR_ZONE, "%s: Failed To read data",
+                                       __func__);
+                               break;
+                       }
                }
        } while (1);
 
 out:
        rsi_dbg(INFO_ZONE, "%s: Terminated thread\n", __func__);
+       skb_queue_purge(&dev->rx_q);
        complete_and_exit(&dev->rx_thread.completion, 0);
 }
 
diff --git a/drivers/net/wireless/rsi/rsi_coex.h b/drivers/net/wireless/rsi/rsi_coex.h
new file mode 100644 (file)
index 0000000..0fdc67f
--- /dev/null
@@ -0,0 +1,37 @@
+/**
+ * Copyright (c) 2018 Redpine Signals Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __RSI_COEX_H__
+#define __RSI_COEX_H__
+
+#include "rsi_common.h"
+
+#ifdef CONFIG_RSI_COEX
+#define COMMON_CARD_READY_IND           0
+#define NUM_COEX_TX_QUEUES              5
+
+/*
+ * Coex control block.
+ * NOTE(review): the field notes below are read off the declarations only;
+ * confirm against the coex implementation file.
+ */
+struct rsi_coex_ctrl_block {
+       struct rsi_common *priv;        /* presumably the owning rsi_common */
+       struct sk_buff_head coex_tx_qs[NUM_COEX_TX_QUEUES]; /* skb queues */
+       struct rsi_thread coex_tx_thread;
+};
+
+int rsi_coex_attach(struct rsi_common *common);
+void rsi_coex_detach(struct rsi_common *common);
+int rsi_coex_send_pkt(void *priv, struct sk_buff *skb, u8 proto_type);
+int rsi_coex_recv_pkt(struct rsi_common *common, u8 *msg);
+#endif
+#endif
index d07dbba61727e816717b05dd670782f6a65dd87f..d9ff3b8be86ee19156ed3bb7c4d322a0aeea8515 100644 (file)
@@ -62,6 +62,7 @@ static inline int rsi_create_kthread(struct rsi_common *common,
                                     u8 *name)
 {
        init_completion(&thread->completion);
+       atomic_set(&thread->thread_done, 0);
        thread->task = kthread_run(func_ptr, common, "%s", name);
        if (IS_ERR(thread->task))
                return (int)PTR_ERR(thread->task);
@@ -80,9 +81,9 @@ static inline int rsi_kill_thread(struct rsi_thread *handle)
 
 void rsi_mac80211_detach(struct rsi_hw *hw);
 u16 rsi_get_connected_channel(struct ieee80211_vif *vif);
-struct rsi_hw *rsi_91x_init(void);
+struct rsi_hw *rsi_91x_init(u16 oper_mode);
 void rsi_91x_deinit(struct rsi_hw *adapter);
-int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len);
+int rsi_read_pkt(struct rsi_common *common, u8 *rx_pkt, s32 rcv_pkt_len);
 #ifdef CONFIG_PM
 int rsi_config_wowlan(struct rsi_hw *adapter, struct cfg80211_wowlan *wowlan);
 #endif
index a09d36b6b765b3d865e6a6e6ae7f432175988e68..786dccd0b732d2f34c094f942e73cc278987734f 100644 (file)
 #ifndef __RSI_HAL_H__
 #define __RSI_HAL_H__
 
+/* Device Operating modes */
+#define DEV_OPMODE_WIFI_ALONE          1
+#define DEV_OPMODE_BT_ALONE            4
+#define DEV_OPMODE_BT_LE_ALONE         8
+#define DEV_OPMODE_BT_DUAL             12
+#define DEV_OPMODE_STA_BT              5
+#define DEV_OPMODE_STA_BT_LE           9
+#define DEV_OPMODE_STA_BT_DUAL         13
+#define DEV_OPMODE_AP_BT               6
+#define DEV_OPMODE_AP_BT_DUAL          14
+
 #define FLASH_WRITE_CHUNK_SIZE         (4 * 1024)
 #define FLASH_SECTOR_SIZE              (4 * 1024)
 
 
 #define FW_FLASH_OFFSET                        0x820
 #define LMAC_VER_OFFSET                        (FW_FLASH_OFFSET + 0x200)
+#define MAX_DWORD_ALIGN_BYTES          64
 
 struct bl_header {
        __le32 flags;
@@ -145,8 +157,18 @@ struct rsi_data_desc {
        u8 sta_id;
 } __packed;
 
+struct rsi_bt_desc {
+       __le16 len_qno;
+       __le16 reserved1;
+       __le32 reserved2;
+       __le32 reserved3;
+       __le16 reserved4;
+       __le16 bt_pkt_type;
+} __packed;
+
 int rsi_hal_device_init(struct rsi_hw *adapter);
 int rsi_prepare_beacon(struct rsi_common *common, struct sk_buff *skb);
 int rsi_send_pkt_to_bus(struct rsi_common *common, struct sk_buff *skb);
+int rsi_send_bt_pkt(struct rsi_common *common, struct sk_buff *skb);
 
 #endif
index 8cab630af4a5b039ed52f5c1ed666b3ff295533a..ef4fa323694b8d6d7b41844928ffa25028b477be 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/string.h>
 #include <linux/skbuff.h>
 #include <net/mac80211.h>
+#include <net/rsi_91x.h>
 
 struct rsi_sta {
        struct ieee80211_sta *sta;
@@ -85,10 +86,6 @@ extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...);
 #define MGMT_HW_Q                      10
 #define BEACON_HW_Q                    11
 
-/* Queue information */
-#define RSI_COEX_Q                     0x0
-#define RSI_WIFI_MGMT_Q                 0x4
-#define RSI_WIFI_DATA_Q                 0x5
 #define IEEE80211_MGMT_FRAME            0x00
 #define IEEE80211_CTL_FRAME             0x04
 
@@ -115,6 +112,7 @@ extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...);
 #define RSI_WOW_NO_CONNECTION          BIT(1)
 
 #define RSI_DEV_9113           1
+#define RSI_MAX_RX_PKTS                64
 
 struct version_info {
        u16 major;
@@ -209,6 +207,7 @@ struct rsi_common {
        struct rsi_hw *priv;
        struct vif_priv vif_info[RSI_MAX_VIFS];
 
+       void *coex_cb;
        bool mgmt_q_block;
        struct version_info lmac_ver;
 
@@ -273,6 +272,8 @@ struct rsi_common {
        u8 obm_ant_sel_val;
        int tx_power;
        u8 ant_in_use;
+       /* Mutex used for writing packet to bus */
+       struct mutex tx_bus_mutex;
        bool hibernate_resume;
        bool reinit_hw;
        u8 wow_flags;
@@ -291,11 +292,8 @@ struct rsi_common {
        bool p2p_enabled;
        struct timer_list roc_timer;
        struct ieee80211_vif *roc_vif;
-};
 
-enum host_intf {
-       RSI_HOST_INTF_SDIO = 0,
-       RSI_HOST_INTF_USB
+       void *bt_adapter;
 };
 
 struct eepromrw_info {
@@ -322,7 +320,7 @@ struct rsi_hw {
        struct device *device;
        u8 sc_nvifs;
 
-       enum host_intf rsi_host_intf;
+       enum rsi_host_intf rsi_host_intf;
        u16 block_size;
        enum ps_state ps_state;
        struct rsi_ps_info ps_info;
@@ -343,7 +341,6 @@ struct rsi_hw {
        void *rsi_dev;
        struct rsi_host_intf_ops *host_intf_ops;
        int (*check_hw_queue_status)(struct rsi_hw *adapter, u8 q_num);
-       int (*rx_urb_submit)(struct rsi_hw *adapter);
        int (*determine_event_timeout)(struct rsi_hw *adapter);
 };
 
@@ -367,4 +364,8 @@ struct rsi_host_intf_ops {
                                      u8 *fw);
        int (*reinit_device)(struct rsi_hw *adapter);
 };
+
+enum rsi_host_intf rsi_get_host_intf(void *priv);
+void rsi_set_bt_context(void *priv, void *bt_context);
+
 #endif
index 389094a3f91cfcad21091980bbddd1166220417a..cf6567ae5bbef411b6913e0c96b0531adc5029a4 100644 (file)
 #define WOW_PATTERN_SIZE 256
 
 /* Receive Frame Types */
+#define RSI_RX_DESC_MSG_TYPE_OFFSET    2
 #define TA_CONFIRM_TYPE                 0x01
 #define RX_DOT11_MGMT                   0x02
 #define TX_STATUS_IND                   0x04
 #define BEACON_EVENT_IND               0x08
 #define PROBEREQ_CONFIRM                2
 #define CARD_READY_IND                  0x00
+#define SLEEP_NOTIFY_IND                0x06
 
 #define RSI_DELETE_PEER                 0x0
 #define RSI_ADD_PEER                    0x1
@@ -638,6 +640,7 @@ static inline void rsi_set_len_qno(__le16 *addr, u16 len, u8 qno)
        *addr = cpu_to_le16(len | ((qno & 7) << 12));
 }
 
+int rsi_handle_card_ready(struct rsi_common *common, u8 *msg);
 int rsi_mgmt_pkt_recv(struct rsi_common *common, u8 *msg);
 int rsi_set_vap_capabilities(struct rsi_common *common, enum opmode mode,
                             u8 *mac_addr, u8 vap_id, u8 vap_status);
index 49c549ba6682935b909d3873a67fefdd5beebf28..ba649be284afb6e88518337f666b1aeee823e644 100644 (file)
@@ -105,6 +105,11 @@ struct receive_info {
        u32 buf_available_counter;
 };
 
+struct rsi_sdio_rx_q {
+       u8 num_rx_pkts;
+       struct sk_buff_head head;
+};
+
 struct rsi_91x_sdiodev {
        struct sdio_func *pfunction;
        struct task_struct *sdio_irq_task;
@@ -117,6 +122,8 @@ struct rsi_91x_sdiodev {
        u16 tx_blk_size;
        u8 write_fail;
        bool buff_status_updated;
+       struct rsi_sdio_rx_q rx_q;
+       struct rsi_thread rx_thread;
 };
 
 void rsi_interrupt_handler(struct rsi_hw *adapter);
@@ -131,4 +138,5 @@ int rsi_sdio_master_access_msword(struct rsi_hw *adapter, u16 ms_word);
 void rsi_sdio_ack_intr(struct rsi_hw *adapter, u8 int_bit);
 int rsi_sdio_determine_event_timeout(struct rsi_hw *adapter);
 int rsi_sdio_check_buffer_status(struct rsi_hw *adapter, u8 q_num);
+void rsi_sdio_rx_thread(struct rsi_common *common);
 #endif
index 891daea2d932c033bb5a1ad08cebaf2de8eb041e..a88d59295a985a764592605fa4904010f5bde0f0 100644 (file)
@@ -31,7 +31,7 @@
 #define USB_VENDOR_REGISTER_WRITE    0x16
 #define RSI_USB_TX_HEAD_ROOM         128
 
-#define MAX_RX_URBS                  1
+#define MAX_RX_URBS                  2
 #define MAX_BULK_EP                  8
 #define WLAN_EP                      1
 #define BT_EP                        2
 #define RSI_USB_BUF_SIZE            4096
 #define RSI_USB_CTRL_BUF_SIZE       0x04
 
+struct rx_usb_ctrl_block {
+       u8 *data;
+       struct urb *rx_urb;
+       struct sk_buff *rx_skb;
+       u8 ep_num;
+};
+
 struct rsi_91x_usbdev {
+       void *priv;
        struct rsi_thread rx_thread;
        u8 endpoint;
        struct usb_device *usbdev;
        struct usb_interface *pfunction;
-       struct urb *rx_usb_urb[MAX_RX_URBS];
+       struct rx_usb_ctrl_block rx_cb[MAX_RX_URBS];
        u8 *tx_buffer;
-       __le16 bulkin_size;
-       u8 bulkin_endpoint_addr;
+       __le16 bulkin_size[MAX_BULK_EP];
+       u8 bulkin_endpoint_addr[MAX_BULK_EP];
        __le16 bulkout_size[MAX_BULK_EP];
        u8 bulkout_endpoint_addr[MAX_BULK_EP];
        u32 tx_blk_size;
        u8 write_fail;
+       struct sk_buff_head rx_q;
 };
 
 static inline int rsi_usb_check_queue_status(struct rsi_hw *adapter, u8 q_num)
index 969b4f6e53b53b9325a75bdd7dfdc736608b53c6..ff69a80a963399caf051a38a8059cbace4f93224 100644 (file)
@@ -5,8 +5,8 @@ config WLAN_VENDOR_ST
          If you have a wireless card belonging to this class, say Y.
 
          Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         the questions about  cards. If you say Y, you will be asked for
+         kernel: saying N will just cause the configurator to skip all the
+         questions about these cards. If you say Y, you will be asked for
          your specific card in the following questions.
 
 if WLAN_VENDOR_ST
index 92fbd6597e34bb84f81dc1ada10972f958c6eee5..366c687445add67f8c1f8d55efefae3b3140b692 100644 (file)
@@ -5,8 +5,8 @@ config WLAN_VENDOR_TI
          If you have a wireless card belonging to this class, say Y.
 
          Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         the questions about  cards. If you say Y, you will be asked for
+         kernel: saying N will just cause the configurator to skip all the
+         questions about these cards. If you say Y, you will be asked for
          your specific card in the following questions.
 
 if WLAN_VENDOR_TI
index 037defd10b91800a5210c4f115584d30278d9e5f..bd8641ad953b95e830bf0f220fe82d1fc610cd12 100644 (file)
@@ -122,8 +122,7 @@ static int wl1251_fetch_nvs(struct wl1251 *wl)
                goto out;
        }
 
-       wl->nvs_len = fw->size;
-       wl->nvs = kmemdup(fw->data, wl->nvs_len, GFP_KERNEL);
+       wl->nvs = kmemdup(fw->data, fw->size, GFP_KERNEL);
 
        if (!wl->nvs) {
                wl1251_error("could not allocate memory for the nvs file");
@@ -131,6 +130,8 @@ static int wl1251_fetch_nvs(struct wl1251 *wl)
                goto out;
        }
 
+       wl->nvs_len = fw->size;
+
        ret = 0;
 
 out:
@@ -202,13 +203,6 @@ static int wl1251_chip_wakeup(struct wl1251 *wl)
                        goto out;
        }
 
-       if (wl->nvs == NULL && !wl->use_eeprom) {
-               /* No NVS from netlink, try to get it from the filesystem */
-               ret = wl1251_fetch_nvs(wl);
-               if (ret < 0)
-                       goto out;
-       }
-
 out:
        return ret;
 }
@@ -1446,6 +1440,61 @@ static int wl1251_read_eeprom_mac(struct wl1251 *wl)
        return 0;
 }
 
+#define NVS_OFF_MAC_LEN 0x19
+#define NVS_OFF_MAC_ADDR_LO 0x1a
+#define NVS_OFF_MAC_ADDR_HI 0x1b
+#define NVS_OFF_MAC_DATA 0x1c
+
+static int wl1251_check_nvs_mac(struct wl1251 *wl)
+{
+       if (wl->nvs_len < 0x24)
+               return -ENODATA;
+
+       /* length is 2 and data address is 0x546c (ANDed with 0xfffe) */
+       if (wl->nvs[NVS_OFF_MAC_LEN] != 2 ||
+           wl->nvs[NVS_OFF_MAC_ADDR_LO] != 0x6d ||
+           wl->nvs[NVS_OFF_MAC_ADDR_HI] != 0x54)
+               return -EINVAL;
+
+       return 0;
+}
+
+static int wl1251_read_nvs_mac(struct wl1251 *wl)
+{
+       u8 mac[ETH_ALEN];
+       int i, ret;
+
+       ret = wl1251_check_nvs_mac(wl);
+       if (ret)
+               return ret;
+
+       /* MAC is stored in reverse order */
+       for (i = 0; i < ETH_ALEN; i++)
+               mac[i] = wl->nvs[NVS_OFF_MAC_DATA + ETH_ALEN - i - 1];
+
+       /* 00:00:20:07:03:09 is in example file wl1251-nvs.bin, so invalid */
+       if (ether_addr_equal_unaligned(mac, "\x00\x00\x20\x07\x03\x09"))
+               return -EINVAL;
+
+       memcpy(wl->mac_addr, mac, ETH_ALEN);
+       return 0;
+}
+
+static int wl1251_write_nvs_mac(struct wl1251 *wl)
+{
+       int i, ret;
+
+       ret = wl1251_check_nvs_mac(wl);
+       if (ret)
+               return ret;
+
+       /* MAC is stored in reverse order */
+       for (i = 0; i < ETH_ALEN; i++)
+               wl->nvs[NVS_OFF_MAC_DATA + i] = wl->mac_addr[ETH_ALEN - i - 1];
+
+       return 0;
+}
+
 static int wl1251_register_hw(struct wl1251 *wl)
 {
        int ret;
@@ -1489,8 +1538,33 @@ int wl1251_init_ieee80211(struct wl1251 *wl)
 
        wl->hw->queues = 4;
 
+       if (wl->nvs == NULL && !wl->use_eeprom) {
+               ret = wl1251_fetch_nvs(wl);
+               if (ret < 0)
+                       goto out;
+       }
+
        if (wl->use_eeprom)
-               wl1251_read_eeprom_mac(wl);
+               ret = wl1251_read_eeprom_mac(wl);
+       else
+               ret = wl1251_read_nvs_mac(wl);
+
+       if (ret == 0 && !is_valid_ether_addr(wl->mac_addr))
+               ret = -EINVAL;
+
+       if (ret < 0) {
+               /*
+                * In case our MAC address is not correctly set,
+                * we use a random but Nokia MAC.
+                */
+               static const u8 nokia_oui[3] = {0x00, 0x1f, 0xdf};
+               memcpy(wl->mac_addr, nokia_oui, 3);
+               get_random_bytes(wl->mac_addr + 3, 3);
+               if (!wl->use_eeprom)
+                       wl1251_write_nvs_mac(wl);
+               wl1251_warning("MAC address in eeprom or nvs data is not valid");
+               wl1251_warning("Setting random MAC address: %pM", wl->mac_addr);
+       }
 
        ret = wl1251_register_hw(wl);
        if (ret)
@@ -1511,7 +1585,6 @@ struct ieee80211_hw *wl1251_alloc_hw(void)
        struct ieee80211_hw *hw;
        struct wl1251 *wl;
        int i;
-       static const u8 nokia_oui[3] = {0x00, 0x1f, 0xdf};
 
        hw = ieee80211_alloc_hw(sizeof(*wl), &wl1251_ops);
        if (!hw) {
@@ -1561,13 +1634,6 @@ struct ieee80211_hw *wl1251_alloc_hw(void)
        INIT_WORK(&wl->irq_work, wl1251_irq_work);
        INIT_WORK(&wl->tx_work, wl1251_tx_work);
 
-       /*
-        * In case our MAC address is not correctly set,
-        * we use a random but Nokia MAC.
-        */
-       memcpy(wl->mac_addr, nokia_oui, 3);
-       get_random_bytes(wl->mac_addr + 3, 3);
-
        wl->state = WL1251_STATE_OFF;
        mutex_init(&wl->mutex);
        spin_lock_init(&wl->wl_lock);
index de2fa67055745cd935625eace6a00e4c6c94bd92..12ed14ebc3074345fea7ca16b3f2a24efaabe5af 100644 (file)
@@ -221,10 +221,8 @@ static int wl1251_tx_send_packet(struct wl1251 *wl, struct sk_buff *skb,
                        struct sk_buff *newskb = skb_copy_expand(skb, 0, 3,
                                                                 GFP_KERNEL);
 
-                       if (unlikely(newskb == NULL)) {
-                               wl1251_error("Can't allocate skb!");
+                       if (unlikely(newskb == NULL))
                                return -EINVAL;
-                       }
 
                        tx_hdr = (struct tx_double_buffer_desc *) newskb->data;
 
index a58c0f65e3766ddf2373163a99a4090a14838524..b327f86f05be119a7e65f4240ccde12cfdc7af28 100644 (file)
@@ -5,8 +5,8 @@ config WLAN_VENDOR_ZYDAS
          If you have a wireless card belonging to this class, say Y.
 
          Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         the questions about  cards. If you say Y, you will be asked for
+         kernel: saying N will just cause the configurator to skip all the
+         questions about these cards. If you say Y, you will be asked for
          your specific card in the following questions.
 
 if WLAN_VENDOR_ZYDAS
index b785742bfd9e097b9aa283c5aa9354ef1daaa403..b01b44a5d16ead6b9dddc71c198f28d385027621 100644 (file)
@@ -509,7 +509,6 @@ void zd_mac_tx_failed(struct urb *urb)
        int found = 0;
        int i, position = 0;
 
-       q = &mac->ack_wait_queue;
        spin_lock_irqsave(&q->lock, flags);
 
        skb_queue_walk(q, skb) {
index b1cf7c6f407a9ecf45bfaac50695fd6c691c158d..ef5887037b225251cfa77ccac587f79ad338dd9d 100644 (file)
@@ -419,7 +419,7 @@ static void xenvif_rx_extra_slot(struct xenvif_queue *queue,
        BUG();
 }
 
-void xenvif_rx_skb(struct xenvif_queue *queue)
+static void xenvif_rx_skb(struct xenvif_queue *queue)
 {
        struct xenvif_pkt_state pkt;
 
index 8328d395e332919b5f5736dc2fc1a519b8951896..3127bc8633ca511889e8098d1a996c30c6d28b3f 100644 (file)
@@ -2005,7 +2005,10 @@ static void netback_changed(struct xenbus_device *dev,
        case XenbusStateInitialised:
        case XenbusStateReconfiguring:
        case XenbusStateReconfigured:
+               break;
+
        case XenbusStateUnknown:
+               wake_up_all(&module_unload_q);
                break;
 
        case XenbusStateInitWait:
@@ -2136,7 +2139,9 @@ static int xennet_remove(struct xenbus_device *dev)
                xenbus_switch_state(dev, XenbusStateClosing);
                wait_event(module_unload_q,
                           xenbus_read_driver_state(dev->otherend) ==
-                          XenbusStateClosing);
+                          XenbusStateClosing ||
+                          xenbus_read_driver_state(dev->otherend) ==
+                          XenbusStateUnknown);
 
                xenbus_switch_state(dev, XenbusStateClosed);
                wait_event(module_unload_q,
index 345acca576b3c077b68e339437dd2e54ae76384a..1bd7b3734751c36820bd8f3a3cc107a1cac0b8ef 100644 (file)
@@ -278,8 +278,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
        disk->queue             = q;
        disk->flags             = GENHD_FL_EXT_DEVT;
        nvdimm_namespace_disk_name(&nsblk->common, disk->disk_name);
-       set_capacity(disk, 0);
-       device_add_disk(dev, disk);
 
        if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk))
                return -ENOMEM;
@@ -292,6 +290,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
        }
 
        set_capacity(disk, available_disk_size >> SECTOR_SHIFT);
+       device_add_disk(dev, disk);
        revalidate_disk(disk);
        return 0;
 }
index 2ef544f10ec8a3b2016bac8e9bc38475113eb5ef..4b95ac513de2131e480445bae0f7b6b319b5bf7f 100644 (file)
@@ -1545,8 +1545,6 @@ static int btt_blk_init(struct btt *btt)
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
        btt->btt_queue->queuedata = btt;
 
-       set_capacity(btt->btt_disk, 0);
-       device_add_disk(&btt->nd_btt->dev, btt->btt_disk);
        if (btt_meta_size(btt)) {
                int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt));
 
@@ -1558,6 +1556,7 @@ static int btt_blk_init(struct btt *btt)
                }
        }
        set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
+       device_add_disk(&btt->nd_btt->dev, btt->btt_disk);
        btt->nd_btt->size = btt->nlba * (u64)btt->sector_size;
        revalidate_disk(btt->btt_disk);
 
index f5c4e8c6e29d49bc5d42be0cf6c5ceffd98da1f5..2f4d18752c9772b08aa4d8e22a023fc526a7248b 100644 (file)
@@ -304,7 +304,7 @@ static const struct attribute_group *nd_pfn_attribute_groups[] = {
 struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
                struct nd_namespace_common *ndns)
 {
-       struct device *dev = &nd_pfn->dev;
+       struct device *dev;
 
        if (!nd_pfn)
                return NULL;
index 10041ac4032c038db09109b8f757a8719d26f902..06f8dcc52ca648983de46b035dfcbeee6a29fca6 100644 (file)
@@ -335,8 +335,7 @@ static int pmem_attach_disk(struct device *dev,
                dev_warn(dev, "unable to guarantee persistence of writes\n");
                fua = 0;
        }
-       wbc = nvdimm_has_cache(nd_region) &&
-               !test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags);
+       wbc = nvdimm_has_cache(nd_region);
 
        if (!devm_request_mem_region(dev, res->start, resource_size(res),
                                dev_name(&ndns->dev))) {
index e6d01911e0920db0ed1b577b5422d64d81a129ed..1593e1806b16c6b413ea1e5555987b88c6740286 100644 (file)
@@ -532,11 +532,13 @@ static ssize_t persistence_domain_show(struct device *dev,
                struct device_attribute *attr, char *buf)
 {
        struct nd_region *nd_region = to_nd_region(dev);
-       unsigned long flags = nd_region->flags;
 
-       return sprintf(buf, "%s%s\n",
-                       flags & BIT(ND_REGION_PERSIST_CACHE) ? "cpu_cache " : "",
-                       flags & BIT(ND_REGION_PERSIST_MEMCTRL) ? "memory_controller " : "");
+       if (test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags))
+               return sprintf(buf, "cpu_cache\n");
+       else if (test_bit(ND_REGION_PERSIST_MEMCTRL, &nd_region->flags))
+               return sprintf(buf, "memory_controller\n");
+       else
+               return sprintf(buf, "\n");
 }
 static DEVICE_ATTR_RO(persistence_domain);
 
@@ -593,6 +595,13 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
                        return 0;
        }
 
+       if (a == &dev_attr_persistence_domain.attr) {
+               if ((nd_region->flags & (BIT(ND_REGION_PERSIST_CACHE)
+                                       | BIT(ND_REGION_PERSIST_MEMCTRL))) == 0)
+                       return 0;
+               return a->mode;
+       }
+
        if (a != &dev_attr_set_cookie.attr
                        && a != &dev_attr_available_size.attr)
                return a->mode;
index 0fe7ea35c2217406af6f8c071a417e08e98dab6a..7aeca5db791613f345f733513f2558d4de14e2ae 100644 (file)
@@ -2844,7 +2844,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 }
 
 static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
-               struct nvme_id_ns *id, bool *new)
+               struct nvme_id_ns *id)
 {
        struct nvme_ctrl *ctrl = ns->ctrl;
        bool is_shared = id->nmic & (1 << 0);
@@ -2860,8 +2860,6 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
                        ret = PTR_ERR(head);
                        goto out_unlock;
                }
-
-               *new = true;
        } else {
                struct nvme_ns_ids ids;
 
@@ -2873,8 +2871,6 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
                        ret = -EINVAL;
                        goto out_unlock;
                }
-
-               *new = false;
        }
 
        list_add_tail(&ns->siblings, &head->list);
@@ -2945,7 +2941,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
        struct nvme_id_ns *id;
        char disk_name[DISK_NAME_LEN];
        int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT;
-       bool new = true;
 
        ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
        if (!ns)
@@ -2971,7 +2966,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
        if (id->ncap == 0)
                goto out_free_id;
 
-       if (nvme_init_ns_head(ns, nsid, id, &new))
+       if (nvme_init_ns_head(ns, nsid, id))
                goto out_free_id;
        nvme_setup_streams_ns(ctrl, ns);
        
@@ -3037,9 +3032,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
                pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
                        ns->disk->disk_name);
 
-       if (new)
-               nvme_mpath_add_disk(ns->head);
-       nvme_mpath_add_disk_links(ns);
+       nvme_mpath_add_disk(ns->head);
        return;
  out_unlink_ns:
        mutex_lock(&ctrl->subsys->lock);
@@ -3059,7 +3052,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
                return;
 
        if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
-               nvme_mpath_remove_disk_links(ns);
                sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
                                        &nvme_ns_id_attr_group);
                if (ns->ndev)
index 5dd4ceefed8fe0d0897aa8dadb1d266174b2eb02..8f0f34d06d46965168e4472ea2f9f5b5daca6a20 100644 (file)
@@ -493,7 +493,7 @@ EXPORT_SYMBOL_GPL(nvmf_should_reconnect);
  */
 int nvmf_register_transport(struct nvmf_transport_ops *ops)
 {
-       if (!ops->create_ctrl || !ops->module)
+       if (!ops->create_ctrl)
                return -EINVAL;
 
        down_write(&nvmf_transports_rwsem);
@@ -650,6 +650,11 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
                                ret = -EINVAL;
                                goto out;
                        }
+                       if (opts->discovery_nqn) {
+                               pr_debug("Ignoring nr_io_queues value for discovery controller\n");
+                               break;
+                       }
+
                        opts->nr_io_queues = min_t(unsigned int,
                                        num_online_cpus(), token);
                        break;
index 7f51f8414b97238e647ef37f13942755e4b83a16..1dc1387b71342e67bb0f6848104e2ee4ab901661 100644 (file)
@@ -1206,7 +1206,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
                                sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));
 
        assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
-       assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize);
+       assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize - 1);
        /* Linux supports only Dynamic controllers */
        assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff);
        uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id);
@@ -1321,7 +1321,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
                                sizeof(struct fcnvme_lsdesc_cr_conn_cmd));
        conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
        conn_rqst->connect_cmd.qid  = cpu_to_be16(queue->qnum);
-       conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize);
+       conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize - 1);
 
        lsop->queue = queue;
        lsreq->rqstaddr = conn_rqst;
@@ -2481,11 +2481,11 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
                goto out_free_tag_set;
        }
 
-       ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+       ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
        if (ret)
                goto out_cleanup_blk_queue;
 
-       ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+       ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
        if (ret)
                goto out_delete_hw_queues;
 
@@ -2532,11 +2532,11 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
        if (ret)
                goto out_free_io_queues;
 
-       ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+       ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
        if (ret)
                goto out_free_io_queues;
 
-       ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
+       ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
        if (ret)
                goto out_delete_hw_queues;
 
@@ -2632,13 +2632,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
        nvme_fc_init_queue(ctrl, 0);
 
        ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
-                               NVME_AQ_BLK_MQ_DEPTH);
+                               NVME_AQ_DEPTH);
        if (ret)
                goto out_free_queue;
 
        ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
-                               NVME_AQ_BLK_MQ_DEPTH,
-                               (NVME_AQ_BLK_MQ_DEPTH / 4));
+                               NVME_AQ_DEPTH, (NVME_AQ_DEPTH / 4));
        if (ret)
                goto out_delete_hw_queue;
 
@@ -2666,7 +2665,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
        }
 
        ctrl->ctrl.sqsize =
-               min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1, ctrl->ctrl.sqsize);
+               min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
 
        ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
        if (ret)
@@ -2699,6 +2698,14 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
                opts->queue_size = ctrl->ctrl.maxcmd;
        }
 
+       if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
+               /* warn if sqsize is lower than queue_size */
+               dev_warn(ctrl->ctrl.device,
+                       "queue_size %zu > ctrl sqsize %u, clamping down\n",
+                       opts->queue_size, ctrl->ctrl.sqsize + 1);
+               opts->queue_size = ctrl->ctrl.sqsize + 1;
+       }
+
        ret = nvme_fc_init_aen_ops(ctrl);
        if (ret)
                goto out_term_aen_ops;
index 3b211d9e58b8419855b29a87f1019b0d687714d4..060f69e0342761c4767c90afc09a73a10546e1dd 100644 (file)
@@ -198,30 +198,16 @@ void nvme_mpath_add_disk(struct nvme_ns_head *head)
 {
        if (!head->disk)
                return;
-       device_add_disk(&head->subsys->dev, head->disk);
-       if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
-                       &nvme_ns_id_attr_group))
-               pr_warn("%s: failed to create sysfs group for identification\n",
-                       head->disk->disk_name);
-}
-
-void nvme_mpath_add_disk_links(struct nvme_ns *ns)
-{
-       struct kobject *slave_disk_kobj, *holder_disk_kobj;
-
-       if (!ns->head->disk)
-               return;
-
-       slave_disk_kobj = &disk_to_dev(ns->disk)->kobj;
-       if (sysfs_create_link(ns->head->disk->slave_dir, slave_disk_kobj,
-                       kobject_name(slave_disk_kobj)))
-               return;
 
-       holder_disk_kobj = &disk_to_dev(ns->head->disk)->kobj;
-       if (sysfs_create_link(ns->disk->part0.holder_dir, holder_disk_kobj,
-                       kobject_name(holder_disk_kobj)))
-               sysfs_remove_link(ns->head->disk->slave_dir,
-                       kobject_name(slave_disk_kobj));
+       mutex_lock(&head->subsys->lock);
+       if (!(head->disk->flags & GENHD_FL_UP)) {
+               device_add_disk(&head->subsys->dev, head->disk);
+               if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
+                               &nvme_ns_id_attr_group))
+                       pr_warn("%s: failed to create sysfs group for identification\n",
+                               head->disk->disk_name);
+       }
+       mutex_unlock(&head->subsys->lock);
 }
 
 void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -238,14 +224,3 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
        blk_cleanup_queue(head->disk->queue);
        put_disk(head->disk);
 }
-
-void nvme_mpath_remove_disk_links(struct nvme_ns *ns)
-{
-       if (!ns->head->disk)
-               return;
-
-       sysfs_remove_link(ns->disk->part0.holder_dir,
-                       kobject_name(&disk_to_dev(ns->head->disk)->kobj));
-       sysfs_remove_link(ns->head->disk->slave_dir,
-                       kobject_name(&disk_to_dev(ns->disk)->kobj));
-}
index 0521e4707d1cfe193a2193d48e1332e174087e3a..d733b14ede9dc10022e0ae14da8cb4550c46831b 100644 (file)
@@ -410,9 +410,7 @@ bool nvme_req_needs_failover(struct request *req, blk_status_t error);
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
 void nvme_mpath_add_disk(struct nvme_ns_head *head);
-void nvme_mpath_add_disk_links(struct nvme_ns *ns);
 void nvme_mpath_remove_disk(struct nvme_ns_head *head);
-void nvme_mpath_remove_disk_links(struct nvme_ns *ns);
 
 static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
 {
@@ -454,12 +452,6 @@ static inline void nvme_mpath_add_disk(struct nvme_ns_head *head)
 static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 {
 }
-static inline void nvme_mpath_add_disk_links(struct nvme_ns *ns)
-{
-}
-static inline void nvme_mpath_remove_disk_links(struct nvme_ns *ns)
-{
-}
 static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
 {
 }
index 73036d2fbbd58da19fbcd880accafdcb3f8165e7..b6f43b738f03ae3b6008cd188d467623c1e70ecf 100644 (file)
@@ -1153,12 +1153,6 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
        if (!(csts & NVME_CSTS_CFS) && !nssro)
                return false;
 
-       /* If PCI error recovery process is happening, we cannot reset or
-        * the recovery mechanism will surely fail.
-        */
-       if (pci_channel_offline(to_pci_dev(dev->dev)))
-               return false;
-
        return true;
 }
 
@@ -1189,6 +1183,13 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
        struct nvme_command cmd;
        u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
+       /* If PCI error recovery process is happening, we cannot reset or
+        * the recovery mechanism will surely fail.
+        */
+       mb();
+       if (pci_channel_offline(to_pci_dev(dev->dev)))
+               return BLK_EH_RESET_TIMER;
+
        /*
         * Reset immediately if the controller is failed
         */
@@ -1459,7 +1460,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
        nvmeq->cq_vector = qid - 1;
        result = adapter_alloc_cq(dev, qid, nvmeq);
        if (result < 0)
-               return result;
+               goto release_vector;
 
        result = adapter_alloc_sq(dev, qid, nvmeq);
        if (result < 0)
@@ -1473,9 +1474,12 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
        return result;
 
  release_sq:
+       dev->online_queues--;
        adapter_delete_sq(dev, qid);
  release_cq:
        adapter_delete_cq(dev, qid);
+ release_vector:
+       nvmeq->cq_vector = -1;
        return result;
 }
 
@@ -1910,7 +1914,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
        int result, nr_io_queues;
        unsigned long size;
 
-       nr_io_queues = num_present_cpus();
+       nr_io_queues = num_possible_cpus();
        result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
        if (result < 0)
                return result;
index 3a51ed50eff24a4c2541b6051a2918f41920467d..4d84a73ee12d06907ea94ef77e3e654d6e2b2033 100644 (file)
@@ -1051,7 +1051,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
        struct nvme_rdma_device *dev = queue->device;
        struct ib_device *ibdev = dev->dev;
 
-       if (!blk_rq_bytes(rq))
+       if (!blk_rq_payload_bytes(rq))
                return;
 
        if (req->mr) {
@@ -1166,7 +1166,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 
        c->common.flags |= NVME_CMD_SGL_METABUF;
 
-       if (!blk_rq_bytes(rq))
+       if (!blk_rq_payload_bytes(rq))
                return nvme_rdma_set_sg_null(c);
 
        req->sg_table.sgl = req->first_sgl;
index 0bd737117a80a172745aab1868fb8a4ecf6f1e8c..a78029e4e5f481b58bd628537315d3e7e9882f1a 100644 (file)
@@ -520,9 +520,12 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
                goto fail;
        }
 
-       /* either variant of SGLs is fine, as we don't support metadata */
-       if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF &&
-                    (flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METASEG)) {
+       /*
+        * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
+        * contains an address of a single contiguous physical buffer that is
+        * byte aligned.
+        */
+       if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
                status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
                goto fail;
        }
index 7991ec3a17db9238c4fddaadc2faaa9fb32ce0cb..861d1509b22bf412e2a9bfaee408a4fcb3ad3de4 100644 (file)
@@ -184,7 +184,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
                return BLK_STS_OK;
        }
 
-       if (blk_rq_bytes(req)) {
+       if (blk_rq_payload_bytes(req)) {
                iod->sg_table.sgl = iod->first_sgl;
                if (sg_alloc_table_chained(&iod->sg_table,
                                blk_rq_nr_phys_segments(req),
@@ -193,7 +193,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 
                iod->req.sg = iod->sg_table.sgl;
                iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
-               iod->req.transfer_len = blk_rq_bytes(req);
+               iod->req.transfer_len = blk_rq_payload_bytes(req);
        }
 
        blk_mq_start_request(req);
index 8de2d5c69b1d9a6b892f97f7a240099dac9cf988..dc9303abda4242f8ab42997b1a062e5ba1d599a2 100644 (file)
@@ -613,7 +613,7 @@ void dw_pcie_setup_rc(struct pcie_port *pp)
        /* setup bus numbers */
        val = dw_pcie_readl_dbi(pci, PCI_PRIMARY_BUS);
        val &= 0xff000000;
-       val |= 0x00010100;
+       val |= 0x00ff0100;
        dw_pcie_writel_dbi(pci, PCI_PRIMARY_BUS, val);
 
        /* setup command register */
index 8b14bd326d4af32fbdaedd1258e2bb7c3ea489f5..46d47bd6ca1fce28e294a82374e6572e59a5cb50 100644 (file)
@@ -3908,6 +3908,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9230,
                         quirk_dma_func1_alias);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0642,
                         quirk_dma_func1_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0645,
+                        quirk_dma_func1_alias);
 /* https://bugs.gentoo.org/show_bug.cgi?id=497630 */
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_JMICRON,
                         PCI_DEVICE_ID_JMICRON_JMB388_ESD,
index 369d48d6c6f1a53d4bc6ab5dd0b13a33eeccd90b..365447240d95fe2bb2d484bddca71da3572daaa9 100644 (file)
@@ -401,6 +401,10 @@ void pci_release_resource(struct pci_dev *dev, int resno)
        struct resource *res = dev->resource + resno;
 
        pci_info(dev, "BAR %d: releasing %pR\n", resno, res);
+
+       if (!res->parent)
+               return;
+
        release_resource(res);
        res->end = resource_size(res) - 1;
        res->start = 0;
index 0c2ed11c0603015e10c3995f21a768801fe90427..f63db346c21970489c5892847891926b926a50f4 100644 (file)
@@ -638,7 +638,7 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node)
                if (irq_is_percpu_devid(irq))
                        disable_percpu_irq(irq);
                else
-                       disable_irq(irq);
+                       disable_irq_nosync(irq);
        }
 
        per_cpu(cpu_armpmu, cpu) = NULL;
index c5ff4525edef517fb36bb417a31a6c8db544b276..c5493ea5128287c12a572f0383fdc5cc742a7970 100644 (file)
@@ -675,3 +675,8 @@ int ufs_qcom_phy_power_off(struct phy *generic_phy)
        return 0;
 }
 EXPORT_SYMBOL_GPL(ufs_qcom_phy_power_off);
+
+MODULE_AUTHOR("Yaniv Gardi <ygardi@codeaurora.org>");
+MODULE_AUTHOR("Vivek Gautam <vivek.gautam@codeaurora.org>");
+MODULE_DESCRIPTION("Universal Flash Storage (UFS) QCOM PHY");
+MODULE_LICENSE("GPL v2");
index 1fda9d6c7ea3f39b3768f142136b7e11f9e0c870..4b91ff74779bead16a60f96bf2566e947989e179 100644 (file)
@@ -716,7 +716,7 @@ static const char * const uart_b_groups[] = {
        "uart_tx_b_x", "uart_rx_b_x", "uart_cts_b_x", "uart_rts_b_x",
 };
 
-static const char * const uart_ao_b_gpioz_groups[] = {
+static const char * const uart_ao_b_z_groups[] = {
        "uart_ao_tx_b_z", "uart_ao_rx_b_z",
        "uart_ao_cts_b_z", "uart_ao_rts_b_z",
 };
@@ -855,7 +855,7 @@ static struct meson_pmx_func meson_axg_periphs_functions[] = {
        FUNCTION(nand),
        FUNCTION(uart_a),
        FUNCTION(uart_b),
-       FUNCTION(uart_ao_b_gpioz),
+       FUNCTION(uart_ao_b_z),
        FUNCTION(i2c0),
        FUNCTION(i2c1),
        FUNCTION(i2c2),
index 6dec6ab1330074227a0361f9a25fcb7713b5213c..d8599736a41a268eba0eeeabb594d4d21499901f 100644 (file)
@@ -423,7 +423,7 @@ static int chromeos_laptop_probe(struct platform_device *pdev)
        return ret;
 }
 
-static const struct chromeos_laptop samsung_series_5_550 = {
+static struct chromeos_laptop samsung_series_5_550 = {
        .i2c_peripherals = {
                /* Touchpad. */
                { .add = setup_cyapa_tp, I2C_ADAPTER_SMBUS },
@@ -432,14 +432,14 @@ static const struct chromeos_laptop samsung_series_5_550 = {
        },
 };
 
-static const struct chromeos_laptop samsung_series_5 = {
+static struct chromeos_laptop samsung_series_5 = {
        .i2c_peripherals = {
                /* Light Sensor. */
                { .add = setup_tsl2583_als, I2C_ADAPTER_SMBUS },
        },
 };
 
-static const struct chromeos_laptop chromebook_pixel = {
+static struct chromeos_laptop chromebook_pixel = {
        .i2c_peripherals = {
                /* Touch Screen. */
                { .add = setup_atmel_1664s_ts, I2C_ADAPTER_PANEL },
@@ -450,14 +450,14 @@ static const struct chromeos_laptop chromebook_pixel = {
        },
 };
 
-static const struct chromeos_laptop hp_chromebook_14 = {
+static struct chromeos_laptop hp_chromebook_14 = {
        .i2c_peripherals = {
                /* Touchpad. */
                { .add = setup_cyapa_tp, I2C_ADAPTER_DESIGNWARE_0 },
        },
 };
 
-static const struct chromeos_laptop dell_chromebook_11 = {
+static struct chromeos_laptop dell_chromebook_11 = {
        .i2c_peripherals = {
                /* Touchpad. */
                { .add = setup_cyapa_tp, I2C_ADAPTER_DESIGNWARE_0 },
@@ -466,28 +466,28 @@ static const struct chromeos_laptop dell_chromebook_11 = {
        },
 };
 
-static const struct chromeos_laptop toshiba_cb35 = {
+static struct chromeos_laptop toshiba_cb35 = {
        .i2c_peripherals = {
                /* Touchpad. */
                { .add = setup_cyapa_tp, I2C_ADAPTER_DESIGNWARE_0 },
        },
 };
 
-static const struct chromeos_laptop acer_c7_chromebook = {
+static struct chromeos_laptop acer_c7_chromebook = {
        .i2c_peripherals = {
                /* Touchpad. */
                { .add = setup_cyapa_tp, I2C_ADAPTER_SMBUS },
        },
 };
 
-static const struct chromeos_laptop acer_ac700 = {
+static struct chromeos_laptop acer_ac700 = {
        .i2c_peripherals = {
                /* Light Sensor. */
                { .add = setup_tsl2563_als, I2C_ADAPTER_SMBUS },
        },
 };
 
-static const struct chromeos_laptop acer_c720 = {
+static struct chromeos_laptop acer_c720 = {
        .i2c_peripherals = {
                /* Touchscreen. */
                { .add = setup_atmel_1664s_ts, I2C_ADAPTER_DESIGNWARE_1 },
@@ -500,14 +500,14 @@ static const struct chromeos_laptop acer_c720 = {
        },
 };
 
-static const struct chromeos_laptop hp_pavilion_14_chromebook = {
+static struct chromeos_laptop hp_pavilion_14_chromebook = {
        .i2c_peripherals = {
                /* Touchpad. */
                { .add = setup_cyapa_tp, I2C_ADAPTER_SMBUS },
        },
 };
 
-static const struct chromeos_laptop cr48 = {
+static struct chromeos_laptop cr48 = {
        .i2c_peripherals = {
                /* Light Sensor. */
                { .add = setup_tsl2563_als, I2C_ADAPTER_SMBUS },
index 9a8f96465cdc3c04783f6296dcc778128c231df5..51ebc5a6053fbf847618f4fa9c07e016c3c50f09 100644 (file)
@@ -105,31 +105,45 @@ config ASUS_LAPTOP
 
          If you have an ACPI-compatible ASUS laptop, say Y or M here.
 
+#
+# The DELL_SMBIOS driver depends on ACPI_WMI and/or DCDBAS if those
+# backends are selected. The "depends" line prevents a configuration
+# where DELL_SMBIOS=y while either of those dependencies =m.
+#
 config DELL_SMBIOS
-       tristate
+       tristate "Dell SMBIOS driver"
+       depends on DCDBAS || DCDBAS=n
+       depends on ACPI_WMI || ACPI_WMI=n
+       ---help---
+       This provides support for the Dell SMBIOS calling interface.
+       If you have a Dell computer you should enable this option.
+
+       Be sure to select at least one backend for it to work properly.
 
 config DELL_SMBIOS_WMI
-       tristate "Dell SMBIOS calling interface (WMI implementation)"
+       bool "Dell SMBIOS driver WMI backend"
+       default y
        depends on ACPI_WMI
        select DELL_WMI_DESCRIPTOR
-       select DELL_SMBIOS
+       depends on DELL_SMBIOS
        ---help---
        This provides an implementation for the Dell SMBIOS calling interface
        communicated over ACPI-WMI.
 
-       If you have a Dell computer from >2007 you should say Y or M here.
+       If you have a Dell computer from >2007 you should say Y here.
        If you aren't sure and this module doesn't work for your computer
        it just won't load.
 
 config DELL_SMBIOS_SMM
-       tristate "Dell SMBIOS calling interface (SMM implementation)"
+       bool "Dell SMBIOS driver SMM backend"
+       default y
        depends on DCDBAS
-       select DELL_SMBIOS
+       depends on DELL_SMBIOS
        ---help---
        This provides an implementation for the Dell SMBIOS calling interface
        communicated over SMI/SMM.
 
-       If you have a Dell computer from <=2017 you should say Y or M here.
+       If you have a Dell computer from <=2017 you should say Y here.
        If you aren't sure and this module doesn't work for your computer
        it just won't load.
 
index c388608ad2a3942ef2c949c87b18f76a33f0c66f..2ba6cb7953384e8af9b150adca7775c3bea6ec29 100644 (file)
@@ -13,8 +13,9 @@ obj-$(CONFIG_MSI_LAPTOP)      += msi-laptop.o
 obj-$(CONFIG_ACPI_CMPC)                += classmate-laptop.o
 obj-$(CONFIG_COMPAL_LAPTOP)    += compal-laptop.o
 obj-$(CONFIG_DELL_SMBIOS)      += dell-smbios.o
-obj-$(CONFIG_DELL_SMBIOS_WMI)  += dell-smbios-wmi.o
-obj-$(CONFIG_DELL_SMBIOS_SMM)  += dell-smbios-smm.o
+dell-smbios-objs               := dell-smbios-base.o
+dell-smbios-$(CONFIG_DELL_SMBIOS_WMI)  += dell-smbios-wmi.o
+dell-smbios-$(CONFIG_DELL_SMBIOS_SMM)  += dell-smbios-smm.o
 obj-$(CONFIG_DELL_LAPTOP)      += dell-laptop.o
 obj-$(CONFIG_DELL_WMI)         += dell-wmi.o
 obj-$(CONFIG_DELL_WMI_DESCRIPTOR)      += dell-wmi-descriptor.o
similarity index 95%
rename from drivers/platform/x86/dell-smbios.c
rename to drivers/platform/x86/dell-smbios-base.c
index 8541cde4cb7d3c4febde205b639428b89215bdc0..2485c80a9fddb772c5085f4e2074a14981b26ba4 100644 (file)
@@ -36,7 +36,7 @@ static DEFINE_MUTEX(smbios_mutex);
 struct smbios_device {
        struct list_head list;
        struct device *device;
-       int (*call_fn)(struct calling_interface_buffer *);
+       int (*call_fn)(struct calling_interface_buffer *arg);
 };
 
 struct smbios_call {
@@ -352,8 +352,10 @@ static void __init parse_da_table(const struct dmi_header *dm)
        struct calling_interface_structure *table =
                container_of(dm, struct calling_interface_structure, header);
 
-       /* 4 bytes of table header, plus 7 bytes of Dell header, plus at least
-          6 bytes of entry */
+       /*
+        * 4 bytes of table header, plus 7 bytes of Dell header
+        * plus at least 6 bytes of entry
+        */
 
        if (dm->length < 17)
                return;
@@ -554,7 +556,7 @@ static void free_group(struct platform_device *pdev)
 static int __init dell_smbios_init(void)
 {
        const struct dmi_device *valid;
-       int ret;
+       int ret, wmi, smm;
 
        valid = dmi_find_device(DMI_DEV_TYPE_OEM_STRING, "Dell System", NULL);
        if (!valid) {
@@ -589,8 +591,24 @@ static int __init dell_smbios_init(void)
        if (ret)
                goto fail_create_group;
 
+       /* register backends */
+       wmi = init_dell_smbios_wmi();
+       if (wmi)
+               pr_debug("Failed to initialize WMI backend: %d\n", wmi);
+       smm = init_dell_smbios_smm();
+       if (smm)
+               pr_debug("Failed to initialize SMM backend: %d\n", smm);
+       if (wmi && smm) {
+               pr_err("No SMBIOS backends available (wmi: %d, smm: %d)\n",
+                       wmi, smm);
+               goto fail_sysfs;
+       }
+
        return 0;
 
+fail_sysfs:
+       free_group(platform_device);
+
 fail_create_group:
        platform_device_del(platform_device);
 
@@ -607,6 +625,8 @@ static int __init dell_smbios_init(void)
 
 static void __exit dell_smbios_exit(void)
 {
+       exit_dell_smbios_wmi();
+       exit_dell_smbios_smm();
        mutex_lock(&smbios_mutex);
        if (platform_device) {
                free_group(platform_device);
@@ -617,11 +637,12 @@ static void __exit dell_smbios_exit(void)
        mutex_unlock(&smbios_mutex);
 }
 
-subsys_initcall(dell_smbios_init);
+module_init(dell_smbios_init);
 module_exit(dell_smbios_exit);
 
 MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>");
 MODULE_AUTHOR("Gabriele Mazzotta <gabriele.mzt@gmail.com>");
 MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>");
+MODULE_AUTHOR("Mario Limonciello <mario.limonciello@dell.com>");
 MODULE_DESCRIPTION("Common functions for kernel modules using Dell SMBIOS");
 MODULE_LICENSE("GPL");
index 89f65c4651a04151aa1002afd65f8a0736f7a776..e9e9da556318703275ac6cb9eef59ae7426d4776 100644 (file)
@@ -58,7 +58,7 @@ static const struct dmi_system_id dell_device_table[] __initconst = {
 };
 MODULE_DEVICE_TABLE(dmi, dell_device_table);
 
-static void __init parse_da_table(const struct dmi_header *dm)
+static void parse_da_table(const struct dmi_header *dm)
 {
        struct calling_interface_structure *table =
                container_of(dm, struct calling_interface_structure, header);
@@ -73,7 +73,7 @@ static void __init parse_da_table(const struct dmi_header *dm)
        da_command_code = table->cmdIOCode;
 }
 
-static void __init find_cmd_address(const struct dmi_header *dm, void *dummy)
+static void find_cmd_address(const struct dmi_header *dm, void *dummy)
 {
        switch (dm->type) {
        case 0xda: /* Calling interface */
@@ -128,7 +128,7 @@ static bool test_wsmt_enabled(void)
        return false;
 }
 
-static int __init dell_smbios_smm_init(void)
+int init_dell_smbios_smm(void)
 {
        int ret;
        /*
@@ -176,7 +176,7 @@ static int __init dell_smbios_smm_init(void)
        return ret;
 }
 
-static void __exit dell_smbios_smm_exit(void)
+void exit_dell_smbios_smm(void)
 {
        if (platform_device) {
                dell_smbios_unregister_device(&platform_device->dev);
@@ -184,13 +184,3 @@ static void __exit dell_smbios_smm_exit(void)
                free_page((unsigned long)buffer);
        }
 }
-
-subsys_initcall(dell_smbios_smm_init);
-module_exit(dell_smbios_smm_exit);
-
-MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>");
-MODULE_AUTHOR("Gabriele Mazzotta <gabriele.mzt@gmail.com>");
-MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>");
-MODULE_AUTHOR("Mario Limonciello <mario.limonciello@dell.com>");
-MODULE_DESCRIPTION("Dell SMBIOS communications over SMI");
-MODULE_LICENSE("GPL");
index 609557aa58684375b7534a39e21f9fe9eadd97e6..fbefedb1c17237c5a926446bb9162ef2a8fcb993 100644 (file)
@@ -228,7 +228,7 @@ static const struct wmi_device_id dell_smbios_wmi_id_table[] = {
        { },
 };
 
-static void __init parse_b1_table(const struct dmi_header *dm)
+static void parse_b1_table(const struct dmi_header *dm)
 {
        struct misc_bios_flags_structure *flags =
        container_of(dm, struct misc_bios_flags_structure, header);
@@ -242,7 +242,7 @@ static void __init parse_b1_table(const struct dmi_header *dm)
                wmi_supported = 1;
 }
 
-static void __init find_b1(const struct dmi_header *dm, void *dummy)
+static void find_b1(const struct dmi_header *dm, void *dummy)
 {
        switch (dm->type) {
        case 0xb1: /* misc bios flags */
@@ -261,7 +261,7 @@ static struct wmi_driver dell_smbios_wmi_driver = {
        .filter_callback = dell_smbios_wmi_filter,
 };
 
-static int __init init_dell_smbios_wmi(void)
+int init_dell_smbios_wmi(void)
 {
        dmi_walk(find_b1, NULL);
 
@@ -271,15 +271,9 @@ static int __init init_dell_smbios_wmi(void)
        return wmi_driver_register(&dell_smbios_wmi_driver);
 }
 
-static void __exit exit_dell_smbios_wmi(void)
+void exit_dell_smbios_wmi(void)
 {
        wmi_driver_unregister(&dell_smbios_wmi_driver);
 }
 
-module_init(init_dell_smbios_wmi);
-module_exit(exit_dell_smbios_wmi);
-
 MODULE_ALIAS("wmi:" DELL_WMI_SMBIOS_GUID);
-MODULE_AUTHOR("Mario Limonciello <mario.limonciello@dell.com>");
-MODULE_DESCRIPTION("Dell SMBIOS communications over WMI");
-MODULE_LICENSE("GPL");
index 138d478d9adc91bdceee7194567cbe40718ec1f7..d8adaf9597409e1ee4ebf9cc6043e6e1f7095cb4 100644 (file)
@@ -75,4 +75,29 @@ int dell_laptop_register_notifier(struct notifier_block *nb);
 int dell_laptop_unregister_notifier(struct notifier_block *nb);
 void dell_laptop_call_notifier(unsigned long action, void *data);
 
-#endif
+/* for the supported backends */
+#ifdef CONFIG_DELL_SMBIOS_WMI
+int init_dell_smbios_wmi(void);
+void exit_dell_smbios_wmi(void);
+#else /* CONFIG_DELL_SMBIOS_WMI */
+static inline int init_dell_smbios_wmi(void)
+{
+       return -ENODEV;
+}
+static inline void exit_dell_smbios_wmi(void)
+{}
+#endif /* CONFIG_DELL_SMBIOS_WMI */
+
+#ifdef CONFIG_DELL_SMBIOS_SMM
+int init_dell_smbios_smm(void);
+void exit_dell_smbios_smm(void);
+#else /* CONFIG_DELL_SMBIOS_SMM */
+static inline int init_dell_smbios_smm(void)
+{
+       return -ENODEV;
+}
+static inline void exit_dell_smbios_smm(void)
+{}
+#endif /* CONFIG_DELL_SMBIOS_SMM */
+
+#endif /* _DELL_SMBIOS_H_ */
index 2c9927430d8525bc7df26c50de85e5a2fd8863d1..8d102195a3927fa4bf76f4230d069d6038414923 100644 (file)
@@ -714,7 +714,7 @@ static int __init dell_wmi_init(void)
 
        return wmi_driver_register(&dell_wmi_driver);
 }
-module_init(dell_wmi_init);
+late_initcall(dell_wmi_init);
 
 static void __exit dell_wmi_exit(void)
 {
index d1a01311c1a2937b6863bea6b696566e376f8ac1..5e3df194723e1a1f1757fff08dd3c89069381350 100644 (file)
@@ -376,6 +376,7 @@ static int intel_hid_remove(struct platform_device *device)
 {
        acpi_handle handle = ACPI_HANDLE(&device->dev);
 
+       device_init_wakeup(&device->dev, false);
        acpi_remove_notify_handler(handle, ACPI_DEVICE_NOTIFY, notify_handler);
        intel_hid_set_enable(&device->dev, false);
        intel_button_array_enable(&device->dev, false);
index b703d6f5b099b3d0d74b77d60d2e5401ff3b816e..c13780b8dabbe089c24a2b5c323f1510f589153e 100644 (file)
@@ -7,6 +7,7 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <linux/input.h>
 #include <linux/input/sparse-keymap.h>
 #include <linux/kernel.h>
@@ -97,9 +98,35 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
        dev_dbg(&device->dev, "unknown event index 0x%x\n", event);
 }
 
-static int intel_vbtn_probe(struct platform_device *device)
+static void detect_tablet_mode(struct platform_device *device)
 {
+       const char *chassis_type = dmi_get_system_info(DMI_CHASSIS_TYPE);
+       struct intel_vbtn_priv *priv = dev_get_drvdata(&device->dev);
+       acpi_handle handle = ACPI_HANDLE(&device->dev);
        struct acpi_buffer vgbs_output = { ACPI_ALLOCATE_BUFFER, NULL };
+       union acpi_object *obj;
+       acpi_status status;
+       int m;
+
+       if (!(chassis_type && strcmp(chassis_type, "31") == 0))
+               goto out;
+
+       status = acpi_evaluate_object(handle, "VGBS", NULL, &vgbs_output);
+       if (ACPI_FAILURE(status))
+               goto out;
+
+       obj = vgbs_output.pointer;
+       if (!(obj && obj->type == ACPI_TYPE_INTEGER))
+               goto out;
+
+       m = !(obj->integer.value & TABLET_MODE_FLAG);
+       input_report_switch(priv->input_dev, SW_TABLET_MODE, m);
+out:
+       kfree(vgbs_output.pointer);
+}
+
+static int intel_vbtn_probe(struct platform_device *device)
+{
        acpi_handle handle = ACPI_HANDLE(&device->dev);
        struct intel_vbtn_priv *priv;
        acpi_status status;
@@ -122,22 +149,7 @@ static int intel_vbtn_probe(struct platform_device *device)
                return err;
        }
 
-       /*
-        * VGBS being present and returning something means we have
-        * a tablet mode switch.
-        */
-       status = acpi_evaluate_object(handle, "VGBS", NULL, &vgbs_output);
-       if (ACPI_SUCCESS(status)) {
-               union acpi_object *obj = vgbs_output.pointer;
-
-               if (obj && obj->type == ACPI_TYPE_INTEGER) {
-                       int m = !(obj->integer.value & TABLET_MODE_FLAG);
-
-                       input_report_switch(priv->input_dev, SW_TABLET_MODE, m);
-               }
-       }
-
-       kfree(vgbs_output.pointer);
+       detect_tablet_mode(device);
 
        status = acpi_install_notify_handler(handle,
                                             ACPI_DEVICE_NOTIFY,
@@ -154,6 +166,7 @@ static int intel_vbtn_remove(struct platform_device *device)
 {
        acpi_handle handle = ACPI_HANDLE(&device->dev);
 
+       device_init_wakeup(&device->dev, false);
        acpi_remove_notify_handler(handle, ACPI_DEVICE_NOTIFY, notify_handler);
 
        /*
index c0c8945603cbbdd6284ff60b5df4d118502305d5..8796211ef24acdab6e20f976716c18577d5a8fed 100644 (file)
@@ -945,7 +945,7 @@ static int wmi_dev_probe(struct device *dev)
                wblock->char_dev.mode = 0444;
                ret = misc_register(&wblock->char_dev);
                if (ret) {
-                       dev_warn(dev, "failed to register char dev: %d", ret);
+                       dev_warn(dev, "failed to register char dev: %d\n", ret);
                        ret = -ENOMEM;
                        goto probe_misc_failure;
                }
@@ -1048,7 +1048,7 @@ static int wmi_create_device(struct device *wmi_bus_dev,
 
        if (result) {
                dev_warn(wmi_bus_dev,
-                        "%s data block query control method not found",
+                        "%s data block query control method not found\n",
                         method);
                return result;
        }
@@ -1198,7 +1198,7 @@ static int parse_wdg(struct device *wmi_bus_dev, struct acpi_device *device)
 
                retval = device_add(&wblock->dev.dev);
                if (retval) {
-                       dev_err(wmi_bus_dev, "failed to register %pULL\n",
+                       dev_err(wmi_bus_dev, "failed to register %pUL\n",
                                wblock->gblock.guid);
                        if (debug_event)
                                wmi_method_enable(wblock, 0);
index dd4708c58480d277e0346f9b287831b080f8af6b..1fc0c0811da4e9f2e528c21cbc4578bbe5a32077 100644 (file)
@@ -4310,7 +4310,7 @@ static int _regulator_resume_early(struct device *dev, void *data)
 
        rstate = regulator_get_suspend_state(rdev, *state);
        if (rstate == NULL)
-               return -EINVAL;
+               return 0;
 
        mutex_lock(&rdev->mutex);
 
index 72c8b3e1022b4da5c83e90d20b0166f577aeced6..e0a9c445ed67ba39554e26d29dd9dc5dec3a5efd 100644 (file)
@@ -51,7 +51,7 @@ static int stm32_vrefbuf_enable(struct regulator_dev *rdev)
         * arbitrary timeout.
         */
        ret = readl_poll_timeout(priv->base + STM32_VREFBUF_CSR, val,
-                                !(val & STM32_VRR), 650, 10000);
+                                val & STM32_VRR, 650, 10000);
        if (ret) {
                dev_err(&rdev->dev, "stm32 vrefbuf timed out!\n");
                val = readl_relaxed(priv->base + STM32_VREFBUF_CSR);
index a7c15f0085e2999787a391327db0f6398d4c2cf0..ecef8e73d40b2f845a9048151870778032fb6f18 100644 (file)
@@ -2581,8 +2581,6 @@ int dasd_cancel_req(struct dasd_ccw_req *cqr)
        case DASD_CQR_QUEUED:
                /* request was not started - just set to cleared */
                cqr->status = DASD_CQR_CLEARED;
-               if (cqr->callback_data == DASD_SLEEPON_START_TAG)
-                       cqr->callback_data = DASD_SLEEPON_END_TAG;
                break;
        case DASD_CQR_IN_IO:
                /* request in IO - terminate IO and release again */
@@ -3902,9 +3900,12 @@ static int dasd_generic_requeue_all_requests(struct dasd_device *device)
                wait_event(dasd_flush_wq,
                           (cqr->status != DASD_CQR_CLEAR_PENDING));
 
-               /* mark sleepon requests as ended */
-               if (cqr->callback_data == DASD_SLEEPON_START_TAG)
-                       cqr->callback_data = DASD_SLEEPON_END_TAG;
+               /*
+                * requeue requests to blocklayer will only work
+                * for block device requests
+                */
+               if (_dasd_requeue_request(cqr))
+                       continue;
 
                /* remove requests from device and block queue */
                list_del_init(&cqr->devlist);
@@ -3917,13 +3918,6 @@ static int dasd_generic_requeue_all_requests(struct dasd_device *device)
                        cqr = refers;
                }
 
-               /*
-                * requeue requests to blocklayer will only work
-                * for block device requests
-                */
-               if (_dasd_requeue_request(cqr))
-                       continue;
-
                if (cqr->block)
                        list_del_init(&cqr->blocklist);
                cqr->block->base->discipline->free_cp(
@@ -3940,8 +3934,7 @@ static int dasd_generic_requeue_all_requests(struct dasd_device *device)
                list_splice_tail(&requeue_queue, &device->ccw_queue);
                spin_unlock_irq(get_ccwdev_lock(device->cdev));
        }
-       /* wake up generic waitqueue for eventually ended sleepon requests */
-       wake_up(&generic_waitq);
+       dasd_schedule_device_bh(device);
        return rc;
 }
 
index 1319122e9d1231920ef0325a9e56a4c6de91ff80..9169af7dbb434ff5269406a85200c9796c04a8e4 100644 (file)
@@ -795,6 +795,7 @@ ccw_device_online_timeout(struct ccw_device *cdev, enum dev_event dev_event)
 
        ccw_device_set_timeout(cdev, 0);
        cdev->private->iretry = 255;
+       cdev->private->async_kill_io_rc = -ETIMEDOUT;
        ret = ccw_device_cancel_halt_clear(cdev);
        if (ret == -EBUSY) {
                ccw_device_set_timeout(cdev, 3*HZ);
@@ -871,7 +872,7 @@ ccw_device_killing_irq(struct ccw_device *cdev, enum dev_event dev_event)
        /* OK, i/o is dead now. Call interrupt handler. */
        if (cdev->handler)
                cdev->handler(cdev, cdev->private->intparm,
-                             ERR_PTR(-EIO));
+                             ERR_PTR(cdev->private->async_kill_io_rc));
 }
 
 static void
@@ -888,14 +889,16 @@ ccw_device_killing_timeout(struct ccw_device *cdev, enum dev_event dev_event)
        ccw_device_online_verify(cdev, 0);
        if (cdev->handler)
                cdev->handler(cdev, cdev->private->intparm,
-                             ERR_PTR(-EIO));
+                             ERR_PTR(cdev->private->async_kill_io_rc));
 }
 
 void ccw_device_kill_io(struct ccw_device *cdev)
 {
        int ret;
 
+       ccw_device_set_timeout(cdev, 0);
        cdev->private->iretry = 255;
+       cdev->private->async_kill_io_rc = -EIO;
        ret = ccw_device_cancel_halt_clear(cdev);
        if (ret == -EBUSY) {
                ccw_device_set_timeout(cdev, 3*HZ);
index 1caf6a398760bb1f156f5c088759f12e6039e589..75ce12a24dc2a6a6dfdb756fce5ae6c148de20f6 100644 (file)
@@ -159,7 +159,7 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm)
 }
 
 /**
- * ccw_device_start_key() - start a s390 channel program with key
+ * ccw_device_start_timeout_key() - start a s390 channel program with timeout and key
  * @cdev: target ccw device
  * @cpa: logical start address of channel program
  * @intparm: user specific interruption parameter; will be presented back to
@@ -170,10 +170,15 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm)
  * @key: storage key to be used for the I/O
  * @flags: additional flags; defines the action to be performed for I/O
  *        processing.
+ * @expires: timeout value in jiffies
  *
  * Start a S/390 channel program. When the interrupt arrives, the
  * IRQ handler is called, either immediately, delayed (dev-end missing,
  * or sense required) or never (no IRQ handler registered).
+ * This function notifies the device driver if the channel program has not
+ * completed during the time specified by @expires. If a timeout occurs, the
+ * channel program is terminated via xsch, hsch or csch, and the device's
+ * interrupt handler will be called with an irb containing ERR_PTR(-%ETIMEDOUT).
  * Returns:
  *  %0, if the operation was successful;
  *  -%EBUSY, if the device is busy, or status pending;
@@ -182,9 +187,9 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm)
  * Context:
  *  Interrupts disabled, ccw device lock held
  */
-int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
-                        unsigned long intparm, __u8 lpm, __u8 key,
-                        unsigned long flags)
+int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa,
+                                unsigned long intparm, __u8 lpm, __u8 key,
+                                unsigned long flags, int expires)
 {
        struct subchannel *sch;
        int ret;
@@ -224,6 +229,8 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
        switch (ret) {
        case 0:
                cdev->private->intparm = intparm;
+               if (expires)
+                       ccw_device_set_timeout(cdev, expires);
                break;
        case -EACCES:
        case -ENODEV:
@@ -234,7 +241,7 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
 }
 
 /**
- * ccw_device_start_timeout_key() - start a s390 channel program with timeout and key
+ * ccw_device_start_key() - start a s390 channel program with key
  * @cdev: target ccw device
  * @cpa: logical start address of channel program
  * @intparm: user specific interruption parameter; will be presented back to
@@ -245,15 +252,10 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
  * @key: storage key to be used for the I/O
  * @flags: additional flags; defines the action to be performed for I/O
  *        processing.
- * @expires: timeout value in jiffies
  *
  * Start a S/390 channel program. When the interrupt arrives, the
  * IRQ handler is called, either immediately, delayed (dev-end missing,
  * or sense required) or never (no IRQ handler registered).
- * This function notifies the device driver if the channel program has not
- * completed during the time specified by @expires. If a timeout occurs, the
- * channel program is terminated via xsch, hsch or csch, and the device's
- * interrupt handler will be called with an irb containing ERR_PTR(-%ETIMEDOUT).
  * Returns:
  *  %0, if the operation was successful;
  *  -%EBUSY, if the device is busy, or status pending;
@@ -262,19 +264,12 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
  * Context:
  *  Interrupts disabled, ccw device lock held
  */
-int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa,
-                                unsigned long intparm, __u8 lpm, __u8 key,
-                                unsigned long flags, int expires)
+int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa,
+                        unsigned long intparm, __u8 lpm, __u8 key,
+                        unsigned long flags)
 {
-       int ret;
-
-       if (!cdev)
-               return -ENODEV;
-       ccw_device_set_timeout(cdev, expires);
-       ret = ccw_device_start_key(cdev, cpa, intparm, lpm, key, flags);
-       if (ret != 0)
-               ccw_device_set_timeout(cdev, 0);
-       return ret;
+       return ccw_device_start_timeout_key(cdev, cpa, intparm, lpm, key,
+                                           flags, 0);
 }
 
 /**
@@ -489,18 +484,20 @@ void ccw_device_get_id(struct ccw_device *cdev, struct ccw_dev_id *dev_id)
 EXPORT_SYMBOL(ccw_device_get_id);
 
 /**
- * ccw_device_tm_start_key() - perform start function
+ * ccw_device_tm_start_timeout_key() - perform start function
  * @cdev: ccw device on which to perform the start function
  * @tcw: transport-command word to be started
  * @intparm: user defined parameter to be passed to the interrupt handler
  * @lpm: mask of paths to use
  * @key: storage key to use for storage access
+ * @expires: time span in jiffies after which to abort request
  *
  * Start the tcw on the given ccw device. Return zero on success, non-zero
  * otherwise.
  */
-int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw,
-                           unsigned long intparm, u8 lpm, u8 key)
+int ccw_device_tm_start_timeout_key(struct ccw_device *cdev, struct tcw *tcw,
+                                   unsigned long intparm, u8 lpm, u8 key,
+                                   int expires)
 {
        struct subchannel *sch;
        int rc;
@@ -527,37 +524,32 @@ int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw,
                        return -EACCES;
        }
        rc = cio_tm_start_key(sch, tcw, lpm, key);
-       if (rc == 0)
+       if (rc == 0) {
                cdev->private->intparm = intparm;
+               if (expires)
+                       ccw_device_set_timeout(cdev, expires);
+       }
        return rc;
 }
-EXPORT_SYMBOL(ccw_device_tm_start_key);
+EXPORT_SYMBOL(ccw_device_tm_start_timeout_key);
 
 /**
- * ccw_device_tm_start_timeout_key() - perform start function
+ * ccw_device_tm_start_key() - perform start function
  * @cdev: ccw device on which to perform the start function
  * @tcw: transport-command word to be started
  * @intparm: user defined parameter to be passed to the interrupt handler
  * @lpm: mask of paths to use
  * @key: storage key to use for storage access
- * @expires: time span in jiffies after which to abort request
  *
  * Start the tcw on the given ccw device. Return zero on success, non-zero
  * otherwise.
  */
-int ccw_device_tm_start_timeout_key(struct ccw_device *cdev, struct tcw *tcw,
-                                   unsigned long intparm, u8 lpm, u8 key,
-                                   int expires)
+int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw,
+                           unsigned long intparm, u8 lpm, u8 key)
 {
-       int ret;
-
-       ccw_device_set_timeout(cdev, expires);
-       ret = ccw_device_tm_start_key(cdev, tcw, intparm, lpm, key);
-       if (ret != 0)
-               ccw_device_set_timeout(cdev, 0);
-       return ret;
+       return ccw_device_tm_start_timeout_key(cdev, tcw, intparm, lpm, key, 0);
 }
-EXPORT_SYMBOL(ccw_device_tm_start_timeout_key);
+EXPORT_SYMBOL(ccw_device_tm_start_key);
 
 /**
  * ccw_device_tm_start() - perform start function
index af571d8d6925e7a8bb4f35db7e0a7c8aff1bdbf8..90e4e3a7841be1b23e833809587096dc9290f16e 100644 (file)
@@ -157,6 +157,7 @@ struct ccw_device_private {
        unsigned long intparm;  /* user interruption parameter */
        struct qdio_irq *qdio_data;
        struct irb irb;         /* device status */
+       int async_kill_io_rc;
        struct senseid senseid; /* SenseID info */
        struct pgid pgid[8];    /* path group IDs per chpid*/
        struct ccw1 iccws[2];   /* ccws for SNID/SID/SPGID commands */
index 959c65cf75d945b05954b1dc8d0f054be83920d7..4326715dc13eb18af4c69cc233f279ab264fae36 100644 (file)
@@ -233,8 +233,6 @@ static inline int qeth_is_ipa_enabled(struct qeth_ipa_info *ipa,
 #define QETH_IDX_FUNC_LEVEL_OSD                 0x0101
 #define QETH_IDX_FUNC_LEVEL_IQD                 0x4108
 
-#define QETH_REAL_CARD         1
-#define QETH_VLAN_CARD         2
 #define QETH_BUFSIZE           4096
 
 /**
@@ -556,12 +554,6 @@ enum qeth_prot_versions {
        QETH_PROT_IPV6 = 0x0006,
 };
 
-enum qeth_ip_types {
-       QETH_IP_TYPE_NORMAL,
-       QETH_IP_TYPE_VIPA,
-       QETH_IP_TYPE_RXIP,
-};
-
 enum qeth_cmd_buffer_state {
        BUF_STATE_FREE,
        BUF_STATE_LOCKED,
index ca72f3311004a3d020ed07266596ff63ce740b26..19203340f8795be3dd02de136265a76790e0d314 100644 (file)
@@ -527,8 +527,7 @@ static inline int qeth_is_cq(struct qeth_card *card, unsigned int queue)
            queue == card->qdio.no_in_queues - 1;
 }
 
-
-static int qeth_issue_next_read(struct qeth_card *card)
+static int __qeth_issue_next_read(struct qeth_card *card)
 {
        int rc;
        struct qeth_cmd_buffer *iob;
@@ -559,6 +558,17 @@ static int qeth_issue_next_read(struct qeth_card *card)
        return rc;
 }
 
+static int qeth_issue_next_read(struct qeth_card *card)
+{
+       int ret;
+
+       spin_lock_irq(get_ccwdev_lock(CARD_RDEV(card)));
+       ret = __qeth_issue_next_read(card);
+       spin_unlock_irq(get_ccwdev_lock(CARD_RDEV(card)));
+
+       return ret;
+}
+
 static struct qeth_reply *qeth_alloc_reply(struct qeth_card *card)
 {
        struct qeth_reply *reply;
@@ -708,11 +718,8 @@ static int qeth_check_idx_response(struct qeth_card *card,
 
        QETH_DBF_HEX(CTRL, 2, buffer, QETH_DBF_CTRL_LEN);
        if ((buffer[2] & 0xc0) == 0xc0) {
-               QETH_DBF_MESSAGE(2, "received an IDX TERMINATE "
-                          "with cause code 0x%02x%s\n",
-                          buffer[4],
-                          ((buffer[4] == 0x22) ?
-                           " -- try another portname" : ""));
+               QETH_DBF_MESSAGE(2, "received an IDX TERMINATE with cause code %#02x\n",
+                                buffer[4]);
                QETH_CARD_TEXT(card, 2, "ckidxres");
                QETH_CARD_TEXT(card, 2, " idxterm");
                QETH_CARD_TEXT_(card, 2, "  rc%d", -EIO);
@@ -960,7 +967,7 @@ void qeth_clear_thread_running_bit(struct qeth_card *card, unsigned long thread)
        spin_lock_irqsave(&card->thread_mask_lock, flags);
        card->thread_running_mask &= ~thread;
        spin_unlock_irqrestore(&card->thread_mask_lock, flags);
-       wake_up(&card->wait_q);
+       wake_up_all(&card->wait_q);
 }
 EXPORT_SYMBOL_GPL(qeth_clear_thread_running_bit);
 
@@ -1164,6 +1171,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
                }
                rc = qeth_get_problem(cdev, irb);
                if (rc) {
+                       card->read_or_write_problem = 1;
                        qeth_clear_ipacmd_list(card);
                        qeth_schedule_recovery(card);
                        goto out;
@@ -1182,7 +1190,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
                return;
        if (channel == &card->read &&
            channel->state == CH_STATE_UP)
-               qeth_issue_next_read(card);
+               __qeth_issue_next_read(card);
 
        iob = channel->iob;
        index = channel->buf_no;
@@ -2134,24 +2142,25 @@ int qeth_send_control_data(struct qeth_card *card, int len,
        }
        reply->callback = reply_cb;
        reply->param = reply_param;
-       if (card->state == CARD_STATE_DOWN)
-               reply->seqno = QETH_IDX_COMMAND_SEQNO;
-       else
-               reply->seqno = card->seqno.ipa++;
+
        init_waitqueue_head(&reply->wait_q);
-       spin_lock_irqsave(&card->lock, flags);
-       list_add_tail(&reply->list, &card->cmd_waiter_list);
-       spin_unlock_irqrestore(&card->lock, flags);
 
        while (atomic_cmpxchg(&card->write.irq_pending, 0, 1)) ;
-       qeth_prepare_control_data(card, len, iob);
 
        if (IS_IPA(iob->data)) {
                cmd = __ipa_cmd(iob);
+               cmd->hdr.seqno = card->seqno.ipa++;
+               reply->seqno = cmd->hdr.seqno;
                event_timeout = QETH_IPA_TIMEOUT;
        } else {
+               reply->seqno = QETH_IDX_COMMAND_SEQNO;
                event_timeout = QETH_TIMEOUT;
        }
+       qeth_prepare_control_data(card, len, iob);
+
+       spin_lock_irqsave(&card->lock, flags);
+       list_add_tail(&reply->list, &card->cmd_waiter_list);
+       spin_unlock_irqrestore(&card->lock, flags);
 
        timeout = jiffies + event_timeout;
 
@@ -2837,7 +2846,8 @@ static int qeth_init_input_buffer(struct qeth_card *card,
        int i;
 
        if ((card->options.cq == QETH_CQ_ENABLED) && (!buf->rx_skb)) {
-               buf->rx_skb = dev_alloc_skb(QETH_RX_PULL_LEN + ETH_HLEN);
+               buf->rx_skb = netdev_alloc_skb(card->dev,
+                                              QETH_RX_PULL_LEN + ETH_HLEN);
                if (!buf->rx_skb)
                        return 1;
        }
@@ -2874,8 +2884,8 @@ int qeth_init_qdio_queues(struct qeth_card *card)
        QETH_DBF_TEXT(SETUP, 2, "initqdqs");
 
        /* inbound queue */
-       qdio_reset_buffers(card->qdio.in_q->qdio_bufs,
-                          QDIO_MAX_BUFFERS_PER_Q);
+       qdio_reset_buffers(card->qdio.in_q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q);
+       memset(&card->rx, 0, sizeof(struct qeth_rx));
        qeth_initialize_working_pool_list(card);
        /*give only as many buffers to hardware as we have buffer pool entries*/
        for (i = 0; i < card->qdio.in_buf_pool.buf_count - 1; ++i)
@@ -2933,7 +2943,7 @@ static void qeth_fill_ipacmd_header(struct qeth_card *card,
        memset(cmd, 0, sizeof(struct qeth_ipa_cmd));
        cmd->hdr.command = command;
        cmd->hdr.initiator = IPA_CMD_INITIATOR_HOST;
-       cmd->hdr.seqno = card->seqno.ipa;
+       /* cmd->hdr.seqno is set by qeth_send_control_data() */
        cmd->hdr.adapter_type = qeth_get_ipa_adp_type(card->info.link_type);
        cmd->hdr.rel_adapter_no = (__u8) card->info.portno;
        if (card->options.layer2)
@@ -2950,12 +2960,10 @@ struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *card,
                enum qeth_ipa_cmds ipacmd, enum qeth_prot_versions prot)
 {
        struct qeth_cmd_buffer *iob;
-       struct qeth_ipa_cmd *cmd;
 
        iob = qeth_get_buffer(&card->write);
        if (iob) {
-               cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
-               qeth_fill_ipacmd_header(card, cmd, ipacmd, prot);
+               qeth_fill_ipacmd_header(card, __ipa_cmd(iob), ipacmd, prot);
        } else {
                dev_warn(&card->gdev->dev,
                         "The qeth driver ran out of channel command buffers\n");
@@ -3066,7 +3074,7 @@ static struct qeth_cmd_buffer *qeth_get_adapter_cmd(struct qeth_card *card,
        iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETADAPTERPARMS,
                                     QETH_PROT_IPV4);
        if (iob) {
-               cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+               cmd = __ipa_cmd(iob);
                cmd->data.setadapterparms.hdr.cmdlength = cmdlen;
                cmd->data.setadapterparms.hdr.command_code = command;
                cmd->data.setadapterparms.hdr.used_total = 1;
@@ -3208,7 +3216,7 @@ static int qeth_query_setdiagass(struct qeth_card *card)
        iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
        if (!iob)
                return -ENOMEM;
-       cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        cmd->data.diagass.subcmd_len = 16;
        cmd->data.diagass.subcmd = QETH_DIAGS_CMD_QUERY;
        return qeth_send_ipa_cmd(card, iob, qeth_query_setdiagass_cb, NULL);
@@ -3261,7 +3269,7 @@ int qeth_hw_trap(struct qeth_card *card, enum qeth_diags_trap_action action)
        iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
        if (!iob)
                return -ENOMEM;
-       cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        cmd->data.diagass.subcmd_len = 80;
        cmd->data.diagass.subcmd = QETH_DIAGS_CMD_TRAP;
        cmd->data.diagass.type = 1;
@@ -3898,10 +3906,12 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags);
 int qeth_get_elements_no(struct qeth_card *card,
                     struct sk_buff *skb, int extra_elems, int data_offset)
 {
-       int elements = qeth_get_elements_for_range(
-                               (addr_t)skb->data + data_offset,
-                               (addr_t)skb->data + skb_headlen(skb)) +
-                       qeth_get_elements_for_frags(skb);
+       addr_t end = (addr_t)skb->data + skb_headlen(skb);
+       int elements = qeth_get_elements_for_frags(skb);
+       addr_t start = (addr_t)skb->data + data_offset;
+
+       if (start != end)
+               elements += qeth_get_elements_for_range(start, end);
 
        if ((elements + extra_elems) > QETH_MAX_BUFFER_ELEMENTS(card)) {
                QETH_DBF_MESSAGE(2, "Invalid size of IP packet "
@@ -4237,7 +4247,7 @@ void qeth_setadp_promisc_mode(struct qeth_card *card)
                        sizeof(struct qeth_ipacmd_setadpparms_hdr) + 8);
        if (!iob)
                return;
-       cmd = (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        cmd->data.setadapterparms.data.mode = mode;
        qeth_send_ipa_cmd(card, iob, qeth_setadp_promisc_mode_cb, NULL);
 }
@@ -4304,7 +4314,7 @@ int qeth_setadpparms_change_macaddr(struct qeth_card *card)
                                   sizeof(struct qeth_change_addr));
        if (!iob)
                return -ENOMEM;
-       cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        cmd->data.setadapterparms.data.change_addr.cmd = CHANGE_ADDR_READ_MAC;
        cmd->data.setadapterparms.data.change_addr.addr_size = ETH_ALEN;
        ether_addr_copy(cmd->data.setadapterparms.data.change_addr.addr,
@@ -4419,7 +4429,7 @@ static int qeth_setadpparms_set_access_ctrl(struct qeth_card *card,
                                   sizeof(struct qeth_set_access_ctrl));
        if (!iob)
                return -ENOMEM;
-       cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        access_ctrl_req = &cmd->data.setadapterparms.data.set_access_ctrl;
        access_ctrl_req->subcmd_code = isolation;
 
@@ -4665,7 +4675,7 @@ static int qeth_snmp_command(struct qeth_card *card, char __user *udata)
                rc = -ENOMEM;
                goto out;
        }
-       cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        memcpy(&cmd->data.setadapterparms.data.snmp, &ureq->cmd, req_len);
        rc = qeth_send_ipa_snmp_cmd(card, iob, QETH_SETADP_BASE_LEN + req_len,
                                    qeth_snmp_command_cb, (void *)&qinfo);
@@ -4750,7 +4760,7 @@ static int qeth_query_oat_command(struct qeth_card *card, char __user *udata)
                rc = -ENOMEM;
                goto out_free;
        }
-       cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        oat_req = &cmd->data.setadapterparms.data.query_oat;
        oat_req->subcmd_code = oat_data.command;
 
@@ -5084,8 +5094,6 @@ static void qeth_core_free_card(struct qeth_card *card)
        QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
        qeth_clean_channel(&card->read);
        qeth_clean_channel(&card->write);
-       if (card->dev)
-               free_netdev(card->dev);
        qeth_free_qdio_buffers(card);
        unregister_service_level(&card->qeth_service_level);
        kfree(card);
@@ -5327,7 +5335,7 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card,
        } else {
                unsigned int linear = (use_rx_sg) ? QETH_RX_PULL_LEN : skb_len;
 
-               skb = dev_alloc_skb(linear + headroom);
+               skb = napi_alloc_skb(&card->napi, linear + headroom);
        }
        if (!skb)
                goto no_mem;
@@ -5491,7 +5499,7 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card,
        iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETASSPARMS, prot);
 
        if (iob) {
-               cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+               cmd = __ipa_cmd(iob);
                cmd->data.setassparms.hdr.assist_no = ipa_func;
                cmd->data.setassparms.hdr.length = 8 + len;
                cmd->data.setassparms.hdr.command_code = cmd_code;
@@ -5514,7 +5522,7 @@ int qeth_send_setassparms(struct qeth_card *card,
 
        QETH_CARD_TEXT(card, 4, "sendassp");
 
-       cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        if (len <= sizeof(__u32))
                cmd->data.setassparms.data.flags_32bit = (__u32) data;
        else   /* (len > sizeof(__u32)) */
index 7f236440483f2ca6fdc82caf5d21b649f8434471..50a313806dde2a03137451390cc1358b10634bf5 100644 (file)
@@ -108,7 +108,7 @@ static int qeth_l2_send_setdelmac(struct qeth_card *card, __u8 *mac,
        iob = qeth_get_ipacmd_buffer(card, ipacmd, QETH_PROT_IPV4);
        if (!iob)
                return -ENOMEM;
-       cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        cmd->data.setdelmac.mac_length = ETH_ALEN;
        ether_addr_copy(cmd->data.setdelmac.mac, mac);
        return qeth_setdelmac_makerc(card, qeth_send_ipa_cmd(card, iob,
@@ -305,7 +305,7 @@ static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i,
        iob = qeth_get_ipacmd_buffer(card, ipacmd, QETH_PROT_IPV4);
        if (!iob)
                return -ENOMEM;
-       cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        cmd->data.setdelvlan.vlan_id = i;
        return qeth_setdelvlan_makerc(card, qeth_send_ipa_cmd(card, iob,
                                            qeth_l2_send_setdelvlan_cb, NULL));
@@ -437,10 +437,8 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card,
                        *done = 1;
                        break;
                }
-               skb->dev = card->dev;
                switch (hdr->hdr.l2.id) {
                case QETH_HEADER_TYPE_LAYER2:
-                       skb->pkt_type = PACKET_HOST;
                        skb->protocol = eth_type_trans(skb, skb->dev);
                        if ((card->dev->features & NETIF_F_RXCSUM)
                           && ((hdr->hdr.l2.flags[1] &
@@ -915,8 +913,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)
                qeth_l2_set_offline(cgdev);
 
        if (card->dev) {
-               netif_napi_del(&card->napi);
                unregister_netdev(card->dev);
+               free_netdev(card->dev);
                card->dev = NULL;
        }
        return;
@@ -975,6 +973,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
                return -ENODEV;
 
        card->dev->ml_priv = card;
+       card->dev->priv_flags |= IFF_UNICAST_FLT;
        card->dev->watchdog_timeo = QETH_TX_TIMEOUT;
        card->dev->mtu = card->info.initial_mtu;
        card->dev->min_mtu = 64;
@@ -991,9 +990,16 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
                card->dev->features |= NETIF_F_VLAN_CHALLENGED;
        else
                card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+       if (card->info.type != QETH_CARD_TYPE_OSN &&
+           card->info.type != QETH_CARD_TYPE_IQD) {
+               card->dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+               card->dev->needed_headroom = sizeof(struct qeth_hdr);
+               card->dev->hw_features |= NETIF_F_SG;
+               card->dev->vlan_features |= NETIF_F_SG;
+       }
+
        if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
-               card->dev->hw_features = NETIF_F_SG;
-               card->dev->vlan_features = NETIF_F_SG;
                card->dev->features |= NETIF_F_SG;
                /* OSA 3S and earlier has no RX/TX support */
                if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) {
@@ -1005,11 +1011,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
                        card->dev->vlan_features |= NETIF_F_RXCSUM;
                }
        }
-       if (card->info.type != QETH_CARD_TYPE_OSN &&
-           card->info.type != QETH_CARD_TYPE_IQD) {
-               card->dev->priv_flags &= ~IFF_TX_SKB_SHARING;
-               card->dev->needed_headroom = sizeof(struct qeth_hdr);
-       }
 
        card->info.broadcast_capable = 1;
        qeth_l2_request_initial_mac(card);
@@ -1086,7 +1087,6 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
        qeth_l2_setup_bridgeport_attrs(card);
 
        card->state = CARD_STATE_HARDSETUP;
-       memset(&card->rx, 0, sizeof(struct qeth_rx));
        qeth_print_status_message(card);
 
        /* softsetup */
@@ -1374,7 +1374,6 @@ int qeth_osn_assist(struct net_device *dev, void *data, int data_len)
 {
        struct qeth_cmd_buffer *iob;
        struct qeth_card *card;
-       int rc;
 
        if (!dev)
                return -ENODEV;
@@ -1385,9 +1384,8 @@ int qeth_osn_assist(struct net_device *dev, void *data, int data_len)
        if (!qeth_card_hw_is_reachable(card))
                return -ENODEV;
        iob = qeth_wait_for_buffer(&card->write);
-       memcpy(iob->data+IPA_PDU_HEADER_SIZE, data, data_len);
-       rc = qeth_osn_send_ipa_cmd(card, iob, data_len);
-       return rc;
+       memcpy(__ipa_cmd(iob), data, data_len);
+       return qeth_osn_send_ipa_cmd(card, iob, data_len);
 }
 EXPORT_SYMBOL(qeth_osn_assist);
 
@@ -1764,7 +1762,7 @@ static struct qeth_cmd_buffer *qeth_sbp_build_cmd(struct qeth_card *card,
        iob = qeth_get_ipacmd_buffer(card, ipa_cmd, 0);
        if (!iob)
                return iob;
-       cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        cmd->data.sbp.hdr.cmdlength = sizeof(struct qeth_ipacmd_sbp_hdr) +
                                      cmd_length;
        cmd->data.sbp.hdr.command_code = sbp_cmd;
@@ -2129,7 +2127,7 @@ static int qeth_l2_vnicc_request(struct qeth_card *card,
                return -ENOMEM;
 
        /* create header for request */
-       cmd = (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        req = &cmd->data.vnicc;
 
        /* create sub command header for request */
index bdd45f4dcacecc11b9334c94c717e6cc08b6c84c..87659cfc90666bd0b4b95e371114621383e1cfdf 100644 (file)
 
 #define QETH_SNIFF_AVAIL       0x0008
 
+enum qeth_ip_types {
+       QETH_IP_TYPE_NORMAL,
+       QETH_IP_TYPE_VIPA,
+       QETH_IP_TYPE_RXIP,
+};
+
 struct qeth_ipaddr {
        struct hlist_node hnode;
        enum qeth_ip_types type;
-       enum qeth_ipa_setdelip_flags set_flags;
-       enum qeth_ipa_setdelip_flags del_flags;
+       unsigned char mac[ETH_ALEN];
        u8 is_multicast:1;
        u8 in_progress:1;
        u8 disp_flag:2;
+       u8 ipato:1;                     /* ucast only */
 
        /* is changed only for normal ip addresses
         * for non-normal addresses it always is  1
         */
        int  ref_counter;
        enum qeth_prot_versions proto;
-       unsigned char mac[ETH_ALEN];
        union {
                struct {
                        unsigned int addr;
@@ -40,8 +45,50 @@ struct qeth_ipaddr {
                        unsigned int pfxlen;
                } a6;
        } u;
-
 };
+
+static inline void qeth_l3_init_ipaddr(struct qeth_ipaddr *addr,
+                                      enum qeth_ip_types type,
+                                      enum qeth_prot_versions proto)
+{
+       memset(addr, 0, sizeof(*addr));
+       addr->type = type;
+       addr->proto = proto;
+       addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
+}
+
+static inline bool qeth_l3_addr_match_ip(struct qeth_ipaddr *a1,
+                                        struct qeth_ipaddr *a2)
+{
+       if (a1->proto != a2->proto)
+               return false;
+       if (a1->proto == QETH_PROT_IPV6)
+               return ipv6_addr_equal(&a1->u.a6.addr, &a2->u.a6.addr);
+       return a1->u.a4.addr == a2->u.a4.addr;
+}
+
+static inline bool qeth_l3_addr_match_all(struct qeth_ipaddr *a1,
+                                         struct qeth_ipaddr *a2)
+{
+       /* Assumes that the pair was obtained via qeth_l3_addr_find_by_ip(),
+        * so 'proto' and 'addr' match for sure.
+        *
+        * For ucast:
+        * -    'mac' is always 0.
+        * -    'mask'/'pfxlen' for RXIP/VIPA is always 0. For NORMAL, matching
+        *      values are required to avoid mixups in takeover eligibility.
+        *
+        * For mcast,
+        * -    'mac' is mapped from the IP, and thus always matches.
+        * -    'mask'/'pfxlen' is always 0.
+        */
+       if (a1->type != a2->type)
+               return false;
+       if (a1->proto == QETH_PROT_IPV6)
+               return a1->u.a6.pfxlen == a2->u.a6.pfxlen;
+       return a1->u.a4.mask == a2->u.a4.mask;
+}
+
 static inline  u64 qeth_l3_ipaddr_hash(struct qeth_ipaddr *addr)
 {
        u64  ret = 0;
@@ -77,15 +124,10 @@ int qeth_l3_add_ipato_entry(struct qeth_card *, struct qeth_ipato_entry *);
 int qeth_l3_del_ipato_entry(struct qeth_card *card,
                            enum qeth_prot_versions proto, u8 *addr,
                            int mask_bits);
-int qeth_l3_add_vipa(struct qeth_card *, enum qeth_prot_versions, const u8 *);
-int qeth_l3_del_vipa(struct qeth_card *card, enum qeth_prot_versions proto,
-                    const u8 *addr);
-int qeth_l3_add_rxip(struct qeth_card *, enum qeth_prot_versions, const u8 *);
-int qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions proto,
-                    const u8 *addr);
 void qeth_l3_update_ipato(struct qeth_card *card);
-struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions);
-int qeth_l3_add_ip(struct qeth_card *, struct qeth_ipaddr *);
-int qeth_l3_delete_ip(struct qeth_card *, struct qeth_ipaddr *);
+int qeth_l3_modify_hsuid(struct qeth_card *card, bool add);
+int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip,
+                            enum qeth_ip_types type,
+                            enum qeth_prot_versions proto);
 
 #endif /* __QETH_L3_H__ */
index b0c888e86cd4a0980ff42e49f06d6022c93f4de7..c1a16a74aa8331177744fb0a7236e343d38efd3a 100644 (file)
@@ -67,6 +67,33 @@ void qeth_l3_ipaddr_to_string(enum qeth_prot_versions proto, const __u8 *addr,
                qeth_l3_ipaddr6_to_string(addr, buf);
 }
 
+static struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions prot)
+{
+       struct qeth_ipaddr *addr = kmalloc(sizeof(*addr), GFP_ATOMIC);
+
+       if (addr)
+               qeth_l3_init_ipaddr(addr, QETH_IP_TYPE_NORMAL, prot);
+       return addr;
+}
+
+static struct qeth_ipaddr *qeth_l3_find_addr_by_ip(struct qeth_card *card,
+                                                  struct qeth_ipaddr *query)
+{
+       u64 key = qeth_l3_ipaddr_hash(query);
+       struct qeth_ipaddr *addr;
+
+       if (query->is_multicast) {
+               hash_for_each_possible(card->ip_mc_htable, addr, hnode, key)
+                       if (qeth_l3_addr_match_ip(addr, query))
+                               return addr;
+       } else {
+               hash_for_each_possible(card->ip_htable,  addr, hnode, key)
+                       if (qeth_l3_addr_match_ip(addr, query))
+                               return addr;
+       }
+       return NULL;
+}
+
 static void qeth_l3_convert_addr_to_bits(u8 *addr, u8 *bits, int len)
 {
        int i, j;
@@ -120,40 +147,18 @@ static bool qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
        return rc;
 }
 
-inline int
-qeth_l3_ipaddrs_is_equal(struct qeth_ipaddr *addr1, struct qeth_ipaddr *addr2)
-{
-       return addr1->proto == addr2->proto &&
-              !memcmp(&addr1->u, &addr2->u, sizeof(addr1->u)) &&
-              ether_addr_equal_64bits(addr1->mac, addr2->mac);
-}
-
-static struct qeth_ipaddr *
-qeth_l3_ip_from_hash(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
-{
-       struct qeth_ipaddr *addr;
-
-       if (tmp_addr->is_multicast) {
-               hash_for_each_possible(card->ip_mc_htable,  addr,
-                               hnode, qeth_l3_ipaddr_hash(tmp_addr))
-                       if (qeth_l3_ipaddrs_is_equal(tmp_addr, addr))
-                               return addr;
-       } else {
-               hash_for_each_possible(card->ip_htable,  addr,
-                               hnode, qeth_l3_ipaddr_hash(tmp_addr))
-                       if (qeth_l3_ipaddrs_is_equal(tmp_addr, addr))
-                               return addr;
-       }
-
-       return NULL;
-}
-
-int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
+static int qeth_l3_delete_ip(struct qeth_card *card,
+                            struct qeth_ipaddr *tmp_addr)
 {
        int rc = 0;
        struct qeth_ipaddr *addr;
 
-       QETH_CARD_TEXT(card, 4, "delip");
+       if (tmp_addr->type == QETH_IP_TYPE_RXIP)
+               QETH_CARD_TEXT(card, 2, "delrxip");
+       else if (tmp_addr->type == QETH_IP_TYPE_VIPA)
+               QETH_CARD_TEXT(card, 2, "delvipa");
+       else
+               QETH_CARD_TEXT(card, 2, "delip");
 
        if (tmp_addr->proto == QETH_PROT_IPV4)
                QETH_CARD_HEX(card, 4, &tmp_addr->u.a4.addr, 4);
@@ -162,23 +167,18 @@ int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
                QETH_CARD_HEX(card, 4, ((char *)&tmp_addr->u.a6.addr) + 8, 8);
        }
 
-       addr = qeth_l3_ip_from_hash(card, tmp_addr);
-       if (!addr)
+       addr = qeth_l3_find_addr_by_ip(card, tmp_addr);
+       if (!addr || !qeth_l3_addr_match_all(addr, tmp_addr))
                return -ENOENT;
 
        addr->ref_counter--;
-       if (addr->ref_counter > 0 && (addr->type == QETH_IP_TYPE_NORMAL ||
-                                     addr->type == QETH_IP_TYPE_RXIP))
+       if (addr->type == QETH_IP_TYPE_NORMAL && addr->ref_counter > 0)
                return rc;
        if (addr->in_progress)
                return -EINPROGRESS;
 
-       if (!qeth_card_hw_is_reachable(card)) {
-               addr->disp_flag = QETH_DISP_ADDR_DELETE;
-               return 0;
-       }
-
-       rc = qeth_l3_deregister_addr_entry(card, addr);
+       if (qeth_card_hw_is_reachable(card))
+               rc = qeth_l3_deregister_addr_entry(card, addr);
 
        hash_del(&addr->hnode);
        kfree(addr);
@@ -186,12 +186,18 @@ int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
        return rc;
 }
 
-int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
+static int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
 {
        int rc = 0;
        struct qeth_ipaddr *addr;
+       char buf[40];
 
-       QETH_CARD_TEXT(card, 4, "addip");
+       if (tmp_addr->type == QETH_IP_TYPE_RXIP)
+               QETH_CARD_TEXT(card, 2, "addrxip");
+       else if (tmp_addr->type == QETH_IP_TYPE_VIPA)
+               QETH_CARD_TEXT(card, 2, "addvipa");
+       else
+               QETH_CARD_TEXT(card, 2, "addip");
 
        if (tmp_addr->proto == QETH_PROT_IPV4)
                QETH_CARD_HEX(card, 4, &tmp_addr->u.a4.addr, 4);
@@ -200,8 +206,20 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
                QETH_CARD_HEX(card, 4, ((char *)&tmp_addr->u.a6.addr) + 8, 8);
        }
 
-       addr = qeth_l3_ip_from_hash(card, tmp_addr);
-       if (!addr) {
+       addr = qeth_l3_find_addr_by_ip(card, tmp_addr);
+       if (addr) {
+               if (tmp_addr->type != QETH_IP_TYPE_NORMAL)
+                       return -EADDRINUSE;
+               if (qeth_l3_addr_match_all(addr, tmp_addr)) {
+                       addr->ref_counter++;
+                       return 0;
+               }
+               qeth_l3_ipaddr_to_string(tmp_addr->proto, (u8 *)&tmp_addr->u,
+                                        buf);
+               dev_warn(&card->gdev->dev,
+                        "Registering IP address %s failed\n", buf);
+               return -EADDRINUSE;
+       } else {
                addr = qeth_l3_get_addr_buffer(tmp_addr->proto);
                if (!addr)
                        return -ENOMEM;
@@ -211,7 +229,7 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
 
                if (qeth_l3_is_addr_covered_by_ipato(card, addr)) {
                        QETH_CARD_TEXT(card, 2, "tkovaddr");
-                       addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
+                       addr->ipato = 1;
                }
                hash_add(card->ip_htable, &addr->hnode,
                                qeth_l3_ipaddr_hash(addr));
@@ -241,39 +259,18 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
                                (rc == IPA_RC_LAN_OFFLINE)) {
                        addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
                        if (addr->ref_counter < 1) {
-                               qeth_l3_delete_ip(card, addr);
+                               qeth_l3_deregister_addr_entry(card, addr);
+                               hash_del(&addr->hnode);
                                kfree(addr);
                        }
                } else {
                        hash_del(&addr->hnode);
                        kfree(addr);
                }
-       } else {
-               if (addr->type == QETH_IP_TYPE_NORMAL ||
-                   addr->type == QETH_IP_TYPE_RXIP)
-                       addr->ref_counter++;
        }
-
        return rc;
 }
 
-
-struct qeth_ipaddr *qeth_l3_get_addr_buffer(
-                               enum qeth_prot_versions prot)
-{
-       struct qeth_ipaddr *addr;
-
-       addr = kzalloc(sizeof(struct qeth_ipaddr), GFP_ATOMIC);
-       if (!addr)
-               return NULL;
-
-       addr->type = QETH_IP_TYPE_NORMAL;
-       addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
-       addr->proto = prot;
-
-       return addr;
-}
-
 static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover)
 {
        struct qeth_ipaddr *addr;
@@ -321,11 +318,7 @@ static void qeth_l3_recover_ip(struct qeth_card *card)
        spin_lock_bh(&card->ip_lock);
 
        hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) {
-               if (addr->disp_flag == QETH_DISP_ADDR_DELETE) {
-                       qeth_l3_deregister_addr_entry(card, addr);
-                       hash_del(&addr->hnode);
-                       kfree(addr);
-               } else if (addr->disp_flag == QETH_DISP_ADDR_ADD) {
+               if (addr->disp_flag == QETH_DISP_ADDR_ADD) {
                        if (addr->proto == QETH_PROT_IPV4) {
                                addr->in_progress = 1;
                                spin_unlock_bh(&card->ip_lock);
@@ -362,7 +355,7 @@ static int qeth_l3_send_setdelmc(struct qeth_card *card,
        iob = qeth_get_ipacmd_buffer(card, ipacmd, addr->proto);
        if (!iob)
                return -ENOMEM;
-       cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        ether_addr_copy(cmd->data.setdelipm.mac, addr->mac);
        if (addr->proto == QETH_PROT_IPV6)
                memcpy(cmd->data.setdelipm.ip6, &addr->u.a6.addr,
@@ -389,21 +382,38 @@ static void qeth_l3_fill_netmask(u8 *netmask, unsigned int len)
        }
 }
 
+static u32 qeth_l3_get_setdelip_flags(struct qeth_ipaddr *addr, bool set)
+{
+       switch (addr->type) {
+       case QETH_IP_TYPE_RXIP:
+               return (set) ? QETH_IPA_SETIP_TAKEOVER_FLAG : 0;
+       case QETH_IP_TYPE_VIPA:
+               return (set) ? QETH_IPA_SETIP_VIPA_FLAG :
+                              QETH_IPA_DELIP_VIPA_FLAG;
+       default:
+               return (set && addr->ipato) ? QETH_IPA_SETIP_TAKEOVER_FLAG : 0;
+       }
+}
+
 static int qeth_l3_send_setdelip(struct qeth_card *card,
-               struct qeth_ipaddr *addr, int ipacmd, unsigned int flags)
+                                struct qeth_ipaddr *addr,
+                                enum qeth_ipa_cmds ipacmd)
 {
-       int rc;
        struct qeth_cmd_buffer *iob;
        struct qeth_ipa_cmd *cmd;
        __u8 netmask[16];
+       u32 flags;
 
        QETH_CARD_TEXT(card, 4, "setdelip");
-       QETH_CARD_TEXT_(card, 4, "flags%02X", flags);
 
        iob = qeth_get_ipacmd_buffer(card, ipacmd, addr->proto);
        if (!iob)
                return -ENOMEM;
-       cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
+
+       flags = qeth_l3_get_setdelip_flags(addr, ipacmd == IPA_CMD_SETIP);
+       QETH_CARD_TEXT_(card, 4, "flags%02X", flags);
+
        if (addr->proto == QETH_PROT_IPV6) {
                memcpy(cmd->data.setdelip6.ip_addr, &addr->u.a6.addr,
                       sizeof(struct in6_addr));
@@ -417,9 +427,7 @@ static int qeth_l3_send_setdelip(struct qeth_card *card,
                cmd->data.setdelip4.flags = flags;
        }
 
-       rc = qeth_send_ipa_cmd(card, iob, NULL, NULL);
-
-       return rc;
+       return qeth_send_ipa_cmd(card, iob, NULL, NULL);
 }
 
 static int qeth_l3_send_setrouting(struct qeth_card *card,
@@ -433,7 +441,7 @@ static int qeth_l3_send_setrouting(struct qeth_card *card,
        iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETRTG, prot);
        if (!iob)
                return -ENOMEM;
-       cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        cmd->data.setrtg.type = (type);
        rc = qeth_send_ipa_cmd(card, iob, NULL, NULL);
 
@@ -535,10 +543,7 @@ void qeth_l3_update_ipato(struct qeth_card *card)
        hash_for_each(card->ip_htable, i, addr, hnode) {
                if (addr->type != QETH_IP_TYPE_NORMAL)
                        continue;
-               if (qeth_l3_is_addr_covered_by_ipato(card, addr))
-                       addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
-               else
-                       addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG;
+               addr->ipato = qeth_l3_is_addr_covered_by_ipato(card, addr);
        }
 }
 
@@ -616,142 +621,39 @@ int qeth_l3_del_ipato_entry(struct qeth_card *card,
        return rc;
 }
 
-/*
- * VIPA related functions
- */
-int qeth_l3_add_vipa(struct qeth_card *card, enum qeth_prot_versions proto,
-             const u8 *addr)
+int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip,
+                            enum qeth_ip_types type,
+                            enum qeth_prot_versions proto)
 {
-       struct qeth_ipaddr *ipaddr;
+       struct qeth_ipaddr addr;
        int rc;
 
-       ipaddr = qeth_l3_get_addr_buffer(proto);
-       if (ipaddr) {
-               if (proto == QETH_PROT_IPV4) {
-                       QETH_CARD_TEXT(card, 2, "addvipa4");
-                       memcpy(&ipaddr->u.a4.addr, addr, 4);
-                       ipaddr->u.a4.mask = 0;
-               } else if (proto == QETH_PROT_IPV6) {
-                       QETH_CARD_TEXT(card, 2, "addvipa6");
-                       memcpy(&ipaddr->u.a6.addr, addr, 16);
-                       ipaddr->u.a6.pfxlen = 0;
-               }
-               ipaddr->type = QETH_IP_TYPE_VIPA;
-               ipaddr->set_flags = QETH_IPA_SETIP_VIPA_FLAG;
-               ipaddr->del_flags = QETH_IPA_DELIP_VIPA_FLAG;
-       } else
-               return -ENOMEM;
-
-       spin_lock_bh(&card->ip_lock);
-
-       if (qeth_l3_ip_from_hash(card, ipaddr))
-               rc = -EEXIST;
+       qeth_l3_init_ipaddr(&addr, type, proto);
+       if (proto == QETH_PROT_IPV4)
+               memcpy(&addr.u.a4.addr, ip, 4);
        else
-               rc = qeth_l3_add_ip(card, ipaddr);
-
-       spin_unlock_bh(&card->ip_lock);
-
-       kfree(ipaddr);
-
-       return rc;
-}
-
-int qeth_l3_del_vipa(struct qeth_card *card, enum qeth_prot_versions proto,
-                    const u8 *addr)
-{
-       struct qeth_ipaddr *ipaddr;
-       int rc;
-
-       ipaddr = qeth_l3_get_addr_buffer(proto);
-       if (ipaddr) {
-               if (proto == QETH_PROT_IPV4) {
-                       QETH_CARD_TEXT(card, 2, "delvipa4");
-                       memcpy(&ipaddr->u.a4.addr, addr, 4);
-                       ipaddr->u.a4.mask = 0;
-               } else if (proto == QETH_PROT_IPV6) {
-                       QETH_CARD_TEXT(card, 2, "delvipa6");
-                       memcpy(&ipaddr->u.a6.addr, addr, 16);
-                       ipaddr->u.a6.pfxlen = 0;
-               }
-               ipaddr->type = QETH_IP_TYPE_VIPA;
-       } else
-               return -ENOMEM;
+               memcpy(&addr.u.a6.addr, ip, 16);
 
        spin_lock_bh(&card->ip_lock);
-       rc = qeth_l3_delete_ip(card, ipaddr);
+       rc = add ? qeth_l3_add_ip(card, &addr) : qeth_l3_delete_ip(card, &addr);
        spin_unlock_bh(&card->ip_lock);
-
-       kfree(ipaddr);
        return rc;
 }
 
-/*
- * proxy ARP related functions
- */
-int qeth_l3_add_rxip(struct qeth_card *card, enum qeth_prot_versions proto,
-             const u8 *addr)
+int qeth_l3_modify_hsuid(struct qeth_card *card, bool add)
 {
-       struct qeth_ipaddr *ipaddr;
-       int rc;
-
-       ipaddr = qeth_l3_get_addr_buffer(proto);
-       if (ipaddr) {
-               if (proto == QETH_PROT_IPV4) {
-                       QETH_CARD_TEXT(card, 2, "addrxip4");
-                       memcpy(&ipaddr->u.a4.addr, addr, 4);
-                       ipaddr->u.a4.mask = 0;
-               } else if (proto == QETH_PROT_IPV6) {
-                       QETH_CARD_TEXT(card, 2, "addrxip6");
-                       memcpy(&ipaddr->u.a6.addr, addr, 16);
-                       ipaddr->u.a6.pfxlen = 0;
-               }
+       struct qeth_ipaddr addr;
+       int rc, i;
 
-               ipaddr->type = QETH_IP_TYPE_RXIP;
-               ipaddr->set_flags = QETH_IPA_SETIP_TAKEOVER_FLAG;
-               ipaddr->del_flags = 0;
-       } else
-               return -ENOMEM;
+       qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV6);
+       addr.u.a6.addr.s6_addr[0] = 0xfe;
+       addr.u.a6.addr.s6_addr[1] = 0x80;
+       for (i = 0; i < 8; i++)
+               addr.u.a6.addr.s6_addr[8+i] = card->options.hsuid[i];
 
        spin_lock_bh(&card->ip_lock);
-
-       if (qeth_l3_ip_from_hash(card, ipaddr))
-               rc = -EEXIST;
-       else
-               rc = qeth_l3_add_ip(card, ipaddr);
-
+       rc = add ? qeth_l3_add_ip(card, &addr) : qeth_l3_delete_ip(card, &addr);
        spin_unlock_bh(&card->ip_lock);
-
-       kfree(ipaddr);
-
-       return rc;
-}
-
-int qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions proto,
-                    const u8 *addr)
-{
-       struct qeth_ipaddr *ipaddr;
-       int rc;
-
-       ipaddr = qeth_l3_get_addr_buffer(proto);
-       if (ipaddr) {
-               if (proto == QETH_PROT_IPV4) {
-                       QETH_CARD_TEXT(card, 2, "delrxip4");
-                       memcpy(&ipaddr->u.a4.addr, addr, 4);
-                       ipaddr->u.a4.mask = 0;
-               } else if (proto == QETH_PROT_IPV6) {
-                       QETH_CARD_TEXT(card, 2, "delrxip6");
-                       memcpy(&ipaddr->u.a6.addr, addr, 16);
-                       ipaddr->u.a6.pfxlen = 0;
-               }
-               ipaddr->type = QETH_IP_TYPE_RXIP;
-       } else
-               return -ENOMEM;
-
-       spin_lock_bh(&card->ip_lock);
-       rc = qeth_l3_delete_ip(card, ipaddr);
-       spin_unlock_bh(&card->ip_lock);
-
-       kfree(ipaddr);
        return rc;
 }
 
@@ -778,8 +680,7 @@ static int qeth_l3_register_addr_entry(struct qeth_card *card,
                if (addr->is_multicast)
                        rc =  qeth_l3_send_setdelmc(card, addr, IPA_CMD_SETIPM);
                else
-                       rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_SETIP,
-                                       addr->set_flags);
+                       rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_SETIP);
                if (rc)
                        QETH_CARD_TEXT(card, 2, "failed");
        } while ((--cnt > 0) && rc);
@@ -811,8 +712,7 @@ static int qeth_l3_deregister_addr_entry(struct qeth_card *card,
        if (addr->is_multicast)
                rc = qeth_l3_send_setdelmc(card, addr, IPA_CMD_DELIPM);
        else
-               rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_DELIP,
-                                       addr->del_flags);
+               rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_DELIP);
        if (rc)
                QETH_CARD_TEXT(card, 2, "failed");
 
@@ -1092,7 +992,7 @@ static int qeth_l3_iqd_read_initial_mac(struct qeth_card *card)
                                     QETH_PROT_IPV6);
        if (!iob)
                return -ENOMEM;
-       cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        *((__u16 *) &cmd->data.create_destroy_addr.unique_id[6]) =
                        card->info.unique_id;
 
@@ -1137,7 +1037,7 @@ static int qeth_l3_get_unique_id(struct qeth_card *card)
                                     QETH_PROT_IPV6);
        if (!iob)
                return -ENOMEM;
-       cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        *((__u16 *) &cmd->data.create_destroy_addr.unique_id[6]) =
                        card->info.unique_id;
 
@@ -1213,7 +1113,7 @@ qeth_diags_trace(struct qeth_card *card, enum qeth_diags_trace_cmds diags_cmd)
        iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
        if (!iob)
                return -ENOMEM;
-       cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        cmd->data.diagass.subcmd_len = 16;
        cmd->data.diagass.subcmd = QETH_DIAGS_CMD_TRACE;
        cmd->data.diagass.type = QETH_DIAGS_TYPE_HIPERSOCKET;
@@ -1239,8 +1139,9 @@ qeth_l3_add_mc_to_hash(struct qeth_card *card, struct in_device *in4_dev)
                tmp->u.a4.addr = be32_to_cpu(im4->multiaddr);
                tmp->is_multicast = 1;
 
-               ipm = qeth_l3_ip_from_hash(card, tmp);
+               ipm = qeth_l3_find_addr_by_ip(card, tmp);
                if (ipm) {
+                       /* for mcast, by-IP match means full match */
                        ipm->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
                } else {
                        ipm = qeth_l3_get_addr_buffer(QETH_PROT_IPV4);
@@ -1319,8 +1220,9 @@ static void qeth_l3_add_mc6_to_hash(struct qeth_card *card,
                       sizeof(struct in6_addr));
                tmp->is_multicast = 1;
 
-               ipm = qeth_l3_ip_from_hash(card, tmp);
+               ipm = qeth_l3_find_addr_by_ip(card, tmp);
                if (ipm) {
+                       /* for mcast, by-IP match means full match */
                        ipm->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
                        continue;
                }
@@ -1520,30 +1422,24 @@ static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
                                ipv6_eth_mc_map(&ipv6_hdr(skb)->daddr, tg_addr);
 
                        card->stats.multicast++;
-                       skb->pkt_type = PACKET_MULTICAST;
                        break;
                case QETH_CAST_BROADCAST:
                        ether_addr_copy(tg_addr, card->dev->broadcast);
                        card->stats.multicast++;
-                       skb->pkt_type = PACKET_BROADCAST;
                        break;
-               case QETH_CAST_UNICAST:
-               case QETH_CAST_ANYCAST:
-               case QETH_CAST_NOCAST:
                default:
                        if (card->options.sniffer)
                                skb->pkt_type = PACKET_OTHERHOST;
-                       else
-                               skb->pkt_type = PACKET_HOST;
                        ether_addr_copy(tg_addr, card->dev->dev_addr);
                }
+
                if (hdr->hdr.l3.ext_flags & QETH_HDR_EXT_SRC_MAC_ADDR)
                        card->dev->header_ops->create(skb, card->dev, prot,
                                tg_addr, &hdr->hdr.l3.next_hop.rx.src_mac,
-                               card->dev->addr_len);
+                               skb->len);
                else
                        card->dev->header_ops->create(skb, card->dev, prot,
-                               tg_addr, "FAKELL", card->dev->addr_len);
+                               tg_addr, "FAKELL", skb->len);
        }
 
        skb->protocol = eth_type_trans(skb, card->dev);
@@ -1590,20 +1486,16 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
                        *done = 1;
                        break;
                }
-               skb->dev = card->dev;
                switch (hdr->hdr.l3.id) {
                case QETH_HEADER_TYPE_LAYER3:
                        magic = *(__u16 *)skb->data;
                        if ((card->info.type == QETH_CARD_TYPE_IQD) &&
                            (magic == ETH_P_AF_IUCV)) {
                                skb->protocol = cpu_to_be16(ETH_P_AF_IUCV);
-                               skb->pkt_type = PACKET_HOST;
-                               skb->mac_header = NET_SKB_PAD;
-                               skb->dev = card->dev;
                                len = skb->len;
                                card->dev->header_ops->create(skb, card->dev, 0,
-                                       card->dev->dev_addr, "FAKELL",
-                                       card->dev->addr_len);
+                                       card->dev->dev_addr, "FAKELL", len);
+                               skb_reset_mac_header(skb);
                                netif_receive_skb(skb);
                        } else {
                                qeth_l3_rebuild_skb(card, skb, hdr);
@@ -1612,7 +1504,6 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
                        }
                        break;
                case QETH_HEADER_TYPE_LAYER2: /* for HiperSockets sniffer */
-                       skb->pkt_type = PACKET_HOST;
                        skb->protocol = eth_type_trans(skb, skb->dev);
                        len = skb->len;
                        netif_receive_skb(skb);
@@ -1631,69 +1522,6 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
        return work_done;
 }
 
-static int qeth_l3_verify_vlan_dev(struct net_device *dev,
-                       struct qeth_card *card)
-{
-       int rc = 0;
-       u16 vid;
-
-       for_each_set_bit(vid, card->active_vlans, VLAN_N_VID) {
-               struct net_device *netdev;
-
-               rcu_read_lock();
-               netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q),
-                                             vid);
-               rcu_read_unlock();
-               if (netdev == dev) {
-                       rc = QETH_VLAN_CARD;
-                       break;
-               }
-       }
-
-       if (rc && !(vlan_dev_real_dev(dev)->ml_priv == (void *)card))
-               return 0;
-
-       return rc;
-}
-
-static int qeth_l3_verify_dev(struct net_device *dev)
-{
-       struct qeth_card *card;
-       int rc = 0;
-       unsigned long flags;
-
-       read_lock_irqsave(&qeth_core_card_list.rwlock, flags);
-       list_for_each_entry(card, &qeth_core_card_list.list, list) {
-               if (card->dev == dev) {
-                       rc = QETH_REAL_CARD;
-                       break;
-               }
-               rc = qeth_l3_verify_vlan_dev(dev, card);
-               if (rc)
-                       break;
-       }
-       read_unlock_irqrestore(&qeth_core_card_list.rwlock, flags);
-
-       return rc;
-}
-
-static struct qeth_card *qeth_l3_get_card_from_dev(struct net_device *dev)
-{
-       struct qeth_card *card = NULL;
-       int rc;
-
-       rc = qeth_l3_verify_dev(dev);
-       if (rc == QETH_REAL_CARD)
-               card = dev->ml_priv;
-       else if (rc == QETH_VLAN_CARD)
-               card = vlan_dev_real_dev(dev)->ml_priv;
-       if (card && card->options.layer2)
-               card = NULL;
-       if (card)
-               QETH_CARD_TEXT_(card, 4, "%d", rc);
-       return card ;
-}
-
 static void qeth_l3_stop_card(struct qeth_card *card, int recovery_mode)
 {
        QETH_DBF_TEXT(SETUP, 2, "stopcard");
@@ -2022,7 +1850,7 @@ static int qeth_l3_query_arp_cache_info(struct qeth_card *card,
                                       prot);
        if (!iob)
                return -ENOMEM;
-       cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+       cmd = __ipa_cmd(iob);
        cmd->data.setassparms.data.query_arp.request_bits = 0x000F;
        cmd->data.setassparms.data.query_arp.reply_bits = 0;
        cmd->data.setassparms.data.query_arp.no_entries = 0;
@@ -2450,11 +2278,12 @@ static void qeth_tso_fill_header(struct qeth_card *card,
 static int qeth_l3_get_elements_no_tso(struct qeth_card *card,
                        struct sk_buff *skb, int extra_elems)
 {
-       addr_t tcpdptr = (addr_t)tcp_hdr(skb) + tcp_hdrlen(skb);
-       int elements = qeth_get_elements_for_range(
-                               tcpdptr,
-                               (addr_t)skb->data + skb_headlen(skb)) +
-                               qeth_get_elements_for_frags(skb);
+       addr_t start = (addr_t)tcp_hdr(skb) + tcp_hdrlen(skb);
+       addr_t end = (addr_t)skb->data + skb_headlen(skb);
+       int elements = qeth_get_elements_for_frags(skb);
+
+       if (start != end)
+               elements += qeth_get_elements_for_range(start, end);
 
        if ((elements + extra_elems) > QETH_MAX_BUFFER_ELEMENTS(card)) {
                QETH_DBF_MESSAGE(2,
@@ -2802,14 +2631,16 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
                        if (!(card->info.unique_id & UNIQUE_ID_NOT_BY_CARD))
                                card->dev->dev_id = card->info.unique_id &
                                                         0xffff;
+
+                       card->dev->hw_features |= NETIF_F_SG;
+                       card->dev->vlan_features |= NETIF_F_SG;
+
                        if (!card->info.guestlan) {
-                               card->dev->hw_features = NETIF_F_SG |
-                                       NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
-                                       NETIF_F_TSO;
-                               card->dev->vlan_features = NETIF_F_SG |
-                                       NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
-                                       NETIF_F_TSO;
                                card->dev->features |= NETIF_F_SG;
+                               card->dev->hw_features |= NETIF_F_TSO |
+                                       NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
+                               card->dev->vlan_features |= NETIF_F_TSO |
+                                       NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
                        }
                }
        } else if (card->info.type == QETH_CARD_TYPE_IQD) {
@@ -2882,8 +2713,8 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
                qeth_l3_set_offline(cgdev);
 
        if (card->dev) {
-               netif_napi_del(&card->napi);
                unregister_netdev(card->dev);
+               free_netdev(card->dev);
                card->dev = NULL;
        }
 
@@ -2924,7 +2755,6 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
                card->info.hwtrap = 0;
 
        card->state = CARD_STATE_HARDSETUP;
-       memset(&card->rx, 0, sizeof(struct qeth_rx));
        qeth_print_status_message(card);
 
        /* softsetup */
@@ -3147,13 +2977,43 @@ struct qeth_discipline qeth_l3_discipline = {
 };
 EXPORT_SYMBOL_GPL(qeth_l3_discipline);
 
+static int qeth_l3_handle_ip_event(struct qeth_card *card,
+                                  struct qeth_ipaddr *addr,
+                                  unsigned long event)
+{
+       switch (event) {
+       case NETDEV_UP:
+               spin_lock_bh(&card->ip_lock);
+               qeth_l3_add_ip(card, addr);
+               spin_unlock_bh(&card->ip_lock);
+               return NOTIFY_OK;
+       case NETDEV_DOWN:
+               spin_lock_bh(&card->ip_lock);
+               qeth_l3_delete_ip(card, addr);
+               spin_unlock_bh(&card->ip_lock);
+               return NOTIFY_OK;
+       default:
+               return NOTIFY_DONE;
+       }
+}
+
+static struct qeth_card *qeth_l3_get_card_from_dev(struct net_device *dev)
+{
+       if (is_vlan_dev(dev))
+               dev = vlan_dev_real_dev(dev);
+       if (dev->netdev_ops == &qeth_l3_osa_netdev_ops ||
+           dev->netdev_ops == &qeth_l3_netdev_ops)
+               return (struct qeth_card *) dev->ml_priv;
+       return NULL;
+}
+
 static int qeth_l3_ip_event(struct notifier_block *this,
                            unsigned long event, void *ptr)
 {
 
        struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
-       struct net_device *dev = (struct net_device *)ifa->ifa_dev->dev;
-       struct qeth_ipaddr *addr;
+       struct net_device *dev = ifa->ifa_dev->dev;
+       struct qeth_ipaddr addr;
        struct qeth_card *card;
 
        if (dev_net(dev) != &init_net)
@@ -3164,29 +3024,11 @@ static int qeth_l3_ip_event(struct notifier_block *this,
                return NOTIFY_DONE;
        QETH_CARD_TEXT(card, 3, "ipevent");
 
-       addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV4);
-       if (addr) {
-               addr->u.a4.addr = be32_to_cpu(ifa->ifa_address);
-               addr->u.a4.mask = be32_to_cpu(ifa->ifa_mask);
-               addr->type = QETH_IP_TYPE_NORMAL;
-       } else
-               return NOTIFY_DONE;
-
-       switch (event) {
-       case NETDEV_UP:
-               spin_lock_bh(&card->ip_lock);
-               qeth_l3_add_ip(card, addr);
-               spin_unlock_bh(&card->ip_lock);
-               break;
-       case NETDEV_DOWN:
-               spin_lock_bh(&card->ip_lock);
-               qeth_l3_delete_ip(card, addr);
-               spin_unlock_bh(&card->ip_lock);
-               break;
-       }
+       qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV4);
+       addr.u.a4.addr = be32_to_cpu(ifa->ifa_address);
+       addr.u.a4.mask = be32_to_cpu(ifa->ifa_mask);
 
-       kfree(addr);
-       return NOTIFY_DONE;
+       return qeth_l3_handle_ip_event(card, &addr, event);
 }
 
 static struct notifier_block qeth_l3_ip_notifier = {
@@ -3198,8 +3040,8 @@ static int qeth_l3_ip6_event(struct notifier_block *this,
                             unsigned long event, void *ptr)
 {
        struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
-       struct net_device *dev = (struct net_device *)ifa->idev->dev;
-       struct qeth_ipaddr *addr;
+       struct net_device *dev = ifa->idev->dev;
+       struct qeth_ipaddr addr;
        struct qeth_card *card;
 
        card = qeth_l3_get_card_from_dev(dev);
@@ -3209,29 +3051,11 @@ static int qeth_l3_ip6_event(struct notifier_block *this,
        if (!qeth_is_supported(card, IPA_IPV6))
                return NOTIFY_DONE;
 
-       addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
-       if (addr) {
-               memcpy(&addr->u.a6.addr, &ifa->addr, sizeof(struct in6_addr));
-               addr->u.a6.pfxlen = ifa->prefix_len;
-               addr->type = QETH_IP_TYPE_NORMAL;
-       } else
-               return NOTIFY_DONE;
-
-       switch (event) {
-       case NETDEV_UP:
-               spin_lock_bh(&card->ip_lock);
-               qeth_l3_add_ip(card, addr);
-               spin_unlock_bh(&card->ip_lock);
-               break;
-       case NETDEV_DOWN:
-               spin_lock_bh(&card->ip_lock);
-               qeth_l3_delete_ip(card, addr);
-               spin_unlock_bh(&card->ip_lock);
-               break;
-       }
+       qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV6);
+       addr.u.a6.addr = ifa->addr;
+       addr.u.a6.pfxlen = ifa->prefix_len;
 
-       kfree(addr);
-       return NOTIFY_DONE;
+       return qeth_l3_handle_ip_event(card, &addr, event);
 }
 
 static struct notifier_block qeth_l3_ip6_notifier = {
index a645cfe66ddf724b22e9691467768d44bd620dc5..f61192a048f447168a35e970b693c7004a490629 100644 (file)
@@ -272,9 +272,8 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
                struct device_attribute *attr, const char *buf, size_t count)
 {
        struct qeth_card *card = dev_get_drvdata(dev);
-       struct qeth_ipaddr *addr;
        char *tmp;
-       int rc, i;
+       int rc;
 
        if (!card)
                return -EINVAL;
@@ -293,25 +292,9 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
        if (strlen(tmp) > 8)
                return -EINVAL;
 
-       if (card->options.hsuid[0]) {
+       if (card->options.hsuid[0])
                /* delete old ip address */
-               addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
-               if (!addr)
-                       return -ENOMEM;
-
-               addr->u.a6.addr.s6_addr32[0] = cpu_to_be32(0xfe800000);
-               addr->u.a6.addr.s6_addr32[1] = 0x00000000;
-               for (i = 8; i < 16; i++)
-                       addr->u.a6.addr.s6_addr[i] =
-                               card->options.hsuid[i - 8];
-               addr->u.a6.pfxlen = 0;
-               addr->type = QETH_IP_TYPE_NORMAL;
-
-               spin_lock_bh(&card->ip_lock);
-               qeth_l3_delete_ip(card, addr);
-               spin_unlock_bh(&card->ip_lock);
-               kfree(addr);
-       }
+               qeth_l3_modify_hsuid(card, false);
 
        if (strlen(tmp) == 0) {
                /* delete ip address only */
@@ -331,21 +314,7 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
        if (card->dev)
                memcpy(card->dev->perm_addr, card->options.hsuid, 9);
 
-       addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
-       if (addr != NULL) {
-               addr->u.a6.addr.s6_addr32[0] = cpu_to_be32(0xfe800000);
-               addr->u.a6.addr.s6_addr32[1] = 0x00000000;
-               for (i = 8; i < 16; i++)
-                       addr->u.a6.addr.s6_addr[i] = card->options.hsuid[i - 8];
-               addr->u.a6.pfxlen = 0;
-               addr->type = QETH_IP_TYPE_NORMAL;
-       } else
-               return -ENOMEM;
-
-       spin_lock_bh(&card->ip_lock);
-       rc = qeth_l3_add_ip(card, addr);
-       spin_unlock_bh(&card->ip_lock);
-       kfree(addr);
+       rc = qeth_l3_modify_hsuid(card, true);
 
        return rc ? rc : count;
 }
@@ -767,7 +736,8 @@ static ssize_t qeth_l3_dev_vipa_add_store(const char *buf, size_t count,
        mutex_lock(&card->conf_mutex);
        rc = qeth_l3_parse_vipae(buf, proto, addr);
        if (!rc)
-               rc = qeth_l3_add_vipa(card, proto, addr);
+               rc = qeth_l3_modify_rxip_vipa(card, true, addr,
+                                             QETH_IP_TYPE_VIPA, proto);
        mutex_unlock(&card->conf_mutex);
        return rc ? rc : count;
 }
@@ -796,7 +766,8 @@ static ssize_t qeth_l3_dev_vipa_del_store(const char *buf, size_t count,
        mutex_lock(&card->conf_mutex);
        rc = qeth_l3_parse_vipae(buf, proto, addr);
        if (!rc)
-               rc = qeth_l3_del_vipa(card, proto, addr);
+               rc = qeth_l3_modify_rxip_vipa(card, false, addr,
+                                             QETH_IP_TYPE_VIPA, proto);
        mutex_unlock(&card->conf_mutex);
        return rc ? rc : count;
 }
@@ -908,7 +879,8 @@ static ssize_t qeth_l3_dev_rxip_add_store(const char *buf, size_t count,
        mutex_lock(&card->conf_mutex);
        rc = qeth_l3_parse_rxipe(buf, proto, addr);
        if (!rc)
-               rc = qeth_l3_add_rxip(card, proto, addr);
+               rc = qeth_l3_modify_rxip_vipa(card, true, addr,
+                                             QETH_IP_TYPE_RXIP, proto);
        mutex_unlock(&card->conf_mutex);
        return rc ? rc : count;
 }
@@ -937,7 +909,8 @@ static ssize_t qeth_l3_dev_rxip_del_store(const char *buf, size_t count,
        mutex_lock(&card->conf_mutex);
        rc = qeth_l3_parse_rxipe(buf, proto, addr);
        if (!rc)
-               rc = qeth_l3_del_rxip(card, proto, addr);
+               rc = qeth_l3_modify_rxip_vipa(card, false, addr,
+                                             QETH_IP_TYPE_RXIP, proto);
        mutex_unlock(&card->conf_mutex);
        return rc ? rc : count;
 }
index 57bf43e34863ec09c9c15217f7f02fa97711a75f..dd9464920456fd550f8da9bab2e345ae843d1d15 100644 (file)
@@ -328,8 +328,6 @@ static void scsi_host_dev_release(struct device *dev)
        if (shost->work_q)
                destroy_workqueue(shost->work_q);
 
-       destroy_rcu_head(&shost->rcu);
-
        if (shost->shost_state == SHOST_CREATED) {
                /*
                 * Free the shost_dev device name here if scsi_host_alloc()
@@ -404,7 +402,6 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
        INIT_LIST_HEAD(&shost->starved_list);
        init_waitqueue_head(&shost->host_wait);
        mutex_init(&shost->scan_mutex);
-       init_rcu_head(&shost->rcu);
 
        index = ida_simple_get(&host_index_ida, 0, 0, GFP_KERNEL);
        if (index < 0)
index 6de9681ace82603370005590238e315b36df19b4..ceab5e5c41c277a25f879348885be5fa847c4cd8 100644 (file)
@@ -223,6 +223,7 @@ int sas_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
 {
        struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(cmd->device->host);
+       struct domain_device *dev = cmd_to_domain_dev(cmd);
        struct sas_task *task = TO_SAS_TASK(cmd);
 
        /* At this point, we only get called following an actual abort
@@ -231,6 +232,14 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
         */
        sas_end_task(cmd, task);
 
+       if (dev_is_sata(dev)) {
+               /* defer commands to libata so that libata EH can
+                * handle ata qcs correctly
+                */
+               list_move_tail(&cmd->eh_entry, &sas_ha->eh_ata_q);
+               return;
+       }
+
        /* now finish the command and move it on to the error
         * handler done list, this also takes it off the
         * error handler pending list.
@@ -238,22 +247,6 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
        scsi_eh_finish_cmd(cmd, &sas_ha->eh_done_q);
 }
 
-static void sas_eh_defer_cmd(struct scsi_cmnd *cmd)
-{
-       struct domain_device *dev = cmd_to_domain_dev(cmd);
-       struct sas_ha_struct *ha = dev->port->ha;
-       struct sas_task *task = TO_SAS_TASK(cmd);
-
-       if (!dev_is_sata(dev)) {
-               sas_eh_finish_cmd(cmd);
-               return;
-       }
-
-       /* report the timeout to libata */
-       sas_end_task(cmd, task);
-       list_move_tail(&cmd->eh_entry, &ha->eh_ata_q);
-}
-
 static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd *my_cmd)
 {
        struct scsi_cmnd *cmd, *n;
@@ -261,7 +254,7 @@ static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd
        list_for_each_entry_safe(cmd, n, error_q, eh_entry) {
                if (cmd->device->sdev_target == my_cmd->device->sdev_target &&
                    cmd->device->lun == my_cmd->device->lun)
-                       sas_eh_defer_cmd(cmd);
+                       sas_eh_finish_cmd(cmd);
        }
 }
 
@@ -631,12 +624,12 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
                case TASK_IS_DONE:
                        SAS_DPRINTK("%s: task 0x%p is done\n", __func__,
                                    task);
-                       sas_eh_defer_cmd(cmd);
+                       sas_eh_finish_cmd(cmd);
                        continue;
                case TASK_IS_ABORTED:
                        SAS_DPRINTK("%s: task 0x%p is aborted\n",
                                    __func__, task);
-                       sas_eh_defer_cmd(cmd);
+                       sas_eh_finish_cmd(cmd);
                        continue;
                case TASK_IS_AT_LU:
                        SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task);
@@ -647,7 +640,7 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
                                            "recovered\n",
                                            SAS_ADDR(task->dev),
                                            cmd->device->lun);
-                               sas_eh_defer_cmd(cmd);
+                               sas_eh_finish_cmd(cmd);
                                sas_scsi_clear_queue_lu(work_q, cmd);
                                goto Again;
                        }
index 073ced07e662bc11bd00cdfc9d6214ec3a6a0b8e..dc8e850fbfd2217d1a64ea1ff9522bc2eeca3a40 100644 (file)
@@ -216,36 +216,30 @@ inline void megasas_return_cmd_fusion(struct megasas_instance *instance,
 /**
  * megasas_fire_cmd_fusion -   Sends command to the FW
  * @instance:                  Adapter soft state
- * @req_desc:                  32bit or 64bit Request descriptor
+ * @req_desc:                  64bit Request descriptor
  *
- * Perform PCI Write. Ventura supports 32 bit Descriptor.
- * Prior to Ventura (12G) MR controller supports 64 bit Descriptor.
+ * Perform PCI Write.
  */
 
 static void
 megasas_fire_cmd_fusion(struct megasas_instance *instance,
                union MEGASAS_REQUEST_DESCRIPTOR_UNION *req_desc)
 {
-       if (instance->adapter_type == VENTURA_SERIES)
-               writel(le32_to_cpu(req_desc->u.low),
-                       &instance->reg_set->inbound_single_queue_port);
-       else {
 #if defined(writeq) && defined(CONFIG_64BIT)
-               u64 req_data = (((u64)le32_to_cpu(req_desc->u.high) << 32) |
-                               le32_to_cpu(req_desc->u.low));
+       u64 req_data = (((u64)le32_to_cpu(req_desc->u.high) << 32) |
+               le32_to_cpu(req_desc->u.low));
 
-               writeq(req_data, &instance->reg_set->inbound_low_queue_port);
+       writeq(req_data, &instance->reg_set->inbound_low_queue_port);
 #else
-               unsigned long flags;
-               spin_lock_irqsave(&instance->hba_lock, flags);
-               writel(le32_to_cpu(req_desc->u.low),
-                       &instance->reg_set->inbound_low_queue_port);
-               writel(le32_to_cpu(req_desc->u.high),
-                       &instance->reg_set->inbound_high_queue_port);
-               mmiowb();
-               spin_unlock_irqrestore(&instance->hba_lock, flags);
+       unsigned long flags;
+       spin_lock_irqsave(&instance->hba_lock, flags);
+       writel(le32_to_cpu(req_desc->u.low),
+               &instance->reg_set->inbound_low_queue_port);
+       writel(le32_to_cpu(req_desc->u.high),
+               &instance->reg_set->inbound_high_queue_port);
+       mmiowb();
+       spin_unlock_irqrestore(&instance->hba_lock, flags);
 #endif
-       }
 }
 
 /**
@@ -982,7 +976,6 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
        const char *sys_info;
        MFI_CAPABILITIES *drv_ops;
        u32 scratch_pad_2;
-       unsigned long flags;
        ktime_t time;
        bool cur_fw_64bit_dma_capable;
 
@@ -1121,14 +1114,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
                        break;
        }
 
-       /* For Ventura also IOC INIT required 64 bit Descriptor write. */
-       spin_lock_irqsave(&instance->hba_lock, flags);
-       writel(le32_to_cpu(req_desc.u.low),
-              &instance->reg_set->inbound_low_queue_port);
-       writel(le32_to_cpu(req_desc.u.high),
-              &instance->reg_set->inbound_high_queue_port);
-       mmiowb();
-       spin_unlock_irqrestore(&instance->hba_lock, flags);
+       megasas_fire_cmd_fusion(instance, &req_desc);
 
        wait_and_poll(instance, cmd, MFI_POLL_TIMEOUT_SECS);
 
index 59a87ca328d36d39d2d95a1e6bd39e25573065a9..0aafbfd1b7465c3a566c89392e73bfab1e037173 100644 (file)
@@ -6297,14 +6297,14 @@ _base_reset_handler(struct MPT3SAS_ADAPTER *ioc, int reset_phase)
 }
 
 /**
- * _wait_for_commands_to_complete - reset controller
+ * mpt3sas_wait_for_commands_to_complete - reset controller
  * @ioc: Pointer to MPT_ADAPTER structure
  *
  * This function is waiting 10s for all pending commands to complete
  * prior to putting controller in reset.
  */
-static void
-_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc)
+void
+mpt3sas_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc)
 {
        u32 ioc_state;
 
@@ -6377,7 +6377,7 @@ mpt3sas_base_hard_reset_handler(struct MPT3SAS_ADAPTER *ioc,
                        is_fault = 1;
        }
        _base_reset_handler(ioc, MPT3_IOC_PRE_RESET);
-       _wait_for_commands_to_complete(ioc);
+       mpt3sas_wait_for_commands_to_complete(ioc);
        _base_mask_interrupts(ioc);
        r = _base_make_ioc_ready(ioc, type);
        if (r)
index 789bc421424bcf5ba7de5904e9fef073843e312d..99ccf83b8c518c91b794a0311ce79d4cd41de795 100644 (file)
@@ -1433,6 +1433,9 @@ void mpt3sas_base_update_missing_delay(struct MPT3SAS_ADAPTER *ioc,
 
 int mpt3sas_port_enable(struct MPT3SAS_ADAPTER *ioc);
 
+void
+mpt3sas_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc);
+
 
 /* scsih shared API */
 struct scsi_cmnd *mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc,
index 74fca184dba9830acd9b812b7b27f71cd32bf724..a1cb0236c550330a5de690462f5ac48c4a3e8bad 100644 (file)
@@ -2835,7 +2835,8 @@ scsih_abort(struct scsi_cmnd *scmd)
        _scsih_tm_display_info(ioc, scmd);
 
        sas_device_priv_data = scmd->device->hostdata;
-       if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
+       if (!sas_device_priv_data || !sas_device_priv_data->sas_target ||
+           ioc->remove_host) {
                sdev_printk(KERN_INFO, scmd->device,
                        "device been deleted! scmd(%p)\n", scmd);
                scmd->result = DID_NO_CONNECT << 16;
@@ -2898,7 +2899,8 @@ scsih_dev_reset(struct scsi_cmnd *scmd)
        _scsih_tm_display_info(ioc, scmd);
 
        sas_device_priv_data = scmd->device->hostdata;
-       if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
+       if (!sas_device_priv_data || !sas_device_priv_data->sas_target ||
+           ioc->remove_host) {
                sdev_printk(KERN_INFO, scmd->device,
                        "device been deleted! scmd(%p)\n", scmd);
                scmd->result = DID_NO_CONNECT << 16;
@@ -2961,7 +2963,8 @@ scsih_target_reset(struct scsi_cmnd *scmd)
        _scsih_tm_display_info(ioc, scmd);
 
        sas_device_priv_data = scmd->device->hostdata;
-       if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
+       if (!sas_device_priv_data || !sas_device_priv_data->sas_target ||
+           ioc->remove_host) {
                starget_printk(KERN_INFO, starget, "target been deleted! scmd(%p)\n",
                        scmd);
                scmd->result = DID_NO_CONNECT << 16;
@@ -3019,7 +3022,7 @@ scsih_host_reset(struct scsi_cmnd *scmd)
            ioc->name, scmd);
        scsi_print_command(scmd);
 
-       if (ioc->is_driver_loading) {
+       if (ioc->is_driver_loading || ioc->remove_host) {
                pr_info(MPT3SAS_FMT "Blocking the host reset\n",
                    ioc->name);
                r = FAILED;
@@ -4453,7 +4456,7 @@ _scsih_flush_running_cmds(struct MPT3SAS_ADAPTER *ioc)
                st = scsi_cmd_priv(scmd);
                mpt3sas_base_clear_st(ioc, st);
                scsi_dma_unmap(scmd);
-               if (ioc->pci_error_recovery)
+               if (ioc->pci_error_recovery || ioc->remove_host)
                        scmd->result = DID_NO_CONNECT << 16;
                else
                        scmd->result = DID_RESET << 16;
@@ -9739,6 +9742,10 @@ static void scsih_remove(struct pci_dev *pdev)
        unsigned long flags;
 
        ioc->remove_host = 1;
+
+       mpt3sas_wait_for_commands_to_complete(ioc);
+       _scsih_flush_running_cmds(ioc);
+
        _scsih_fw_event_cleanup_queue(ioc);
 
        spin_lock_irqsave(&ioc->fw_event_lock, flags);
@@ -9815,6 +9822,10 @@ scsih_shutdown(struct pci_dev *pdev)
        unsigned long flags;
 
        ioc->remove_host = 1;
+
+       mpt3sas_wait_for_commands_to_complete(ioc);
+       _scsih_flush_running_cmds(ioc);
+
        _scsih_fw_event_cleanup_queue(ioc);
 
        spin_lock_irqsave(&ioc->fw_event_lock, flags);
@@ -10547,7 +10558,7 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        snprintf(ioc->firmware_event_name, sizeof(ioc->firmware_event_name),
            "fw_event_%s%d", ioc->driver_name, ioc->id);
        ioc->firmware_event_thread = alloc_ordered_workqueue(
-           ioc->firmware_event_name, WQ_MEM_RECLAIM);
+           ioc->firmware_event_name, 0);
        if (!ioc->firmware_event_thread) {
                pr_err(MPT3SAS_FMT "failure at %s:%d/%s()!\n",
                    ioc->name, __FILE__, __LINE__, __func__);
index 667d7697ba01d6a63da5b28ce6953d7cb9465e09..d09afe1b567d9dd2cbfd383fb771071e61d61609 100644 (file)
@@ -762,6 +762,11 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
 
        iscsi_cid = cqe->conn_id;
        qedi_conn = qedi->cid_que.conn_cid_tbl[iscsi_cid];
+       if (!qedi_conn) {
+               QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+                         "icid not found 0x%x\n", cqe->conn_id);
+               return;
+       }
 
        /* Based on this itt get the corresponding qedi_cmd */
        spin_lock_bh(&qedi_conn->tmf_work_lock);
index be7d6824581ac059015d0a8c74d7449074d09962..c9689f97c307ac34c0baeed35bfd60b13c2e96e2 100644 (file)
 struct name_list_extended {
        struct get_name_list_extended *l;
        dma_addr_t              ldma;
-       struct list_head        fcports;        /* protect by sess_list */
+       struct list_head        fcports;
+       spinlock_t              fcports_lock;
        u32                     size;
-       u8                      sent;
 };
 /*
  * Timeout timer counts in seconds
@@ -2217,6 +2217,7 @@ typedef struct {
 
 /* FCP-4 types */
 #define FC4_TYPE_FCP_SCSI      0x08
+#define FC4_TYPE_NVME          0x28
 #define FC4_TYPE_OTHER         0x0
 #define FC4_TYPE_UNKNOWN       0xff
 
index 5bf9a59432f6a10a23360e4f0ea9950ee285956c..403fa096f8c807bbc1510cad0bc7ee1230dddb6c 100644 (file)
@@ -3179,6 +3179,7 @@ int qla24xx_async_gidpn(scsi_qla_host_t *vha, fc_port_t *fcport)
        sp->free(sp);
        fcport->flags &= ~FCF_ASYNC_SENT;
 done:
+       fcport->flags &= ~FCF_ASYNC_ACTIVE;
        return rval;
 }
 
@@ -3370,6 +3371,7 @@ int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport)
        sp->free(sp);
        fcport->flags &= ~FCF_ASYNC_SENT;
 done:
+       fcport->flags &= ~FCF_ASYNC_ACTIVE;
        return rval;
 }
 
@@ -3971,6 +3973,9 @@ void qla24xx_async_gnnft_done(scsi_qla_host_t *vha, srb_t *sp)
        spin_lock_irqsave(&vha->work_lock, flags);
        vha->scan.scan_flags &= ~SF_SCANNING;
        spin_unlock_irqrestore(&vha->work_lock, flags);
+
+       if ((fc4type == FC4_TYPE_FCP_SCSI) && vha->flags.nvme_enabled)
+               qla24xx_async_gpnft(vha, FC4_TYPE_NVME);
 }
 
 static void qla2x00_async_gpnft_gnnft_sp_done(void *s, int res)
index 2dea1129d3967f04e775ef09cd71de8ba9ddce30..8d7fab3cd01d28e393a17263b13dab44fe855f8a 100644 (file)
@@ -213,6 +213,7 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport,
        sp->free(sp);
        fcport->flags &= ~FCF_ASYNC_SENT;
 done:
+       fcport->flags &= ~FCF_ASYNC_ACTIVE;
        return rval;
 }
 
@@ -263,7 +264,7 @@ qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport)
 done_free_sp:
        sp->free(sp);
 done:
-       fcport->flags &= ~FCF_ASYNC_SENT;
+       fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
        return rval;
 }
 
@@ -271,6 +272,7 @@ void
 qla2x00_async_prlo_done(struct scsi_qla_host *vha, fc_port_t *fcport,
     uint16_t *data)
 {
+       fcport->flags &= ~FCF_ASYNC_ACTIVE;
        /* Don't re-login in target mode */
        if (!fcport->tgt_session)
                qla2x00_mark_device_lost(vha, fcport, 1, 0);
@@ -284,6 +286,7 @@ qla2x00_async_prlo_sp_done(void *s, int res)
        struct srb_iocb *lio = &sp->u.iocb_cmd;
        struct scsi_qla_host *vha = sp->vha;
 
+       sp->fcport->flags &= ~FCF_ASYNC_ACTIVE;
        if (!test_bit(UNLOADING, &vha->dpc_flags))
                qla2x00_post_async_prlo_done_work(sp->fcport->vha, sp->fcport,
                    lio->u.logio.data);
@@ -322,6 +325,7 @@ qla2x00_async_prlo(struct scsi_qla_host *vha, fc_port_t *fcport)
 done_free_sp:
        sp->free(sp);
 done:
+       fcport->flags &= ~FCF_ASYNC_ACTIVE;
        return rval;
 }
 
@@ -375,6 +379,8 @@ qla2x00_async_adisc_sp_done(void *ptr, int res)
            "Async done-%s res %x %8phC\n",
            sp->name, res, sp->fcport->port_name);
 
+       sp->fcport->flags &= ~FCF_ASYNC_SENT;
+
        memset(&ea, 0, sizeof(ea));
        ea.event = FCME_ADISC_DONE;
        ea.rc = res;
@@ -425,7 +431,7 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport,
 done_free_sp:
        sp->free(sp);
 done:
-       fcport->flags &= ~FCF_ASYNC_SENT;
+       fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
        qla2x00_post_async_adisc_work(vha, fcport, data);
        return rval;
 }
@@ -643,8 +649,7 @@ qla24xx_async_gnl_sp_done(void *s, int res)
                    (loop_id & 0x7fff));
        }
 
-       spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
-       vha->gnl.sent = 0;
+       spin_lock_irqsave(&vha->gnl.fcports_lock, flags);
 
        INIT_LIST_HEAD(&h);
        fcport = tf = NULL;
@@ -653,12 +658,16 @@ qla24xx_async_gnl_sp_done(void *s, int res)
 
        list_for_each_entry_safe(fcport, tf, &h, gnl_entry) {
                list_del_init(&fcport->gnl_entry);
+               spin_lock(&vha->hw->tgt.sess_lock);
                fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+               spin_unlock(&vha->hw->tgt.sess_lock);
                ea.fcport = fcport;
 
                qla2x00_fcport_event_handler(vha, &ea);
        }
+       spin_unlock_irqrestore(&vha->gnl.fcports_lock, flags);
 
+       spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
        /* create new fcport if fw has knowledge of new sessions */
        for (i = 0; i < n; i++) {
                port_id_t id;
@@ -710,18 +719,21 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport)
        ql_dbg(ql_dbg_disc, vha, 0x20d9,
            "Async-gnlist WWPN %8phC \n", fcport->port_name);
 
-       spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
+       spin_lock_irqsave(&vha->gnl.fcports_lock, flags);
+       if (!list_empty(&fcport->gnl_entry)) {
+               spin_unlock_irqrestore(&vha->gnl.fcports_lock, flags);
+               rval = QLA_SUCCESS;
+               goto done;
+       }
+
+       spin_lock(&vha->hw->tgt.sess_lock);
        fcport->disc_state = DSC_GNL;
        fcport->last_rscn_gen = fcport->rscn_gen;
        fcport->last_login_gen = fcport->login_gen;
+       spin_unlock(&vha->hw->tgt.sess_lock);
 
        list_add_tail(&fcport->gnl_entry, &vha->gnl.fcports);
-       if (vha->gnl.sent) {
-               spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
-               return QLA_SUCCESS;
-       }
-       vha->gnl.sent = 1;
-       spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
+       spin_unlock_irqrestore(&vha->gnl.fcports_lock, flags);
 
        sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
        if (!sp)
@@ -1049,6 +1061,7 @@ void qla24xx_handle_gpdb_event(scsi_qla_host_t *vha, struct event_arg *ea)
        fc_port_t *fcport = ea->fcport;
        struct port_database_24xx *pd;
        struct srb *sp = ea->sp;
+       uint8_t ls;
 
        pd = (struct port_database_24xx *)sp->u.iocb_cmd.u.mbx.in;
 
@@ -1061,7 +1074,12 @@ void qla24xx_handle_gpdb_event(scsi_qla_host_t *vha, struct event_arg *ea)
        if (fcport->disc_state == DSC_DELETE_PEND)
                return;
 
-       switch (pd->current_login_state) {
+       if (fcport->fc4f_nvme)
+               ls = pd->current_login_state >> 4;
+       else
+               ls = pd->current_login_state & 0xf;
+
+       switch (ls) {
        case PDS_PRLI_COMPLETE:
                __qla24xx_parse_gpdb(vha, fcport, pd);
                break;
@@ -1151,8 +1169,9 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport)
        if (fcport->scan_state != QLA_FCPORT_FOUND)
                return 0;
 
-       if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) ||
-           (fcport->fw_login_state == DSC_LS_PRLI_PEND))
+       if ((fcport->loop_id != FC_NO_LOOP_ID) &&
+           ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) ||
+            (fcport->fw_login_state == DSC_LS_PRLI_PEND)))
                return 0;
 
        if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) {
@@ -1527,6 +1546,7 @@ qla24xx_abort_sp_done(void *ptr, int res)
        srb_t *sp = ptr;
        struct srb_iocb *abt = &sp->u.iocb_cmd;
 
+       del_timer(&sp->u.iocb_cmd.timer);
        complete(&abt->u.abt.comp);
 }
 
@@ -1699,7 +1719,6 @@ qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea)
 
                        set_bit(ea->fcport->loop_id, vha->hw->loop_id_map);
                        spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
-                       ea->fcport->loop_id = FC_NO_LOOP_ID;
                        ea->fcport->chip_reset = vha->hw->base_qpair->chip_reset;
                        ea->fcport->logout_on_delete = 1;
                        ea->fcport->send_els_logo = 0;
@@ -1791,6 +1810,7 @@ qla2x00_async_logout_done(struct scsi_qla_host *vha, fc_port_t *fcport,
        qla2x00_mark_device_lost(vha, fcport, 1, 0);
        qlt_logo_completion_handler(fcport, data[0]);
        fcport->login_gen++;
+       fcport->flags &= ~FCF_ASYNC_ACTIVE;
        return;
 }
 
@@ -1798,6 +1818,7 @@ void
 qla2x00_async_adisc_done(struct scsi_qla_host *vha, fc_port_t *fcport,
     uint16_t *data)
 {
+       fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
        if (data[0] == MBS_COMMAND_COMPLETE) {
                qla2x00_update_fcport(vha, fcport);
 
@@ -1805,7 +1826,6 @@ qla2x00_async_adisc_done(struct scsi_qla_host *vha, fc_port_t *fcport,
        }
 
        /* Retry login. */
-       fcport->flags &= ~FCF_ASYNC_SENT;
        if (data[1] & QLA_LOGIO_LOGIN_RETRIED)
                set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
        else
index afcb5567998a51d6b1aae1c4872d0b2d5312ec71..5c5dcca4d1da4aee502f2ac33b3dd13a8375812c 100644 (file)
@@ -454,7 +454,7 @@ static int qla2x00_alloc_queues(struct qla_hw_data *ha, struct req_que *req,
        ha->req_q_map[0] = req;
        set_bit(0, ha->rsp_qid_map);
        set_bit(0, ha->req_qid_map);
-       return 1;
+       return 0;
 
 fail_qpair_map:
        kfree(ha->base_qpair);
@@ -471,6 +471,9 @@ static int qla2x00_alloc_queues(struct qla_hw_data *ha, struct req_que *req,
 
 static void qla2x00_free_req_que(struct qla_hw_data *ha, struct req_que *req)
 {
+       if (!ha->req_q_map)
+               return;
+
        if (IS_QLAFX00(ha)) {
                if (req && req->ring_fx00)
                        dma_free_coherent(&ha->pdev->dev,
@@ -481,14 +484,17 @@ static void qla2x00_free_req_que(struct qla_hw_data *ha, struct req_que *req)
                (req->length + 1) * sizeof(request_t),
                req->ring, req->dma);
 
-       if (req)
+       if (req) {
                kfree(req->outstanding_cmds);
-
-       kfree(req);
+               kfree(req);
+       }
 }
 
 static void qla2x00_free_rsp_que(struct qla_hw_data *ha, struct rsp_que *rsp)
 {
+       if (!ha->rsp_q_map)
+               return;
+
        if (IS_QLAFX00(ha)) {
                if (rsp && rsp->ring)
                        dma_free_coherent(&ha->pdev->dev,
@@ -499,7 +505,8 @@ static void qla2x00_free_rsp_que(struct qla_hw_data *ha, struct rsp_que *rsp)
                (rsp->length + 1) * sizeof(response_t),
                rsp->ring, rsp->dma);
        }
-       kfree(rsp);
+       if (rsp)
+               kfree(rsp);
 }
 
 static void qla2x00_free_queues(struct qla_hw_data *ha)
@@ -1723,6 +1730,8 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
        struct qla_tgt_cmd *cmd;
        uint8_t trace = 0;
 
+       if (!ha->req_q_map)
+               return;
        spin_lock_irqsave(qp->qp_lock_ptr, flags);
        req = qp->req;
        for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
@@ -3095,14 +3104,14 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
        /* Set up the irqs */
        ret = qla2x00_request_irqs(ha, rsp);
        if (ret)
-               goto probe_hw_failed;
+               goto probe_failed;
 
        /* Alloc arrays of request and response ring ptrs */
-       if (!qla2x00_alloc_queues(ha, req, rsp)) {
+       if (qla2x00_alloc_queues(ha, req, rsp)) {
                ql_log(ql_log_fatal, base_vha, 0x003d,
                    "Failed to allocate memory for queue pointers..."
                    "aborting.\n");
-               goto probe_init_failed;
+               goto probe_failed;
        }
 
        if (ha->mqenable && shost_use_blk_mq(host)) {
@@ -3387,15 +3396,6 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 
        return 0;
 
-probe_init_failed:
-       qla2x00_free_req_que(ha, req);
-       ha->req_q_map[0] = NULL;
-       clear_bit(0, ha->req_qid_map);
-       qla2x00_free_rsp_que(ha, rsp);
-       ha->rsp_q_map[0] = NULL;
-       clear_bit(0, ha->rsp_qid_map);
-       ha->max_req_queues = ha->max_rsp_queues = 0;
-
 probe_failed:
        if (base_vha->timer_active)
                qla2x00_stop_timer(base_vha);
@@ -4508,11 +4508,17 @@ qla2x00_mem_free(struct qla_hw_data *ha)
        if (ha->init_cb)
                dma_free_coherent(&ha->pdev->dev, ha->init_cb_size,
                        ha->init_cb, ha->init_cb_dma);
-       vfree(ha->optrom_buffer);
-       kfree(ha->nvram);
-       kfree(ha->npiv_info);
-       kfree(ha->swl);
-       kfree(ha->loop_id_map);
+
+       if (ha->optrom_buffer)
+               vfree(ha->optrom_buffer);
+       if (ha->nvram)
+               kfree(ha->nvram);
+       if (ha->npiv_info)
+               kfree(ha->npiv_info);
+       if (ha->swl)
+               kfree(ha->swl);
+       if (ha->loop_id_map)
+               kfree(ha->loop_id_map);
 
        ha->srb_mempool = NULL;
        ha->ctx_mempool = NULL;
@@ -4528,6 +4534,15 @@ qla2x00_mem_free(struct qla_hw_data *ha)
        ha->ex_init_cb_dma = 0;
        ha->async_pd = NULL;
        ha->async_pd_dma = 0;
+       ha->loop_id_map = NULL;
+       ha->npiv_info = NULL;
+       ha->optrom_buffer = NULL;
+       ha->swl = NULL;
+       ha->nvram = NULL;
+       ha->mctp_dump = NULL;
+       ha->dcbx_tlv = NULL;
+       ha->xgmac_data = NULL;
+       ha->sfp_data = NULL;
 
        ha->s_dma_pool = NULL;
        ha->dl_dma_pool = NULL;
@@ -4577,6 +4592,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht,
 
        spin_lock_init(&vha->work_lock);
        spin_lock_init(&vha->cmd_list_lock);
+       spin_lock_init(&vha->gnl.fcports_lock);
        init_waitqueue_head(&vha->fcport_waitQ);
        init_waitqueue_head(&vha->vref_waitq);
 
@@ -4806,9 +4822,12 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e)
                        fcport->d_id = e->u.new_sess.id;
                        fcport->flags |= FCF_FABRIC_DEVICE;
                        fcport->fw_login_state = DSC_LS_PLOGI_PEND;
-                       if (e->u.new_sess.fc4_type == FC4_TYPE_FCP_SCSI)
+                       if (e->u.new_sess.fc4_type == FC4_TYPE_FCP_SCSI) {
                                fcport->fc4_type = FC4_TYPE_FCP_SCSI;
-
+                       } else if (e->u.new_sess.fc4_type == FC4_TYPE_NVME) {
+                               fcport->fc4_type = FC4_TYPE_OTHER;
+                               fcport->fc4f_nvme = FC4_TYPE_NVME;
+                       }
                        memcpy(fcport->port_name, e->u.new_sess.port_name,
                            WWN_SIZE);
                } else {
@@ -4877,6 +4896,8 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e)
                        }
                        qlt_plogi_ack_unref(vha, pla);
                } else {
+                       fc_port_t *dfcp = NULL;
+
                        spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
                        tfcp = qla2x00_find_fcport_by_nportid(vha,
                            &e->u.new_sess.id, 1);
@@ -4899,11 +4920,13 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e)
                                default:
                                        fcport->login_pause = 1;
                                        tfcp->conflict = fcport;
-                                       qlt_schedule_sess_for_deletion(tfcp);
+                                       dfcp = tfcp;
                                        break;
                                }
                        }
                        spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
+                       if (dfcp)
+                               qlt_schedule_sess_for_deletion(tfcp);
 
                        wwn = wwn_to_u64(fcport->node_name);
 
index 896b2d8bd8035ece6c42e99455bad596553cc594..b49ac85f3de2254694e728f75d4cf2fa7e260662 100644 (file)
@@ -1224,10 +1224,10 @@ static void qla24xx_chk_fcp_state(struct fc_port *sess)
        }
 }
 
-/* ha->tgt.sess_lock supposed to be held on entry */
 void qlt_schedule_sess_for_deletion(struct fc_port *sess)
 {
        struct qla_tgt *tgt = sess->tgt;
+       struct qla_hw_data *ha = sess->vha->hw;
        unsigned long flags;
 
        if (sess->disc_state == DSC_DELETE_PEND)
@@ -1244,16 +1244,16 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess)
                        return;
        }
 
+       spin_lock_irqsave(&ha->tgt.sess_lock, flags);
        if (sess->deleted == QLA_SESS_DELETED)
                sess->logout_on_delete = 0;
 
-       spin_lock_irqsave(&sess->vha->work_lock, flags);
        if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) {
-               spin_unlock_irqrestore(&sess->vha->work_lock, flags);
+               spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
                return;
        }
        sess->deleted = QLA_SESS_DELETION_IN_PROGRESS;
-       spin_unlock_irqrestore(&sess->vha->work_lock, flags);
+       spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
 
        sess->disc_state = DSC_DELETE_PEND;
 
@@ -1262,13 +1262,10 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess)
        ql_dbg(ql_dbg_tgt, sess->vha, 0xe001,
            "Scheduling sess %p for deletion\n", sess);
 
-       /* use cancel to push work element through before re-queue */
-       cancel_work_sync(&sess->del_work);
        INIT_WORK(&sess->del_work, qla24xx_delete_sess_fn);
-       queue_work(sess->vha->hw->wq, &sess->del_work);
+       WARN_ON(!queue_work(sess->vha->hw->wq, &sess->del_work));
 }
 
-/* ha->tgt.sess_lock supposed to be held on entry */
 static void qlt_clear_tgt_db(struct qla_tgt *tgt)
 {
        struct fc_port *sess;
@@ -1451,8 +1448,8 @@ qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport, int max_gen)
        ql_dbg(ql_dbg_tgt_mgt, vha, 0xf008, "qla_tgt_fc_port_deleted %p", sess);
 
        sess->local = 1;
-       qlt_schedule_sess_for_deletion(sess);
        spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
+       qlt_schedule_sess_for_deletion(sess);
 }
 
 static inline int test_tgt_sess_count(struct qla_tgt *tgt)
@@ -1512,10 +1509,8 @@ int qlt_stop_phase1(struct qla_tgt *tgt)
         * Lock is needed, because we still can get an incoming packet.
         */
        mutex_lock(&vha->vha_tgt.tgt_mutex);
-       spin_lock_irqsave(&ha->tgt.sess_lock, flags);
        tgt->tgt_stop = 1;
        qlt_clear_tgt_db(tgt);
-       spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
        mutex_unlock(&vha->vha_tgt.tgt_mutex);
        mutex_unlock(&qla_tgt_mutex);
 
index d042915ce895517b472c52a75dbf8927bf9eed86..ca53a5f785ee239cee9b7de7c10edf75b01ff2d8 100644 (file)
@@ -223,7 +223,8 @@ static void scsi_eh_reset(struct scsi_cmnd *scmd)
 
 static void scsi_eh_inc_host_failed(struct rcu_head *head)
 {
-       struct Scsi_Host *shost = container_of(head, typeof(*shost), rcu);
+       struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu);
+       struct Scsi_Host *shost = scmd->device->host;
        unsigned long flags;
 
        spin_lock_irqsave(shost->host_lock, flags);
@@ -259,7 +260,7 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd)
         * Ensure that all tasks observe the host state change before the
         * host_failed change.
         */
-       call_rcu(&shost->rcu, scsi_eh_inc_host_failed);
+       call_rcu(&scmd->rcu, scsi_eh_inc_host_failed);
 }
 
 /**
index a86df9ca7d1c88aceb1d1e2298f48fbdb8bb3c49..c84f931388f226cdab2071245f7308d2c0b035b3 100644 (file)
@@ -671,6 +671,7 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
        if (!blk_rq_is_scsi(req)) {
                WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED));
                cmd->flags &= ~SCMD_INITIALIZED;
+               destroy_rcu_head(&cmd->rcu);
        }
 
        if (req->mq_ctx) {
@@ -720,6 +721,8 @@ static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd,
                int result)
 {
        switch (host_byte(result)) {
+       case DID_OK:
+               return BLK_STS_OK;
        case DID_TRANSPORT_FAILFAST:
                return BLK_STS_TRANSPORT;
        case DID_TARGET_FAILURE:
@@ -1151,6 +1154,7 @@ static void scsi_initialize_rq(struct request *rq)
        struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
 
        scsi_req_init(&cmd->req);
+       init_rcu_head(&cmd->rcu);
        cmd->jiffies_at_alloc = jiffies;
        cmd->retries = 0;
 }
index bff21e636dddefba176ff6a435cd23863335c3f8..3541caf3fceb4441b220d50b4fac1dc8b98b751d 100644 (file)
@@ -2595,6 +2595,7 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer)
        int res;
        struct scsi_device *sdp = sdkp->device;
        struct scsi_mode_data data;
+       int disk_ro = get_disk_ro(sdkp->disk);
        int old_wp = sdkp->write_prot;
 
        set_disk_ro(sdkp->disk, 0);
@@ -2635,7 +2636,7 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer)
                          "Test WP failed, assume Write Enabled\n");
        } else {
                sdkp->write_prot = ((data.device_specific & 0x80) != 0);
-               set_disk_ro(sdkp->disk, sdkp->write_prot);
+               set_disk_ro(sdkp->disk, sdkp->write_prot || disk_ro);
                if (sdkp->first_scan || old_wp != sdkp->write_prot) {
                        sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n",
                                  sdkp->write_prot ? "on" : "off");
index 6c348a211ebb1e73d5803be0957aacc4c179a18a..89cf4498f5358e17c5473783a58603ca0a38c9a5 100644 (file)
@@ -403,7 +403,7 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf)
  */
 static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 {
-       u64 zone_blocks;
+       u64 zone_blocks = 0;
        sector_t block = 0;
        unsigned char *buf;
        unsigned char *rec;
@@ -421,10 +421,8 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 
        /* Do a report zone to get the same field */
        ret = sd_zbc_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 0);
-       if (ret) {
-               zone_blocks = 0;
-               goto out;
-       }
+       if (ret)
+               goto out_free;
 
        same = buf[4] & 0x0f;
        if (same > 0) {
@@ -464,7 +462,7 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
                        ret = sd_zbc_report_zones(sdkp, buf,
                                                  SD_ZBC_BUF_SIZE, block);
                        if (ret)
-                               return ret;
+                               goto out_free;
                }
 
        } while (block < sdkp->capacity);
@@ -472,35 +470,32 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
        zone_blocks = sdkp->zone_blocks;
 
 out:
-       kfree(buf);
-
        if (!zone_blocks) {
                if (sdkp->first_scan)
                        sd_printk(KERN_NOTICE, sdkp,
                                  "Devices with non constant zone "
                                  "size are not supported\n");
-               return -ENODEV;
-       }
-
-       if (!is_power_of_2(zone_blocks)) {
+               ret = -ENODEV;
+       } else if (!is_power_of_2(zone_blocks)) {
                if (sdkp->first_scan)
                        sd_printk(KERN_NOTICE, sdkp,
                                  "Devices with non power of 2 zone "
                                  "size are not supported\n");
-               return -ENODEV;
-       }
-
-       if (logical_to_sectors(sdkp->device, zone_blocks) > UINT_MAX) {
+               ret = -ENODEV;
+       } else if (logical_to_sectors(sdkp->device, zone_blocks) > UINT_MAX) {
                if (sdkp->first_scan)
                        sd_printk(KERN_NOTICE, sdkp,
                                  "Zone size too large\n");
-               return -ENODEV;
+               ret = -ENODEV;
+       } else {
+               sdkp->zone_blocks = zone_blocks;
+               sdkp->zone_shift = ilog2(zone_blocks);
        }
 
-       sdkp->zone_blocks = zone_blocks;
-       sdkp->zone_shift = ilog2(zone_blocks);
+out_free:
+       kfree(buf);
 
-       return 0;
+       return ret;
 }
 
 /**
index 6be5ab32c94fef437f552a38f79bdc36825efb98..8c51d628b52edfd7e891182919fab16b469b0f3c 100644 (file)
@@ -1311,7 +1311,8 @@ static int storvsc_do_io(struct hv_device *device,
                         */
                        cpumask_and(&alloced_mask, &stor_device->alloced_cpus,
                                    cpumask_of_node(cpu_to_node(q_num)));
-                       for_each_cpu(tgt_cpu, &alloced_mask) {
+                       for_each_cpu_wrap(tgt_cpu, &alloced_mask,
+                                       outgoing_channel->target_cpu + 1) {
                                if (tgt_cpu != outgoing_channel->target_cpu) {
                                        outgoing_channel =
                                        stor_device->stor_chns[tgt_cpu];
index e4f5bb056fd275b84d18a80ae1917e86baf1e319..ba3cfa8e279b508fc632147da0aab110e81724ca 100644 (file)
@@ -2443,39 +2443,21 @@ struct cgr_comp {
        struct completion completion;
 };
 
-static int qman_delete_cgr_thread(void *p)
+static void qman_delete_cgr_smp_call(void *p)
 {
-       struct cgr_comp *cgr_comp = (struct cgr_comp *)p;
-       int ret;
-
-       ret = qman_delete_cgr(cgr_comp->cgr);
-       complete(&cgr_comp->completion);
-
-       return ret;
+       qman_delete_cgr((struct qman_cgr *)p);
 }
 
 void qman_delete_cgr_safe(struct qman_cgr *cgr)
 {
-       struct task_struct *thread;
-       struct cgr_comp cgr_comp;
-
        preempt_disable();
        if (qman_cgr_cpus[cgr->cgrid] != smp_processor_id()) {
-               init_completion(&cgr_comp.completion);
-               cgr_comp.cgr = cgr;
-               thread = kthread_create(qman_delete_cgr_thread, &cgr_comp,
-                                       "cgr_del");
-
-               if (IS_ERR(thread))
-                       goto out;
-
-               kthread_bind(thread, qman_cgr_cpus[cgr->cgrid]);
-               wake_up_process(thread);
-               wait_for_completion(&cgr_comp.completion);
+               smp_call_function_single(qman_cgr_cpus[cgr->cgrid],
+                                        qman_delete_cgr_smp_call, cgr, true);
                preempt_enable();
                return;
        }
-out:
+
        qman_delete_cgr(cgr);
        preempt_enable();
 }
index 53f7275d6cbdb513001582555f898adb35e1cc5b..750f931974119804734945a501e0786ca003977d 100644 (file)
@@ -348,7 +348,7 @@ static int imx_gpc_old_dt_init(struct device *dev, struct regmap *regmap,
                if (i == 1) {
                        domain->supply = devm_regulator_get(dev, "pu");
                        if (IS_ERR(domain->supply))
-                               return PTR_ERR(domain->supply);;
+                               return PTR_ERR(domain->supply);
 
                        ret = imx_pgc_get_clocks(dev, domain);
                        if (ret)
@@ -470,13 +470,21 @@ static int imx_gpc_probe(struct platform_device *pdev)
 
 static int imx_gpc_remove(struct platform_device *pdev)
 {
+       struct device_node *pgc_node;
        int ret;
 
+       pgc_node = of_get_child_by_name(pdev->dev.of_node, "pgc");
+
+       /* bail out if the DT is too old to provide the necessary info */
+       if (!of_property_read_bool(pdev->dev.of_node, "#power-domain-cells") &&
+           !pgc_node)
+               return 0;
+
        /*
         * If the old DT binding is used the toplevel driver needs to
         * de-register the power domains
         */
-       if (!of_get_child_by_name(pdev->dev.of_node, "pgc")) {
+       if (!pgc_node) {
                of_genpd_del_provider(pdev->dev.of_node);
 
                ret = pm_genpd_remove(&imx_gpc_domains[GPC_PGC_DOMAIN_PU].base);
index b3f5cae98ea620cb1726a41fef19979d4811ee0a..9371651d801776702dd34cde1d04d3501f1c54ef 100644 (file)
@@ -117,7 +117,7 @@ config SSB_SERIAL
 
 config SSB_DRIVER_PCICORE_POSSIBLE
        bool
-       depends on SSB_PCIHOST
+       depends on SSB_PCIHOST && SSB = y
        default y
 
 config SSB_DRIVER_PCICORE
index 65420a9f0e8202b1fb7d1214239c717b8fb25bbd..116594413f66dc093b37e99b1b38c4cca3fe028d 100644 (file)
@@ -522,7 +522,7 @@ static int ssb_devices_register(struct ssb_bus *bus)
                        /* Set dev to NULL to not unregister
                         * dev on error unwinding. */
                        sdev->dev = NULL;
-                       kfree(devwrap);
+                       put_device(dev);
                        goto error;
                }
                dev_idx++;
@@ -1116,7 +1116,7 @@ static bool ssb_dma_translation_special_bit(struct ssb_device *dev)
                        chip_id == 43231 || chip_id == 43222);
        }
 
-       return 0;
+       return false;
 }
 
 u32 ssb_dma_translation(struct ssb_device *dev)
index 6dbba5aff19117c6b85c05a4a114be93068a82ab..86580b6df33d05c44b50b2dfda758d95c34b4386 100644 (file)
@@ -326,24 +326,23 @@ static loff_t ashmem_llseek(struct file *file, loff_t offset, int origin)
        mutex_lock(&ashmem_mutex);
 
        if (asma->size == 0) {
-               ret = -EINVAL;
-               goto out;
+               mutex_unlock(&ashmem_mutex);
+               return -EINVAL;
        }
 
        if (!asma->file) {
-               ret = -EBADF;
-               goto out;
+               mutex_unlock(&ashmem_mutex);
+               return -EBADF;
        }
 
+       mutex_unlock(&ashmem_mutex);
+
        ret = vfs_llseek(asma->file, offset, origin);
        if (ret < 0)
-               goto out;
+               return ret;
 
        /** Copy f_pos from backing file, since f_ops->llseek() sets it */
        file->f_pos = asma->file->f_pos;
-
-out:
-       mutex_unlock(&ashmem_mutex);
        return ret;
 }
 
@@ -702,16 +701,14 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd,
        size_t pgstart, pgend;
        int ret = -EINVAL;
 
+       if (unlikely(copy_from_user(&pin, p, sizeof(pin))))
+               return -EFAULT;
+
        mutex_lock(&ashmem_mutex);
 
        if (unlikely(!asma->file))
                goto out_unlock;
 
-       if (unlikely(copy_from_user(&pin, p, sizeof(pin)))) {
-               ret = -EFAULT;
-               goto out_unlock;
-       }
-
        /* per custom, you can pass zero for len to mean "everything onward" */
        if (!pin.len)
                pin.len = PAGE_ALIGN(asma->size) - pin.offset;
index e618a87521a35606cb97cae84d50fbcdfe13f688..9d733471ca2ede9c997e7a41262f7c1a3ca6635a 100644 (file)
@@ -475,8 +475,7 @@ unsigned int comedi_nsamples_left(struct comedi_subdevice *s,
        struct comedi_cmd *cmd = &async->cmd;
 
        if (cmd->stop_src == TRIG_COUNT) {
-               unsigned int nscans = nsamples / cmd->scan_end_arg;
-               unsigned int scans_left = __comedi_nscans_left(s, nscans);
+               unsigned int scans_left = __comedi_nscans_left(s, cmd->stop_arg);
                unsigned int scan_pos =
                    comedi_bytes_to_samples(s, async->scan_progress);
                unsigned long long samples_left = 0;
index 5c0e59e8fe46b087b58b213d6ee04ecfc7f36e3a..cbe98bc2b998276fd95b2d8086a6fabcf2351bf7 100644 (file)
@@ -2180,6 +2180,12 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
                                }
                                if (tty_hung_up_p(file))
                                        break;
+                               /*
+                                * Abort readers for ttys which never actually
+                                * get hung up.  See __tty_hangup().
+                                */
+                               if (test_bit(TTY_HUPPING, &tty->flags))
+                                       break;
                                if (!timeout)
                                        break;
                                if (file->f_flags & O_NONBLOCK) {
index 54adf8d563501ab844cea41edf0427fdb83a1a8e..a93f77ab3da089d9db5874c81a631f6a2f090a19 100644 (file)
@@ -3387,11 +3387,9 @@ static int serial_pci_is_class_communication(struct pci_dev *dev)
        /*
         * If it is not a communications device or the programming
         * interface is greater than 6, give up.
-        *
-        * (Should we try to make guesses for multiport serial devices
-        * later?)
         */
        if ((((dev->class >> 8) != PCI_CLASS_COMMUNICATION_SERIAL) &&
+            ((dev->class >> 8) != PCI_CLASS_COMMUNICATION_MULTISERIAL) &&
             ((dev->class >> 8) != PCI_CLASS_COMMUNICATION_MODEM)) ||
            (dev->class & 0xff) > 6)
                return -ENODEV;
@@ -3428,6 +3426,12 @@ serial_pci_guess_board(struct pci_dev *dev, struct pciserial_board *board)
 {
        int num_iomem, num_port, first_port = -1, i;
 
+       /*
+        * Should we try to make guesses for multiport serial devices later?
+        */
+       if ((dev->class >> 8) == PCI_CLASS_COMMUNICATION_MULTISERIAL)
+               return -ENODEV;
+
        num_iomem = num_port = 0;
        for (i = 0; i < PCI_NUM_BAR_RESOURCES; i++) {
                if (pci_resource_flags(dev, i) & IORESOURCE_IO) {
@@ -4698,6 +4702,17 @@ static const struct pci_device_id serial_pci_tbl[] = {
        {       PCI_VENDOR_ID_INTASHIELD, PCI_DEVICE_ID_INTASHIELD_IS400,
                PCI_ANY_ID, PCI_ANY_ID, 0, 0,    /* 135a.0dc0 */
                pbn_b2_4_115200 },
+       /*
+        * BrainBoxes UC-260
+        */
+       {       PCI_VENDOR_ID_INTASHIELD, 0x0D21,
+               PCI_ANY_ID, PCI_ANY_ID,
+               PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00,
+               pbn_b2_4_115200 },
+       {       PCI_VENDOR_ID_INTASHIELD, 0x0E34,
+               PCI_ANY_ID, PCI_ANY_ID,
+                PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00,
+               pbn_b2_4_115200 },
        /*
         * Perle PCI-RAS cards
         */
index df46a9e88c34dd89eaa766603fd5738c6d99ac9a..e287fe8f10fc035072f0f7ced35c0860616fe100 100644 (file)
@@ -1734,6 +1734,7 @@ static void atmel_get_ip_name(struct uart_port *port)
                switch (version) {
                case 0x302:
                case 0x10213:
+               case 0x10302:
                        dev_dbg(port->dev, "This version is usart\n");
                        atmel_port->has_frac_baudrate = true;
                        atmel_port->has_hw_timer = true;
index 870e84fb6e39ebb8fbfbc31f1a3608caddf6072d..a24278380fec2a6a67b2513514f08ecd6688dfe2 100644 (file)
@@ -245,11 +245,12 @@ int __init of_setup_earlycon(const struct earlycon_id *match,
        }
        port->mapbase = addr;
        port->uartclk = BASE_BAUD * 16;
-       port->membase = earlycon_map(port->mapbase, SZ_4K);
 
        val = of_get_flat_dt_prop(node, "reg-offset", NULL);
        if (val)
                port->mapbase += be32_to_cpu(*val);
+       port->membase = earlycon_map(port->mapbase, SZ_4K);
+
        val = of_get_flat_dt_prop(node, "reg-shift", NULL);
        if (val)
                port->regshift = be32_to_cpu(*val);
index 1d7ca382bc12b2381eba26cf93eb67ec80853977..a33c685af99076953d99718ee05b0e2e97d4b803 100644 (file)
@@ -2093,7 +2093,7 @@ static int serial_imx_probe(struct platform_device *pdev)
        uart_get_rs485_mode(&pdev->dev, &sport->port.rs485);
 
        if (sport->port.rs485.flags & SER_RS485_ENABLED &&
-           (!sport->have_rtscts || !sport->have_rtsgpio))
+           (!sport->have_rtscts && !sport->have_rtsgpio))
                dev_err(&pdev->dev, "no RTS control, disabling rs485\n");
 
        imx_rs485_config(&sport->port, &sport->port.rs485);
index c8dde56b532b2d88fdf15f29f7e81251c39be4ff..35b9201db3b4bb8bcb9219259ec04c98fbab6250 100644 (file)
@@ -1144,6 +1144,8 @@ static int uart_do_autoconfig(struct tty_struct *tty,struct uart_state *state)
                uport->ops->config_port(uport, flags);
 
                ret = uart_startup(tty, state, 1);
+               if (ret == 0)
+                       tty_port_set_initialized(port, true);
                if (ret > 0)
                        ret = 0;
        }
index 7257c078e1554b198a29221b9ce142b5f98305ab..44adf9db38f8955b6cf730108abe5d72c87ef3e9 100644 (file)
@@ -885,6 +885,8 @@ static void sci_receive_chars(struct uart_port *port)
                /* Tell the rest of the system the news. New characters! */
                tty_flip_buffer_push(tport);
        } else {
+               /* TTY buffers full; read from RX reg to prevent lockup */
+               serial_port_in(port, SCxRDR);
                serial_port_in(port, SCxSR); /* dummy read */
                sci_clear_SCxSR(port, SCxSR_RDxF_CLEAR(port));
        }
index eb9133b472f4849e831fd1a4a1c70cf6e917ba28..63114ea35ec1b8f3620ec888155b929ed1915200 100644 (file)
@@ -586,6 +586,14 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session)
                return;
        }
 
+       /*
+        * Some console devices aren't actually hung up for technical and
+        * historical reasons, which can lead to indefinite interruptible
+        * sleep in n_tty_read().  The following explicitly tells
+        * n_tty_read() to abort readers.
+        */
+       set_bit(TTY_HUPPING, &tty->flags);
+
        /* inuse_filps is protected by the single tty lock,
           this really needs to change if we want to flush the
           workqueue with the lock held */
@@ -640,6 +648,7 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session)
         * from the ldisc side, which is now guaranteed.
         */
        set_bit(TTY_HUPPED, &tty->flags);
+       clear_bit(TTY_HUPPING, &tty->flags);
        tty_unlock(tty);
 
        if (f)
index c64cf6c4a83dde975fa90188f2b5f08cec91d688..0c11d40a12bcfa9a012edb8a4bf0d41d6a844365 100644 (file)
@@ -151,6 +151,10 @@ int usb_control_msg(struct usb_device *dev, unsigned int pipe, __u8 request,
 
        ret = usb_internal_control_msg(dev, pipe, dr, data, size, timeout);
 
+       /* Linger a bit, prior to the next control message. */
+       if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG)
+               msleep(200);
+
        kfree(dr);
 
        return ret;
index f4a548471f0fabb09ed78688cad39e0e085a3ca9..54b019e267c5d4b28613ccb99ab253749b016ffa 100644 (file)
@@ -230,7 +230,8 @@ static const struct usb_device_id usb_quirk_list[] = {
        { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT },
 
        /* Corsair Strafe RGB */
-       { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT },
+       { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT |
+         USB_QUIRK_DELAY_CTRL_MSG },
 
        /* Corsair K70 LUX */
        { USB_DEVICE(0x1b1c, 0x1b36), .driver_info = USB_QUIRK_DELAY_INIT },
index 03fd20f0b49613aaffba14aa117e48a7781391a5..c4a47496d2fb92a9ee7488a9e71446bfac0a488a 100644 (file)
@@ -137,7 +137,7 @@ static void dwc2_set_stm32f4x9_fsotg_params(struct dwc2_hsotg *hsotg)
        p->activate_stm_fs_transceiver = true;
 }
 
-static void dwc2_set_stm32f7xx_hsotg_params(struct dwc2_hsotg *hsotg)
+static void dwc2_set_stm32f7_hsotg_params(struct dwc2_hsotg *hsotg)
 {
        struct dwc2_core_params *p = &hsotg->params;
 
@@ -164,8 +164,8 @@ const struct of_device_id dwc2_of_match_table[] = {
        { .compatible = "st,stm32f4x9-fsotg",
          .data = dwc2_set_stm32f4x9_fsotg_params },
        { .compatible = "st,stm32f4x9-hsotg" },
-       { .compatible = "st,stm32f7xx-hsotg",
-         .data = dwc2_set_stm32f7xx_hsotg_params },
+       { .compatible = "st,stm32f7-hsotg",
+         .data = dwc2_set_stm32f7_hsotg_params },
        {},
 };
 MODULE_DEVICE_TABLE(of, dwc2_of_match_table);
index f1d838a4acd61532823883a0debe63b630bcec8e..e94bf91cc58a8821978418b6286fc0f8f2cd6e1d 100644 (file)
@@ -175,7 +175,7 @@ void dwc3_set_mode(struct dwc3 *dwc, u32 mode)
        dwc->desired_dr_role = mode;
        spin_unlock_irqrestore(&dwc->lock, flags);
 
-       queue_work(system_power_efficient_wq, &dwc->drd_work);
+       queue_work(system_freezable_wq, &dwc->drd_work);
 }
 
 u32 dwc3_core_fifo_space(struct dwc3_ep *dep, u8 type)
index 37557651b600aa8aed271dd128e44745d190cbaf..c13befa311107d9b4447a63f761e9bffb420534a 100644 (file)
@@ -507,7 +507,6 @@ static int eem_unwrap(struct gether *port,
                                                0,
                                                GFP_ATOMIC);
                        if (unlikely(!skb3)) {
-                               DBG(cdev, "unable to realign EEM packet\n");
                                dev_kfree_skb_any(skb2);
                                continue;
                        }
index c2592d883f67c45f980c21cfa4181f77fac14760..d2428a9e89003adcb5949c0636d3c0ffd940766d 100644 (file)
@@ -1538,7 +1538,6 @@ ffs_fs_kill_sb(struct super_block *sb)
        if (sb->s_fs_info) {
                ffs_release_dev(sb->s_fs_info);
                ffs_data_closed(sb->s_fs_info);
-               ffs_data_put(sb->s_fs_info);
        }
 }
 
index 84f88fa411cde338981dd9c7686d916dc22fd6ed..d088c340e4d06eb23bf500f2a5dca7a8610460f7 100644 (file)
@@ -447,7 +447,8 @@ static int ohci_init (struct ohci_hcd *ohci)
        struct usb_hcd *hcd = ohci_to_hcd(ohci);
 
        /* Accept arbitrarily long scatter-gather lists */
-       hcd->self.sg_tablesize = ~0;
+       if (!(hcd->driver->flags & HCD_LOCAL_MEM))
+               hcd->self.sg_tablesize = ~0;
 
        if (distrust_firmware)
                ohci->flags |= OHCI_QUIRK_HUB_POWER;
index a1ab8acf39ba5b420dc6932d860a57d150c0ba05..c359bae7b7542cda0d6fbf7d3867b62ab234ff66 100644 (file)
@@ -328,13 +328,14 @@ dbc_ep_do_queue(struct dbc_ep *dep, struct dbc_request *req)
 int dbc_ep_queue(struct dbc_ep *dep, struct dbc_request *req,
                 gfp_t gfp_flags)
 {
+       unsigned long           flags;
        struct xhci_dbc         *dbc = dep->dbc;
        int                     ret = -ESHUTDOWN;
 
-       spin_lock(&dbc->lock);
+       spin_lock_irqsave(&dbc->lock, flags);
        if (dbc->state == DS_CONFIGURED)
                ret = dbc_ep_do_queue(dep, req);
-       spin_unlock(&dbc->lock);
+       spin_unlock_irqrestore(&dbc->lock, flags);
 
        mod_delayed_work(system_wq, &dbc->event_work, 0);
 
@@ -521,15 +522,16 @@ static void xhci_do_dbc_stop(struct xhci_hcd *xhci)
 static int xhci_dbc_start(struct xhci_hcd *xhci)
 {
        int                     ret;
+       unsigned long           flags;
        struct xhci_dbc         *dbc = xhci->dbc;
 
        WARN_ON(!dbc);
 
        pm_runtime_get_sync(xhci_to_hcd(xhci)->self.controller);
 
-       spin_lock(&dbc->lock);
+       spin_lock_irqsave(&dbc->lock, flags);
        ret = xhci_do_dbc_start(xhci);
-       spin_unlock(&dbc->lock);
+       spin_unlock_irqrestore(&dbc->lock, flags);
 
        if (ret) {
                pm_runtime_put(xhci_to_hcd(xhci)->self.controller);
@@ -541,6 +543,7 @@ static int xhci_dbc_start(struct xhci_hcd *xhci)
 
 static void xhci_dbc_stop(struct xhci_hcd *xhci)
 {
+       unsigned long           flags;
        struct xhci_dbc         *dbc = xhci->dbc;
        struct dbc_port         *port = &dbc->port;
 
@@ -551,9 +554,9 @@ static void xhci_dbc_stop(struct xhci_hcd *xhci)
        if (port->registered)
                xhci_dbc_tty_unregister_device(xhci);
 
-       spin_lock(&dbc->lock);
+       spin_lock_irqsave(&dbc->lock, flags);
        xhci_do_dbc_stop(xhci);
-       spin_unlock(&dbc->lock);
+       spin_unlock_irqrestore(&dbc->lock, flags);
 
        pm_runtime_put_sync(xhci_to_hcd(xhci)->self.controller);
 }
@@ -779,14 +782,15 @@ static void xhci_dbc_handle_events(struct work_struct *work)
        int                     ret;
        enum evtreturn          evtr;
        struct xhci_dbc         *dbc;
+       unsigned long           flags;
        struct xhci_hcd         *xhci;
 
        dbc = container_of(to_delayed_work(work), struct xhci_dbc, event_work);
        xhci = dbc->xhci;
 
-       spin_lock(&dbc->lock);
+       spin_lock_irqsave(&dbc->lock, flags);
        evtr = xhci_dbc_do_handle_events(dbc);
-       spin_unlock(&dbc->lock);
+       spin_unlock_irqrestore(&dbc->lock, flags);
 
        switch (evtr) {
        case EVT_GSER:
index 8d47b6fbf973567620cb531c748f9ba32c9c1a2a..75f0b92694ba141beda621b712419031e80b1445 100644 (file)
@@ -92,21 +92,23 @@ static void dbc_start_rx(struct dbc_port *port)
 static void
 dbc_read_complete(struct xhci_hcd *xhci, struct dbc_request *req)
 {
+       unsigned long           flags;
        struct xhci_dbc         *dbc = xhci->dbc;
        struct dbc_port         *port = &dbc->port;
 
-       spin_lock(&port->port_lock);
+       spin_lock_irqsave(&port->port_lock, flags);
        list_add_tail(&req->list_pool, &port->read_queue);
        tasklet_schedule(&port->push);
-       spin_unlock(&port->port_lock);
+       spin_unlock_irqrestore(&port->port_lock, flags);
 }
 
 static void dbc_write_complete(struct xhci_hcd *xhci, struct dbc_request *req)
 {
+       unsigned long           flags;
        struct xhci_dbc         *dbc = xhci->dbc;
        struct dbc_port         *port = &dbc->port;
 
-       spin_lock(&port->port_lock);
+       spin_lock_irqsave(&port->port_lock, flags);
        list_add(&req->list_pool, &port->write_pool);
        switch (req->status) {
        case 0:
@@ -119,7 +121,7 @@ static void dbc_write_complete(struct xhci_hcd *xhci, struct dbc_request *req)
                          req->status);
                break;
        }
-       spin_unlock(&port->port_lock);
+       spin_unlock_irqrestore(&port->port_lock, flags);
 }
 
 static void xhci_dbc_free_req(struct dbc_ep *dep, struct dbc_request *req)
@@ -327,12 +329,13 @@ static void dbc_rx_push(unsigned long _port)
 {
        struct dbc_request      *req;
        struct tty_struct       *tty;
+       unsigned long           flags;
        bool                    do_push = false;
        bool                    disconnect = false;
        struct dbc_port         *port = (void *)_port;
        struct list_head        *queue = &port->read_queue;
 
-       spin_lock_irq(&port->port_lock);
+       spin_lock_irqsave(&port->port_lock, flags);
        tty = port->port.tty;
        while (!list_empty(queue)) {
                req = list_first_entry(queue, struct dbc_request, list_pool);
@@ -392,16 +395,17 @@ static void dbc_rx_push(unsigned long _port)
        if (!disconnect)
                dbc_start_rx(port);
 
-       spin_unlock_irq(&port->port_lock);
+       spin_unlock_irqrestore(&port->port_lock, flags);
 }
 
 static int dbc_port_activate(struct tty_port *_port, struct tty_struct *tty)
 {
+       unsigned long   flags;
        struct dbc_port *port = container_of(_port, struct dbc_port, port);
 
-       spin_lock_irq(&port->port_lock);
+       spin_lock_irqsave(&port->port_lock, flags);
        dbc_start_rx(port);
-       spin_unlock_irq(&port->port_lock);
+       spin_unlock_irqrestore(&port->port_lock, flags);
 
        return 0;
 }
index 5262fa571a5dabb3d84437798b033c5a9af91347..d9f831b67e579d037f8cca4c7f5a93afbbb85f65 100644 (file)
@@ -126,6 +126,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
        if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_find_chipset_info())
                xhci->quirks |= XHCI_AMD_PLL_FIX;
 
+       if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x43bb)
+               xhci->quirks |= XHCI_SUSPEND_DELAY;
+
        if (pdev->vendor == PCI_VENDOR_ID_AMD)
                xhci->quirks |= XHCI_TRUST_TX_LENGTH;
 
index 6f038306c14d6c244dc9a7305adce185ee3aaecc..6652e2d5bd2e4bc3f9d95f04f36c58a6850c7545 100644 (file)
@@ -360,7 +360,6 @@ static int __maybe_unused xhci_plat_suspend(struct device *dev)
 {
        struct usb_hcd  *hcd = dev_get_drvdata(dev);
        struct xhci_hcd *xhci = hcd_to_xhci(hcd);
-       int ret;
 
        /*
         * xhci_suspend() needs `do_wakeup` to know whether host is allowed
@@ -370,12 +369,7 @@ static int __maybe_unused xhci_plat_suspend(struct device *dev)
         * reconsider this when xhci_plat_suspend enlarges its scope, e.g.,
         * also applies to runtime suspend.
         */
-       ret = xhci_suspend(xhci, device_may_wakeup(dev));
-
-       if (!device_may_wakeup(dev) && !IS_ERR(xhci->clk))
-               clk_disable_unprepare(xhci->clk);
-
-       return ret;
+       return xhci_suspend(xhci, device_may_wakeup(dev));
 }
 
 static int __maybe_unused xhci_plat_resume(struct device *dev)
@@ -384,9 +378,6 @@ static int __maybe_unused xhci_plat_resume(struct device *dev)
        struct xhci_hcd *xhci = hcd_to_xhci(hcd);
        int ret;
 
-       if (!device_may_wakeup(dev) && !IS_ERR(xhci->clk))
-               clk_prepare_enable(xhci->clk);
-
        ret = xhci_priv_resume_quirk(hcd);
        if (ret)
                return ret;
index f0b559660007f03c0fe60c0f1bb162a40ca90dad..f33ffc2bc4ed67015e2493374ebf284f7d03b48a 100644 (file)
@@ -83,6 +83,10 @@ static const struct soc_device_attribute rcar_quirks_match[]  = {
                .soc_id = "r8a7796",
                .data = (void *)RCAR_XHCI_FIRMWARE_V3,
        },
+       {
+               .soc_id = "r8a77965",
+               .data = (void *)RCAR_XHCI_FIRMWARE_V3,
+       },
        { /* sentinel */ },
 };
 
index 25d4b748a56f3f4a5e3d29a55e853571195fc014..5d37700ae4b009f0a87e5fa5e3c0cb1e64f39c02 100644 (file)
@@ -877,6 +877,9 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup)
        clear_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
        del_timer_sync(&xhci->shared_hcd->rh_timer);
 
+       if (xhci->quirks & XHCI_SUSPEND_DELAY)
+               usleep_range(1000, 1500);
+
        spin_lock_irq(&xhci->lock);
        clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
        clear_bit(HCD_FLAG_HW_ACCESSIBLE, &xhci->shared_hcd->flags);
index e4d7d3d06a759489b0c4aeb8690b58ca72f9c056..866e141d4972b5d743d5cb61be2f045a102a6574 100644 (file)
@@ -718,11 +718,12 @@ struct xhci_ep_ctx {
 /* bits 10:14 are Max Primary Streams */
 /* bit 15 is Linear Stream Array */
 /* Interval - period between requests to an endpoint - 125u increments. */
-#define EP_INTERVAL(p)         (((p) & 0xff) << 16)
-#define EP_INTERVAL_TO_UFRAMES(p)              (1 << (((p) >> 16) & 0xff))
-#define CTX_TO_EP_INTERVAL(p)  (((p) >> 16) & 0xff)
-#define EP_MAXPSTREAMS_MASK    (0x1f << 10)
-#define EP_MAXPSTREAMS(p)      (((p) << 10) & EP_MAXPSTREAMS_MASK)
+#define EP_INTERVAL(p)                 (((p) & 0xff) << 16)
+#define EP_INTERVAL_TO_UFRAMES(p)      (1 << (((p) >> 16) & 0xff))
+#define CTX_TO_EP_INTERVAL(p)          (((p) >> 16) & 0xff)
+#define EP_MAXPSTREAMS_MASK            (0x1f << 10)
+#define EP_MAXPSTREAMS(p)              (((p) << 10) & EP_MAXPSTREAMS_MASK)
+#define CTX_TO_EP_MAXPSTREAMS(p)       (((p) & EP_MAXPSTREAMS_MASK) >> 10)
 /* Endpoint is set up with a Linear Stream Array (vs. Secondary Stream Array) */
 #define        EP_HAS_LSA              (1 << 15)
 /* hosts with LEC=1 use bits 31:24 as ESIT high bits. */
@@ -1825,6 +1826,7 @@ struct xhci_hcd {
 #define XHCI_U2_DISABLE_WAKE   (1 << 27)
 #define XHCI_ASMEDIA_MODIFY_FLOWCONTROL        (1 << 28)
 #define XHCI_HW_LPM_DISABLE    (1 << 29)
+#define XHCI_SUSPEND_DELAY     (1 << 30)
 
        unsigned int            num_active_eps;
        unsigned int            limit_active_eps;
@@ -2549,21 +2551,22 @@ static inline const char *xhci_decode_ep_context(u32 info, u32 info2, u64 deq,
        u8 burst;
        u8 cerr;
        u8 mult;
-       u8 lsa;
-       u8 hid;
+
+       bool lsa;
+       bool hid;
 
        esit = CTX_TO_MAX_ESIT_PAYLOAD_HI(info) << 16 |
                CTX_TO_MAX_ESIT_PAYLOAD(tx_info);
 
        ep_state = info & EP_STATE_MASK;
-       max_pstr = info & EP_MAXPSTREAMS_MASK;
+       max_pstr = CTX_TO_EP_MAXPSTREAMS(info);
        interval = CTX_TO_EP_INTERVAL(info);
        mult = CTX_TO_EP_MULT(info) + 1;
-       lsa = info & EP_HAS_LSA;
+       lsa = !!(info & EP_HAS_LSA);
 
        cerr = (info2 & (3 << 1)) >> 1;
        ep_type = CTX_TO_EP_TYPE(info2);
-       hid = info2 & (1 << 7);
+       hid = !!(info2 & (1 << 7));
        burst = CTX_TO_MAX_BURST(info2);
        maxp = MAX_PACKET_DECODED(info2);
 
index f5e1bb5e521777f1e5b6b29c61f4c4f9f19f9662..984f7e12a6a5b431d0281fd6715f9e990ff775b6 100644 (file)
@@ -85,6 +85,8 @@ struct mon_reader_text {
 
        wait_queue_head_t wait;
        int printf_size;
+       size_t printf_offset;
+       size_t printf_togo;
        char *printf_buf;
        struct mutex printf_lock;
 
@@ -376,75 +378,103 @@ static int mon_text_open(struct inode *inode, struct file *file)
        return rc;
 }
 
-/*
- * For simplicity, we read one record in one system call and throw out
- * what does not fit. This means that the following does not work:
- *   dd if=/dbg/usbmon/0t bs=10
- * Also, we do not allow seeks and do not bother advancing the offset.
- */
+static ssize_t mon_text_copy_to_user(struct mon_reader_text *rp,
+    char __user * const buf, const size_t nbytes)
+{
+       const size_t togo = min(nbytes, rp->printf_togo);
+
+       if (copy_to_user(buf, &rp->printf_buf[rp->printf_offset], togo))
+               return -EFAULT;
+       rp->printf_togo -= togo;
+       rp->printf_offset += togo;
+       return togo;
+}
+
+/* ppos is not advanced since the llseek operation is not permitted. */
 static ssize_t mon_text_read_t(struct file *file, char __user *buf,
-                               size_t nbytes, loff_t *ppos)
+    size_t nbytes, loff_t *ppos)
 {
        struct mon_reader_text *rp = file->private_data;
        struct mon_event_text *ep;
        struct mon_text_ptr ptr;
+       ssize_t ret;
 
-       ep = mon_text_read_wait(rp, file);
-       if (IS_ERR(ep))
-               return PTR_ERR(ep);
        mutex_lock(&rp->printf_lock);
-       ptr.cnt = 0;
-       ptr.pbuf = rp->printf_buf;
-       ptr.limit = rp->printf_size;
-
-       mon_text_read_head_t(rp, &ptr, ep);
-       mon_text_read_statset(rp, &ptr, ep);
-       ptr.cnt += snprintf(ptr.pbuf + ptr.cnt, ptr.limit - ptr.cnt,
-           " %d", ep->length);
-       mon_text_read_data(rp, &ptr, ep);
-
-       if (copy_to_user(buf, rp->printf_buf, ptr.cnt))
-               ptr.cnt = -EFAULT;
+
+       if (rp->printf_togo == 0) {
+
+               ep = mon_text_read_wait(rp, file);
+               if (IS_ERR(ep)) {
+                       mutex_unlock(&rp->printf_lock);
+                       return PTR_ERR(ep);
+               }
+               ptr.cnt = 0;
+               ptr.pbuf = rp->printf_buf;
+               ptr.limit = rp->printf_size;
+
+               mon_text_read_head_t(rp, &ptr, ep);
+               mon_text_read_statset(rp, &ptr, ep);
+               ptr.cnt += snprintf(ptr.pbuf + ptr.cnt, ptr.limit - ptr.cnt,
+                   " %d", ep->length);
+               mon_text_read_data(rp, &ptr, ep);
+
+               rp->printf_togo = ptr.cnt;
+               rp->printf_offset = 0;
+
+               kmem_cache_free(rp->e_slab, ep);
+       }
+
+       ret = mon_text_copy_to_user(rp, buf, nbytes);
        mutex_unlock(&rp->printf_lock);
-       kmem_cache_free(rp->e_slab, ep);
-       return ptr.cnt;
+       return ret;
 }
 
+/* ppos is not advanced since the llseek operation is not permitted. */
 static ssize_t mon_text_read_u(struct file *file, char __user *buf,
-                               size_t nbytes, loff_t *ppos)
+    size_t nbytes, loff_t *ppos)
 {
        struct mon_reader_text *rp = file->private_data;
        struct mon_event_text *ep;
        struct mon_text_ptr ptr;
+       ssize_t ret;
 
-       ep = mon_text_read_wait(rp, file);
-       if (IS_ERR(ep))
-               return PTR_ERR(ep);
        mutex_lock(&rp->printf_lock);
-       ptr.cnt = 0;
-       ptr.pbuf = rp->printf_buf;
-       ptr.limit = rp->printf_size;
 
-       mon_text_read_head_u(rp, &ptr, ep);
-       if (ep->type == 'E') {
-               mon_text_read_statset(rp, &ptr, ep);
-       } else if (ep->xfertype == USB_ENDPOINT_XFER_ISOC) {
-               mon_text_read_isostat(rp, &ptr, ep);
-               mon_text_read_isodesc(rp, &ptr, ep);
-       } else if (ep->xfertype == USB_ENDPOINT_XFER_INT) {
-               mon_text_read_intstat(rp, &ptr, ep);
-       } else {
-               mon_text_read_statset(rp, &ptr, ep);
+       if (rp->printf_togo == 0) {
+
+               ep = mon_text_read_wait(rp, file);
+               if (IS_ERR(ep)) {
+                       mutex_unlock(&rp->printf_lock);
+                       return PTR_ERR(ep);
+               }
+               ptr.cnt = 0;
+               ptr.pbuf = rp->printf_buf;
+               ptr.limit = rp->printf_size;
+
+               mon_text_read_head_u(rp, &ptr, ep);
+               if (ep->type == 'E') {
+                       mon_text_read_statset(rp, &ptr, ep);
+               } else if (ep->xfertype == USB_ENDPOINT_XFER_ISOC) {
+                       mon_text_read_isostat(rp, &ptr, ep);
+                       mon_text_read_isodesc(rp, &ptr, ep);
+               } else if (ep->xfertype == USB_ENDPOINT_XFER_INT) {
+                       mon_text_read_intstat(rp, &ptr, ep);
+               } else {
+                       mon_text_read_statset(rp, &ptr, ep);
+               }
+               ptr.cnt += snprintf(ptr.pbuf + ptr.cnt, ptr.limit - ptr.cnt,
+                   " %d", ep->length);
+               mon_text_read_data(rp, &ptr, ep);
+
+               rp->printf_togo = ptr.cnt;
+               rp->printf_offset = 0;
+
+               kmem_cache_free(rp->e_slab, ep);
        }
-       ptr.cnt += snprintf(ptr.pbuf + ptr.cnt, ptr.limit - ptr.cnt,
-           " %d", ep->length);
-       mon_text_read_data(rp, &ptr, ep);
 
-       if (copy_to_user(buf, rp->printf_buf, ptr.cnt))
-               ptr.cnt = -EFAULT;
+       ret = mon_text_copy_to_user(rp, buf, nbytes);
        mutex_unlock(&rp->printf_lock);
-       kmem_cache_free(rp->e_slab, ep);
-       return ptr.cnt;
+       return ret;
 }
 
 static struct mon_event_text *mon_text_read_wait(struct mon_reader_text *rp,
index eef4ad578b31da28641cd2fe5889827781fdc6ab..4d723077be2b990e109e6896d3e8e68cdcd08639 100644 (file)
@@ -1756,6 +1756,7 @@ vbus_show(struct device *dev, struct device_attribute *attr, char *buf)
        int             vbus;
        u8              devctl;
 
+       pm_runtime_get_sync(dev);
        spin_lock_irqsave(&musb->lock, flags);
        val = musb->a_wait_bcon;
        vbus = musb_platform_get_vbus_status(musb);
@@ -1769,6 +1770,7 @@ vbus_show(struct device *dev, struct device_attribute *attr, char *buf)
                        vbus = 0;
        }
        spin_unlock_irqrestore(&musb->lock, flags);
+       pm_runtime_put_sync(dev);
 
        return sprintf(buf, "Vbus %s, timeout %lu msec\n",
                        vbus ? "on" : "off", val);
@@ -2471,11 +2473,11 @@ static int musb_remove(struct platform_device *pdev)
        musb_disable_interrupts(musb);
        musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
        spin_unlock_irqrestore(&musb->lock, flags);
+       musb_platform_exit(musb);
 
        pm_runtime_dont_use_autosuspend(musb->controller);
        pm_runtime_put_sync(musb->controller);
        pm_runtime_disable(musb->controller);
-       musb_platform_exit(musb);
        musb_phy_callback = NULL;
        if (musb->dma_controller)
                musb_dma_controller_destroy(musb->dma_controller);
index 3b1b9695177a4c9c0875fd764d1930cdffe5273b..6034c39b67d14ab43376b87eabbbc8e5bf87e0b1 100644 (file)
@@ -1076,7 +1076,7 @@ static int uas_post_reset(struct usb_interface *intf)
                return 0;
 
        err = uas_configure_endpoints(devinfo);
-       if (err && err != ENODEV)
+       if (err && err != -ENODEV)
                shost_printk(KERN_ERR, shost,
                             "%s: alloc streams error %d after reset",
                             __func__, err);
index 264af199aec855babdd43331893d523318cd47a0..747d3a9596d947e11b245b795da66d63f226de3f 100644 (file)
@@ -2118,6 +2118,13 @@ UNUSUAL_DEV(  0x152d, 0x2566, 0x0114, 0x0114,
                USB_SC_DEVICE, USB_PR_DEVICE, NULL,
                US_FL_BROKEN_FUA ),
 
+/* Reported by Teijo Kinnunen <teijo.kinnunen@code-q.fi> */
+UNUSUAL_DEV(  0x152d, 0x2567, 0x0117, 0x0117,
+               "JMicron",
+               "USB to ATA/ATAPI Bridge",
+               USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+               US_FL_BROKEN_FUA ),
+
 /* Reported-by George Cherian <george.cherian@cavium.com> */
 UNUSUAL_DEV(0x152d, 0x9561, 0x0000, 0x9999,
                "JMicron",
index 9ce4756adad6e5fa20017ae4f27603eb87127312..dcd8ef085b30151e76e5cf5921040f15214a17e9 100644 (file)
@@ -1857,7 +1857,8 @@ static int fusb302_probe(struct i2c_client *client,
        chip->tcpm_port = tcpm_register_port(&client->dev, &chip->tcpc_dev);
        if (IS_ERR(chip->tcpm_port)) {
                ret = PTR_ERR(chip->tcpm_port);
-               dev_err(dev, "cannot register tcpm port, ret=%d", ret);
+               if (ret != -EPROBE_DEFER)
+                       dev_err(dev, "cannot register tcpm port, ret=%d", ret);
                goto destroy_workqueue;
        }
 
index f4d563ee769046236daeedb1956907752a1c3c3f..8b637a4b474b08b40690289c33ddc1306811e932 100644 (file)
@@ -252,9 +252,6 @@ struct tcpm_port {
        unsigned int nr_src_pdo;
        u32 snk_pdo[PDO_MAX_OBJECTS];
        unsigned int nr_snk_pdo;
-       unsigned int nr_fixed; /* number of fixed sink PDOs */
-       unsigned int nr_var; /* number of variable sink PDOs */
-       unsigned int nr_batt; /* number of battery sink PDOs */
        u32 snk_vdo[VDO_MAX_OBJECTS];
        unsigned int nr_snk_vdo;
 
@@ -1770,90 +1767,39 @@ static int tcpm_pd_check_request(struct tcpm_port *port)
        return 0;
 }
 
-#define min_power(x, y) min(pdo_max_power(x), pdo_max_power(y))
-#define min_current(x, y) min(pdo_max_current(x), pdo_max_current(y))
-
-static int tcpm_pd_select_pdo(struct tcpm_port *port, int *sink_pdo,
-                             int *src_pdo)
+static int tcpm_pd_select_pdo(struct tcpm_port *port)
 {
-       unsigned int i, j, max_mw = 0, max_mv = 0, mw = 0, mv = 0, ma = 0;
+       unsigned int i, max_mw = 0, max_mv = 0;
        int ret = -EINVAL;
 
        /*
-        * Select the source PDO providing the most power which has a
-        * matchig sink cap.
+        * Select the source PDO providing the most power while staying within
+        * the board's voltage limits. On equal power, prefer the higher voltage.
         */
        for (i = 0; i < port->nr_source_caps; i++) {
                u32 pdo = port->source_caps[i];
                enum pd_pdo_type type = pdo_type(pdo);
+               unsigned int mv, ma, mw;
 
-               if (type == PDO_TYPE_FIXED) {
-                       for (j = 0; j < port->nr_fixed; j++) {
-                               if (pdo_fixed_voltage(pdo) ==
-                                   pdo_fixed_voltage(port->snk_pdo[j])) {
-                                       ma = min_current(pdo, port->snk_pdo[j]);
-                                       mv = pdo_fixed_voltage(pdo);
-                                       mw = ma * mv / 1000;
-                                       if (mw > max_mw ||
-                                           (mw == max_mw && mv > max_mv)) {
-                                               ret = 0;
-                                               *src_pdo = i;
-                                               *sink_pdo = j;
-                                               max_mw = mw;
-                                               max_mv = mv;
-                                       }
-                                       /* There could only be one fixed pdo
-                                        * at a specific voltage level.
-                                        * So breaking here.
-                                        */
-                                       break;
-                               }
-                       }
-               } else if (type == PDO_TYPE_BATT) {
-                       for (j = port->nr_fixed;
-                            j < port->nr_fixed +
-                                port->nr_batt;
-                            j++) {
-                               if (pdo_min_voltage(pdo) >=
-                                    pdo_min_voltage(port->snk_pdo[j]) &&
-                                    pdo_max_voltage(pdo) <=
-                                    pdo_max_voltage(port->snk_pdo[j])) {
-                                       mw = min_power(pdo, port->snk_pdo[j]);
-                                       mv = pdo_min_voltage(pdo);
-                                       if (mw > max_mw ||
-                                           (mw == max_mw && mv > max_mv)) {
-                                               ret = 0;
-                                               *src_pdo = i;
-                                               *sink_pdo = j;
-                                               max_mw = mw;
-                                               max_mv = mv;
-                                       }
-                               }
-                       }
-               } else if (type == PDO_TYPE_VAR) {
-                       for (j = port->nr_fixed +
-                                port->nr_batt;
-                            j < port->nr_fixed +
-                                port->nr_batt +
-                                port->nr_var;
-                            j++) {
-                               if (pdo_min_voltage(pdo) >=
-                                    pdo_min_voltage(port->snk_pdo[j]) &&
-                                    pdo_max_voltage(pdo) <=
-                                    pdo_max_voltage(port->snk_pdo[j])) {
-                                       ma = min_current(pdo, port->snk_pdo[j]);
-                                       mv = pdo_min_voltage(pdo);
-                                       mw = ma * mv / 1000;
-                                       if (mw > max_mw ||
-                                           (mw == max_mw && mv > max_mv)) {
-                                               ret = 0;
-                                               *src_pdo = i;
-                                               *sink_pdo = j;
-                                               max_mw = mw;
-                                               max_mv = mv;
-                                       }
-                               }
-                       }
+               if (type == PDO_TYPE_FIXED)
+                       mv = pdo_fixed_voltage(pdo);
+               else
+                       mv = pdo_min_voltage(pdo);
+
+               if (type == PDO_TYPE_BATT) {
+                       mw = pdo_max_power(pdo);
+               } else {
+                       ma = min(pdo_max_current(pdo),
+                                port->max_snk_ma);
+                       mw = ma * mv / 1000;
+               }
+
+               /* Prefer higher voltages if available */
+               if ((mw > max_mw || (mw == max_mw && mv > max_mv)) &&
+                   mv <= port->max_snk_mv) {
+                       ret = i;
+                       max_mw = mw;
+                       max_mv = mv;
                }
        }
 
@@ -1865,14 +1811,13 @@ static int tcpm_pd_build_request(struct tcpm_port *port, u32 *rdo)
        unsigned int mv, ma, mw, flags;
        unsigned int max_ma, max_mw;
        enum pd_pdo_type type;
-       int src_pdo_index, snk_pdo_index;
-       u32 pdo, matching_snk_pdo;
+       int index;
+       u32 pdo;
 
-       if (tcpm_pd_select_pdo(port, &snk_pdo_index, &src_pdo_index) < 0)
+       index = tcpm_pd_select_pdo(port);
+       if (index < 0)
                return -EINVAL;
-
-       pdo = port->source_caps[src_pdo_index];
-       matching_snk_pdo = port->snk_pdo[snk_pdo_index];
+       pdo = port->source_caps[index];
        type = pdo_type(pdo);
 
        if (type == PDO_TYPE_FIXED)
@@ -1880,28 +1825,26 @@ static int tcpm_pd_build_request(struct tcpm_port *port, u32 *rdo)
        else
                mv = pdo_min_voltage(pdo);
 
-       /* Select maximum available current within the sink pdo's limit */
+       /* Select maximum available current within the board's power limit */
        if (type == PDO_TYPE_BATT) {
-               mw = min_power(pdo, matching_snk_pdo);
-               ma = 1000 * mw / mv;
+               mw = pdo_max_power(pdo);
+               ma = 1000 * min(mw, port->max_snk_mw) / mv;
        } else {
-               ma = min_current(pdo, matching_snk_pdo);
-               mw = ma * mv / 1000;
+               ma = min(pdo_max_current(pdo),
+                        1000 * port->max_snk_mw / mv);
        }
+       ma = min(ma, port->max_snk_ma);
 
        flags = RDO_USB_COMM | RDO_NO_SUSPEND;
 
        /* Set mismatch bit if offered power is less than operating power */
+       mw = ma * mv / 1000;
        max_ma = ma;
        max_mw = mw;
        if (mw < port->operating_snk_mw) {
                flags |= RDO_CAP_MISMATCH;
-               if (type == PDO_TYPE_BATT &&
-                   (pdo_max_power(matching_snk_pdo) > pdo_max_power(pdo)))
-                       max_mw = pdo_max_power(matching_snk_pdo);
-               else if (pdo_max_current(matching_snk_pdo) >
-                        pdo_max_current(pdo))
-                       max_ma = pdo_max_current(matching_snk_pdo);
+               max_mw = port->operating_snk_mw;
+               max_ma = max_mw * 1000 / mv;
        }
 
        tcpm_log(port, "cc=%d cc1=%d cc2=%d vbus=%d vconn=%s polarity=%d",
@@ -1910,16 +1853,16 @@ static int tcpm_pd_build_request(struct tcpm_port *port, u32 *rdo)
                 port->polarity);
 
        if (type == PDO_TYPE_BATT) {
-               *rdo = RDO_BATT(src_pdo_index + 1, mw, max_mw, flags);
+               *rdo = RDO_BATT(index + 1, mw, max_mw, flags);
 
                tcpm_log(port, "Requesting PDO %d: %u mV, %u mW%s",
-                        src_pdo_index, mv, mw,
+                        index, mv, mw,
                         flags & RDO_CAP_MISMATCH ? " [mismatch]" : "");
        } else {
-               *rdo = RDO_FIXED(src_pdo_index + 1, ma, max_ma, flags);
+               *rdo = RDO_FIXED(index + 1, ma, max_ma, flags);
 
                tcpm_log(port, "Requesting PDO %d: %u mV, %u mA%s",
-                        src_pdo_index, mv, ma,
+                        index, mv, ma,
                         flags & RDO_CAP_MISMATCH ? " [mismatch]" : "");
        }
 
@@ -3650,19 +3593,6 @@ int tcpm_update_sink_capabilities(struct tcpm_port *port, const u32 *pdo,
 }
 EXPORT_SYMBOL_GPL(tcpm_update_sink_capabilities);
 
-static int nr_type_pdos(const u32 *pdo, unsigned int nr_pdo,
-                       enum pd_pdo_type type)
-{
-       int count = 0;
-       int i;
-
-       for (i = 0; i < nr_pdo; i++) {
-               if (pdo_type(pdo[i]) == type)
-                       count++;
-       }
-       return count;
-}
-
 struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
 {
        struct tcpm_port *port;
@@ -3708,15 +3638,6 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
                                          tcpc->config->nr_src_pdo);
        port->nr_snk_pdo = tcpm_copy_pdos(port->snk_pdo, tcpc->config->snk_pdo,
                                          tcpc->config->nr_snk_pdo);
-       port->nr_fixed =  nr_type_pdos(port->snk_pdo,
-                                      port->nr_snk_pdo,
-                                      PDO_TYPE_FIXED);
-       port->nr_var = nr_type_pdos(port->snk_pdo,
-                                   port->nr_snk_pdo,
-                                   PDO_TYPE_VAR);
-       port->nr_batt = nr_type_pdos(port->snk_pdo,
-                                    port->nr_snk_pdo,
-                                    PDO_TYPE_BATT);
        port->nr_snk_vdo = tcpm_copy_vdos(port->snk_vdo, tcpc->config->snk_vdo,
                                          tcpc->config->nr_snk_vdo);
 
index d86f72bbbb91ddc24638ec0db5c1b96d8dcb82e1..6dcd3ff655c3367cde86919e024b9cc7d174ecec 100644 (file)
@@ -105,10 +105,14 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a
        if (rv != 0)
                return -EINVAL;
 
+       if (!udc) {
+               dev_err(dev, "no device");
+               return -ENODEV;
+       }
        spin_lock_irqsave(&udc->lock, flags);
        /* Don't export what we don't have */
-       if (!udc || !udc->driver || !udc->pullup) {
-               dev_err(dev, "no device or gadget not bound");
+       if (!udc->driver || !udc->pullup) {
+               dev_err(dev, "gadget not bound");
                ret = -ENODEV;
                goto unlock;
        }
index e30e29ae4819f60cbb53c808558ee808079febe0..45657e2b1ff77b813b546239dd11ee97dfb2fcb4 100644 (file)
@@ -338,11 +338,12 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
 {
        struct page *page[1];
        struct vm_area_struct *vma;
+       struct vm_area_struct *vmas[1];
        int ret;
 
        if (mm == current->mm) {
-               ret = get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE),
-                                         page);
+               ret = get_user_pages_longterm(vaddr, 1, !!(prot & IOMMU_WRITE),
+                                             page, vmas);
        } else {
                unsigned int flags = 0;
 
@@ -351,7 +352,18 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
 
                down_read(&mm->mmap_sem);
                ret = get_user_pages_remote(NULL, mm, vaddr, 1, flags, page,
-                                           NULL, NULL);
+                                           vmas, NULL);
+               /*
+                * The lifetime of a vaddr_get_pfn() page pin is
+                * userspace-controlled. In the fs-dax case this could
+                * lead to indefinite stalls in filesystem operations.
+                * Disallow attempts to pin fs-dax pages via this
+                * interface.
+                */
+               if (ret > 0 && vma_is_fsdax(vmas[0])) {
+                       ret = -EOPNOTSUPP;
+                       put_page(page[0]);
+               }
                up_read(&mm->mmap_sem);
        }
 
index b5fb56b822fde48bd28650a51c761a672541c31c..a31d9b240af8abba24362000eb4eca38be4cccd4 100644 (file)
@@ -170,7 +170,7 @@ static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq)
        if (nvq->rx_ring && !vhost_net_buf_is_empty(rxq)) {
                ptr_ring_unconsume(nvq->rx_ring, rxq->queue + rxq->head,
                                   vhost_net_buf_get_size(rxq),
-                                  __skb_array_destroy_skb);
+                                  tun_ptr_free);
                rxq->head = rxq->tail = 0;
        }
 }
@@ -948,6 +948,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
                n->vqs[i].done_idx = 0;
                n->vqs[i].vhost_hlen = 0;
                n->vqs[i].sock_hlen = 0;
+               n->vqs[i].rx_ring = NULL;
                vhost_net_buf_init(&n->vqs[i].rxq);
        }
        vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
@@ -972,6 +973,7 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n,
        vhost_net_disable_vq(n, vq);
        vq->private_data = NULL;
        vhost_net_buf_unproduce(nvq);
+       nvq->rx_ring = NULL;
        mutex_unlock(&vq->mutex);
        return sock;
 }
@@ -1160,14 +1162,14 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
                vhost_net_disable_vq(n, vq);
                vq->private_data = sock;
                vhost_net_buf_unproduce(nvq);
-               if (index == VHOST_NET_VQ_RX)
-                       nvq->rx_ring = get_tap_ptr_ring(fd);
                r = vhost_vq_init_access(vq);
                if (r)
                        goto err_used;
                r = vhost_net_enable_vq(n, vq);
                if (r)
                        goto err_used;
+               if (index == VHOST_NET_VQ_RX)
+                       nvq->rx_ring = get_tap_ptr_ring(fd);
 
                oldubufs = nvq->ubufs;
                nvq->ubufs = ubufs;
index 0d14e2ff19f16b18a2babdc3107f6ae7d1f35a9f..0898dbdbf955faa4c274c35fab33c0ef4da5bf92 100644 (file)
@@ -61,9 +61,9 @@ static struct vhost_vsock *__vhost_vsock_get(u32 guest_cid)
                if (other_cid == 0)
                        continue;
 
-               if (other_cid == guest_cid) {
+               if (other_cid == guest_cid)
                        return vsock;
-               }
+
        }
 
        return NULL;
index af6fc97f4ba4a5fac8cf2f100616f3cdf33a8aae..a436d44f1b7fbf4e2fe2447de21a6aa5a4903cde 100644 (file)
@@ -122,7 +122,7 @@ int sbusfb_ioctl_helper(unsigned long cmd, unsigned long arg,
                unsigned char __user *ured;
                unsigned char __user *ugreen;
                unsigned char __user *ublue;
-               int index, count, i;
+               unsigned int index, count, i;
 
                if (get_user(index, &c->index) ||
                    __get_user(count, &c->count) ||
@@ -161,7 +161,7 @@ int sbusfb_ioctl_helper(unsigned long cmd, unsigned long arg,
                unsigned char __user *ugreen;
                unsigned char __user *ublue;
                struct fb_cmap *cmap = &info->cmap;
-               int index, count, i;
+               unsigned int index, count, i;
                u8 red, green, blue;
 
                if (get_user(index, &c->index) ||
index eb30f3e09a4775b3f046ccc698f18a05210f17ad..71458f493cf864aa260e493c0ef5ddc4d6c78274 100644 (file)
@@ -428,8 +428,6 @@ static inline int virtqueue_add(struct virtqueue *_vq,
                i = virtio16_to_cpu(_vq->vdev, vq->vring.desc[i].next);
        }
 
-       vq->vq.num_free += total_sg;
-
        if (indirect)
                kfree(desc);
 
index aff773bcebdb59f14a88a3c92b779f5c03de3ba5..37460cd6cabb806ecdf536f5a44019ff845164b9 100644 (file)
@@ -226,6 +226,7 @@ config ZIIRAVE_WATCHDOG
 config RAVE_SP_WATCHDOG
        tristate "RAVE SP Watchdog timer"
        depends on RAVE_SP_CORE
+       depends on NVMEM || !NVMEM
        select WATCHDOG_CORE
        help
          Support for the watchdog on RAVE SP device.
@@ -903,6 +904,7 @@ config F71808E_WDT
 config SP5100_TCO
        tristate "AMD/ATI SP5100 TCO Timer/Watchdog"
        depends on X86 && PCI
+       select WATCHDOG_CORE
        ---help---
          Hardware watchdog driver for the AMD/ATI SP5100 chipset. The TCO
          (Total Cost of Ownership) timer is a watchdog timer that will reboot
@@ -1008,6 +1010,7 @@ config WAFER_WDT
 config I6300ESB_WDT
        tristate "Intel 6300ESB Timer/Watchdog"
        depends on PCI
+       select WATCHDOG_CORE
        ---help---
          Hardware driver for the watchdog timer built into the Intel
          6300ESB controller hub.
@@ -1837,6 +1840,7 @@ config WATCHDOG_SUN4V
 config XEN_WDT
        tristate "Xen Watchdog support"
        depends on XEN
+       select WATCHDOG_CORE
        help
          Say Y here to support the hypervisor watchdog capability provided
          by Xen 4.0 and newer.  The watchdog timeout period is normally one
index e0678c14480f2c96619562e6c3d0098a0541189d..3a33c5344bd5eaaad338dd77e138b5610c35dc27 100644 (file)
@@ -566,7 +566,8 @@ static ssize_t watchdog_write(struct file *file, const char __user *buf,
                                char c;
                                if (get_user(c, buf + i))
                                        return -EFAULT;
-                               expect_close = (c == 'V');
+                               if (c == 'V')
+                                       expect_close = true;
                        }
 
                        /* Properly order writes across fork()ed processes */
index f1f00dfc0e68ce3eb323d7e7bf63ecb4f7ba399a..b0a158073abd55b4dcb961ef7d241dcecfe225f7 100644 (file)
 #include <linux/types.h>
 #include <linux/uaccess.h>
 #include <linux/watchdog.h>
-#ifdef CONFIG_HPWDT_NMI_DECODING
-#include <linux/dmi.h>
-#include <linux/spinlock.h>
-#include <linux/nmi.h>
-#include <linux/kdebug.h>
-#include <linux/notifier.h>
-#include <asm/set_memory.h>
-#endif /* CONFIG_HPWDT_NMI_DECODING */
 #include <asm/nmi.h>
-#include <asm/frame.h>
 
 #define HPWDT_VERSION                  "1.4.0"
 #define SECS_TO_TICKS(secs)            ((secs) * 1000 / 128)
@@ -48,6 +39,9 @@
 static unsigned int soft_margin = DEFAULT_MARGIN;      /* in seconds */
 static unsigned int reload;                    /* the computed soft_margin */
 static bool nowayout = WATCHDOG_NOWAYOUT;
+#ifdef CONFIG_HPWDT_NMI_DECODING
+static unsigned int allow_kdump = 1;
+#endif
 static char expect_release;
 static unsigned long hpwdt_is_open;
 
@@ -63,373 +57,6 @@ static const struct pci_device_id hpwdt_devices[] = {
 };
 MODULE_DEVICE_TABLE(pci, hpwdt_devices);
 
-#ifdef CONFIG_HPWDT_NMI_DECODING
-#define PCI_BIOS32_SD_VALUE            0x5F32335F      /* "_32_" */
-#define CRU_BIOS_SIGNATURE_VALUE       0x55524324
-#define PCI_BIOS32_PARAGRAPH_LEN       16
-#define PCI_ROM_BASE1                  0x000F0000
-#define ROM_SIZE                       0x10000
-
-struct bios32_service_dir {
-       u32 signature;
-       u32 entry_point;
-       u8 revision;
-       u8 length;
-       u8 checksum;
-       u8 reserved[5];
-};
-
-/* type 212 */
-struct smbios_cru64_info {
-       u8 type;
-       u8 byte_length;
-       u16 handle;
-       u32 signature;
-       u64 physical_address;
-       u32 double_length;
-       u32 double_offset;
-};
-#define SMBIOS_CRU64_INFORMATION       212
-
-/* type 219 */
-struct smbios_proliant_info {
-       u8 type;
-       u8 byte_length;
-       u16 handle;
-       u32 power_features;
-       u32 omega_features;
-       u32 reserved;
-       u32 misc_features;
-};
-#define SMBIOS_ICRU_INFORMATION                219
-
-
-struct cmn_registers {
-       union {
-               struct {
-                       u8 ral;
-                       u8 rah;
-                       u16 rea2;
-               };
-               u32 reax;
-       } u1;
-       union {
-               struct {
-                       u8 rbl;
-                       u8 rbh;
-                       u8 reb2l;
-                       u8 reb2h;
-               };
-               u32 rebx;
-       } u2;
-       union {
-               struct {
-                       u8 rcl;
-                       u8 rch;
-                       u16 rec2;
-               };
-               u32 recx;
-       } u3;
-       union {
-               struct {
-                       u8 rdl;
-                       u8 rdh;
-                       u16 red2;
-               };
-               u32 redx;
-       } u4;
-
-       u32 resi;
-       u32 redi;
-       u16 rds;
-       u16 res;
-       u32 reflags;
-}  __attribute__((packed));
-
-static unsigned int hpwdt_nmi_decoding;
-static unsigned int allow_kdump = 1;
-static unsigned int is_icru;
-static unsigned int is_uefi;
-static DEFINE_SPINLOCK(rom_lock);
-static void *cru_rom_addr;
-static struct cmn_registers cmn_regs;
-
-extern asmlinkage void asminline_call(struct cmn_registers *pi86Regs,
-                                               unsigned long *pRomEntry);
-
-#ifdef CONFIG_X86_32
-/* --32 Bit Bios------------------------------------------------------------ */
-
-#define HPWDT_ARCH     32
-
-asm(".text                          \n\t"
-    ".align 4                       \n\t"
-    ".globl asminline_call         \n"
-    "asminline_call:                \n\t"
-    "pushl       %ebp               \n\t"
-    "movl        %esp, %ebp         \n\t"
-    "pusha                          \n\t"
-    "pushf                          \n\t"
-    "push        %es                \n\t"
-    "push        %ds                \n\t"
-    "pop         %es                \n\t"
-    "movl        8(%ebp),%eax       \n\t"
-    "movl        4(%eax),%ebx       \n\t"
-    "movl        8(%eax),%ecx       \n\t"
-    "movl        12(%eax),%edx      \n\t"
-    "movl        16(%eax),%esi      \n\t"
-    "movl        20(%eax),%edi      \n\t"
-    "movl        (%eax),%eax        \n\t"
-    "push        %cs                \n\t"
-    "call        *12(%ebp)          \n\t"
-    "pushf                          \n\t"
-    "pushl       %eax               \n\t"
-    "movl        8(%ebp),%eax       \n\t"
-    "movl        %ebx,4(%eax)       \n\t"
-    "movl        %ecx,8(%eax)       \n\t"
-    "movl        %edx,12(%eax)      \n\t"
-    "movl        %esi,16(%eax)      \n\t"
-    "movl        %edi,20(%eax)      \n\t"
-    "movw        %ds,24(%eax)       \n\t"
-    "movw        %es,26(%eax)       \n\t"
-    "popl        %ebx               \n\t"
-    "movl        %ebx,(%eax)        \n\t"
-    "popl        %ebx               \n\t"
-    "movl        %ebx,28(%eax)      \n\t"
-    "pop         %es                \n\t"
-    "popf                           \n\t"
-    "popa                           \n\t"
-    "leave                          \n\t"
-    "ret                            \n\t"
-    ".previous");
-
-
-/*
- *     cru_detect
- *
- *     Routine Description:
- *     This function uses the 32-bit BIOS Service Directory record to
- *     search for a $CRU record.
- *
- *     Return Value:
- *     0        :  SUCCESS
- *     <0       :  FAILURE
- */
-static int cru_detect(unsigned long map_entry,
-       unsigned long map_offset)
-{
-       void *bios32_map;
-       unsigned long *bios32_entrypoint;
-       unsigned long cru_physical_address;
-       unsigned long cru_length;
-       unsigned long physical_bios_base = 0;
-       unsigned long physical_bios_offset = 0;
-       int retval = -ENODEV;
-
-       bios32_map = ioremap(map_entry, (2 * PAGE_SIZE));
-
-       if (bios32_map == NULL)
-               return -ENODEV;
-
-       bios32_entrypoint = bios32_map + map_offset;
-
-       cmn_regs.u1.reax = CRU_BIOS_SIGNATURE_VALUE;
-
-       set_memory_x((unsigned long)bios32_map, 2);
-       asminline_call(&cmn_regs, bios32_entrypoint);
-
-       if (cmn_regs.u1.ral != 0) {
-               pr_warn("Call succeeded but with an error: 0x%x\n",
-                       cmn_regs.u1.ral);
-       } else {
-               physical_bios_base = cmn_regs.u2.rebx;
-               physical_bios_offset = cmn_regs.u4.redx;
-               cru_length = cmn_regs.u3.recx;
-               cru_physical_address =
-                       physical_bios_base + physical_bios_offset;
-
-               /* If the values look OK, then map it in. */
-               if ((physical_bios_base + physical_bios_offset)) {
-                       cru_rom_addr =
-                               ioremap(cru_physical_address, cru_length);
-                       if (cru_rom_addr) {
-                               set_memory_x((unsigned long)cru_rom_addr & PAGE_MASK,
-                                       (cru_length + PAGE_SIZE - 1) >> PAGE_SHIFT);
-                               retval = 0;
-                       }
-               }
-
-               pr_debug("CRU Base Address:   0x%lx\n", physical_bios_base);
-               pr_debug("CRU Offset Address: 0x%lx\n", physical_bios_offset);
-               pr_debug("CRU Length:         0x%lx\n", cru_length);
-               pr_debug("CRU Mapped Address: %p\n", &cru_rom_addr);
-       }
-       iounmap(bios32_map);
-       return retval;
-}
-
-/*
- *     bios_checksum
- */
-static int bios_checksum(const char __iomem *ptr, int len)
-{
-       char sum = 0;
-       int i;
-
-       /*
-        * calculate checksum of size bytes. This should add up
-        * to zero if we have a valid header.
-        */
-       for (i = 0; i < len; i++)
-               sum += ptr[i];
-
-       return ((sum == 0) && (len > 0));
-}
-
-/*
- *     bios32_present
- *
- *     Routine Description:
- *     This function finds the 32-bit BIOS Service Directory
- *
- *     Return Value:
- *     0        :  SUCCESS
- *     <0       :  FAILURE
- */
-static int bios32_present(const char __iomem *p)
-{
-       struct bios32_service_dir *bios_32_ptr;
-       int length;
-       unsigned long map_entry, map_offset;
-
-       bios_32_ptr = (struct bios32_service_dir *) p;
-
-       /*
-        * Search for signature by checking equal to the swizzled value
-        * instead of calling another routine to perform a strcmp.
-        */
-       if (bios_32_ptr->signature == PCI_BIOS32_SD_VALUE) {
-               length = bios_32_ptr->length * PCI_BIOS32_PARAGRAPH_LEN;
-               if (bios_checksum(p, length)) {
-                       /*
-                        * According to the spec, we're looking for the
-                        * first 4KB-aligned address below the entrypoint
-                        * listed in the header. The Service Directory code
-                        * is guaranteed to occupy no more than 2 4KB pages.
-                        */
-                       map_entry = bios_32_ptr->entry_point & ~(PAGE_SIZE - 1);
-                       map_offset = bios_32_ptr->entry_point - map_entry;
-
-                       return cru_detect(map_entry, map_offset);
-               }
-       }
-       return -ENODEV;
-}
-
-static int detect_cru_service(void)
-{
-       char __iomem *p, *q;
-       int rc = -1;
-
-       /*
-        * Search from 0x0f0000 through 0x0fffff, inclusive.
-        */
-       p = ioremap(PCI_ROM_BASE1, ROM_SIZE);
-       if (p == NULL)
-               return -ENOMEM;
-
-       for (q = p; q < p + ROM_SIZE; q += 16) {
-               rc = bios32_present(q);
-               if (!rc)
-                       break;
-       }
-       iounmap(p);
-       return rc;
-}
-/* ------------------------------------------------------------------------- */
-#endif /* CONFIG_X86_32 */
-#ifdef CONFIG_X86_64
-/* --64 Bit Bios------------------------------------------------------------ */
-
-#define HPWDT_ARCH     64
-
-asm(".text                      \n\t"
-    ".align 4                   \n\t"
-    ".globl asminline_call     \n\t"
-    ".type asminline_call, @function \n\t"
-    "asminline_call:            \n\t"
-    FRAME_BEGIN
-    "pushq      %rax            \n\t"
-    "pushq      %rbx            \n\t"
-    "pushq      %rdx            \n\t"
-    "pushq      %r12            \n\t"
-    "pushq      %r9             \n\t"
-    "movq       %rsi, %r12      \n\t"
-    "movq       %rdi, %r9       \n\t"
-    "movl       4(%r9),%ebx     \n\t"
-    "movl       8(%r9),%ecx     \n\t"
-    "movl       12(%r9),%edx    \n\t"
-    "movl       16(%r9),%esi    \n\t"
-    "movl       20(%r9),%edi    \n\t"
-    "movl       (%r9),%eax      \n\t"
-    "call       *%r12           \n\t"
-    "pushfq                     \n\t"
-    "popq        %r12           \n\t"
-    "movl       %eax, (%r9)     \n\t"
-    "movl       %ebx, 4(%r9)    \n\t"
-    "movl       %ecx, 8(%r9)    \n\t"
-    "movl       %edx, 12(%r9)   \n\t"
-    "movl       %esi, 16(%r9)   \n\t"
-    "movl       %edi, 20(%r9)   \n\t"
-    "movq       %r12, %rax      \n\t"
-    "movl       %eax, 28(%r9)   \n\t"
-    "popq       %r9             \n\t"
-    "popq       %r12            \n\t"
-    "popq       %rdx            \n\t"
-    "popq       %rbx            \n\t"
-    "popq       %rax            \n\t"
-    FRAME_END
-    "ret                        \n\t"
-    ".previous");
-
-/*
- *     dmi_find_cru
- *
- *     Routine Description:
- *     This function checks whether or not a SMBIOS/DMI record is
- *     the 64bit CRU info or not
- */
-static void dmi_find_cru(const struct dmi_header *dm, void *dummy)
-{
-       struct smbios_cru64_info *smbios_cru64_ptr;
-       unsigned long cru_physical_address;
-
-       if (dm->type == SMBIOS_CRU64_INFORMATION) {
-               smbios_cru64_ptr = (struct smbios_cru64_info *) dm;
-               if (smbios_cru64_ptr->signature == CRU_BIOS_SIGNATURE_VALUE) {
-                       cru_physical_address =
-                               smbios_cru64_ptr->physical_address +
-                               smbios_cru64_ptr->double_offset;
-                       cru_rom_addr = ioremap(cru_physical_address,
-                               smbios_cru64_ptr->double_length);
-                       set_memory_x((unsigned long)cru_rom_addr & PAGE_MASK,
-                               smbios_cru64_ptr->double_length >> PAGE_SHIFT);
-               }
-       }
-}
-
-static int detect_cru_service(void)
-{
-       cru_rom_addr = NULL;
-
-       dmi_walk(dmi_find_cru, NULL);
-
-       /* if cru_rom_addr has been set then we found a CRU service */
-       return ((cru_rom_addr != NULL) ? 0 : -ENODEV);
-}
-/* ------------------------------------------------------------------------- */
-#endif /* CONFIG_X86_64 */
-#endif /* CONFIG_HPWDT_NMI_DECODING */
 
 /*
  *     Watchdog operations
@@ -486,30 +113,12 @@ static int hpwdt_my_nmi(void)
  */
 static int hpwdt_pretimeout(unsigned int ulReason, struct pt_regs *regs)
 {
-       unsigned long rom_pl;
-       static int die_nmi_called;
-
-       if (!hpwdt_nmi_decoding)
-               return NMI_DONE;
-
        if ((ulReason == NMI_UNKNOWN) && !hpwdt_my_nmi())
                return NMI_DONE;
 
-       spin_lock_irqsave(&rom_lock, rom_pl);
-       if (!die_nmi_called && !is_icru && !is_uefi)
-               asminline_call(&cmn_regs, cru_rom_addr);
-       die_nmi_called = 1;
-       spin_unlock_irqrestore(&rom_lock, rom_pl);
-
        if (allow_kdump)
                hpwdt_stop();
 
-       if (!is_icru && !is_uefi) {
-               if (cmn_regs.u1.ral == 0) {
-                       nmi_panic(regs, "An NMI occurred, but unable to determine source.\n");
-                       return NMI_HANDLED;
-               }
-       }
        nmi_panic(regs, "An NMI occurred. Depending on your system the reason "
                "for the NMI is logged in any one of the following "
                "resources:\n"
@@ -675,84 +284,11 @@ static struct miscdevice hpwdt_miscdev = {
  *     Init & Exit
  */
 
-#ifdef CONFIG_HPWDT_NMI_DECODING
-#ifdef CONFIG_X86_LOCAL_APIC
-static void hpwdt_check_nmi_decoding(struct pci_dev *dev)
-{
-       /*
-        * If nmi_watchdog is turned off then we can turn on
-        * our nmi decoding capability.
-        */
-       hpwdt_nmi_decoding = 1;
-}
-#else
-static void hpwdt_check_nmi_decoding(struct pci_dev *dev)
-{
-       dev_warn(&dev->dev, "NMI decoding is disabled. "
-               "Your kernel does not support a NMI Watchdog.\n");
-}
-#endif /* CONFIG_X86_LOCAL_APIC */
-
-/*
- *     dmi_find_icru
- *
- *     Routine Description:
- *     This function checks whether or not we are on an iCRU-based server.
- *     This check is independent of architecture and needs to be made for
- *     any ProLiant system.
- */
-static void dmi_find_icru(const struct dmi_header *dm, void *dummy)
-{
-       struct smbios_proliant_info *smbios_proliant_ptr;
-
-       if (dm->type == SMBIOS_ICRU_INFORMATION) {
-               smbios_proliant_ptr = (struct smbios_proliant_info *) dm;
-               if (smbios_proliant_ptr->misc_features & 0x01)
-                       is_icru = 1;
-               if (smbios_proliant_ptr->misc_features & 0x1400)
-                       is_uefi = 1;
-       }
-}
 
 static int hpwdt_init_nmi_decoding(struct pci_dev *dev)
 {
+#ifdef CONFIG_HPWDT_NMI_DECODING
        int retval;
-
-       /*
-        * On typical CRU-based systems we need to map that service in
-        * the BIOS. For 32 bit Operating Systems we need to go through
-        * the 32 Bit BIOS Service Directory. For 64 bit Operating
-        * Systems we get that service through SMBIOS.
-        *
-        * On systems that support the new iCRU service all we need to
-        * do is call dmi_walk to get the supported flag value and skip
-        * the old cru detect code.
-        */
-       dmi_walk(dmi_find_icru, NULL);
-       if (!is_icru && !is_uefi) {
-
-               /*
-               * We need to map the ROM to get the CRU service.
-               * For 32 bit Operating Systems we need to go through the 32 Bit
-               * BIOS Service Directory
-               * For 64 bit Operating Systems we get that service through SMBIOS.
-               */
-               retval = detect_cru_service();
-               if (retval < 0) {
-                       dev_warn(&dev->dev,
-                               "Unable to detect the %d Bit CRU Service.\n",
-                               HPWDT_ARCH);
-                       return retval;
-               }
-
-               /*
-               * We know this is the only CRU call we need to make so lets keep as
-               * few instructions as possible once the NMI comes in.
-               */
-               cmn_regs.u1.rah = 0x0D;
-               cmn_regs.u1.ral = 0x02;
-       }
-
        /*
         * Only one function can register for NMI_UNKNOWN
         */
@@ -780,44 +316,25 @@ static int hpwdt_init_nmi_decoding(struct pci_dev *dev)
        dev_warn(&dev->dev,
                "Unable to register a die notifier (err=%d).\n",
                retval);
-       if (cru_rom_addr)
-               iounmap(cru_rom_addr);
        return retval;
+#endif /* CONFIG_HPWDT_NMI_DECODING */
+       return 0;
 }
 
 static void hpwdt_exit_nmi_decoding(void)
 {
+#ifdef CONFIG_HPWDT_NMI_DECODING
        unregister_nmi_handler(NMI_UNKNOWN, "hpwdt");
        unregister_nmi_handler(NMI_SERR, "hpwdt");
        unregister_nmi_handler(NMI_IO_CHECK, "hpwdt");
-       if (cru_rom_addr)
-               iounmap(cru_rom_addr);
-}
-#else /* !CONFIG_HPWDT_NMI_DECODING */
-static void hpwdt_check_nmi_decoding(struct pci_dev *dev)
-{
-}
-
-static int hpwdt_init_nmi_decoding(struct pci_dev *dev)
-{
-       return 0;
+#endif
 }
 
-static void hpwdt_exit_nmi_decoding(void)
-{
-}
-#endif /* CONFIG_HPWDT_NMI_DECODING */
-
 static int hpwdt_init_one(struct pci_dev *dev,
                                        const struct pci_device_id *ent)
 {
        int retval;
 
-       /*
-        * Check if we can do NMI decoding or not
-        */
-       hpwdt_check_nmi_decoding(dev);
-
        /*
         * First let's find out if we are on an iLO2+ server. We will
         * not run on a legacy ASM box.
@@ -922,6 +439,6 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
 #ifdef CONFIG_HPWDT_NMI_DECODING
 module_param(allow_kdump, int, 0);
 MODULE_PARM_DESC(allow_kdump, "Start a kernel dump after NMI occurs");
-#endif /* !CONFIG_HPWDT_NMI_DECODING */
+#endif /* CONFIG_HPWDT_NMI_DECODING */
 
 module_pci_driver(hpwdt_driver);
index 316c2eb122d23d335d738947a63fc2f9db2e4f1b..e8bd9887c56638aaf81659c45d7505c424266b55 100644 (file)
@@ -50,6 +50,7 @@
  */
 
 #include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -159,7 +160,7 @@ static unsigned int sbsa_gwdt_get_timeleft(struct watchdog_device *wdd)
            !(readl(gwdt->control_base + SBSA_GWDT_WCS) & SBSA_GWDT_WCS_WS0))
                timeleft += readl(gwdt->control_base + SBSA_GWDT_WOR);
 
-       timeleft += readq(gwdt->control_base + SBSA_GWDT_WCV) -
+       timeleft += lo_hi_readq(gwdt->control_base + SBSA_GWDT_WCV) -
                    arch_counter_get_cntvct();
 
        do_div(timeleft, gwdt->clk);
index 6d1fbda0f461ca2d2304eaeb43aef35adfd77cf4..0da9943d405f8ff89efc87a0cf6df5ff43df9d6f 100644 (file)
@@ -392,7 +392,7 @@ static int wdat_wdt_probe(struct platform_device *pdev)
 
                memset(&r, 0, sizeof(r));
                r.start = gas->address;
-               r.end = r.start + gas->access_width;
+               r.end = r.start + gas->access_width - 1;
                if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
                        r.flags = IORESOURCE_MEM;
                } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
index 1ab4bd11f5f3f01f34b1a0055f836c9a0ce755ba..762378f1811cc9069dc6171edb55aaa3610b82fa 100644 (file)
@@ -755,8 +755,8 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
        mutex_unlock(&irq_mapping_update_lock);
        return irq;
 error_irq:
-       for (; i >= 0; i--)
-               __unbind_from_irq(irq + i);
+       while (nvec--)
+               __unbind_from_irq(irq + nvec);
        mutex_unlock(&irq_mapping_update_lock);
        return ret;
 }
index 156e5aea36db964dfaac58784044ca1cd0cf57c7..b1092fbefa6309d2535b17b78979b6f3fa9b2b42 100644 (file)
@@ -416,7 +416,7 @@ static int pvcalls_back_connect(struct xenbus_device *dev,
                                        sock);
        if (!map) {
                ret = -EFAULT;
-               sock_release(map->sock);
+               sock_release(sock);
        }
 
 out:
index aedbee3b28386a1cc1625334fe431875bd787277..2f11ca72a281410122ef0b4f0dc4f173ef7c6697 100644 (file)
@@ -73,20 +73,25 @@ struct sock_mapping {
                        wait_queue_head_t inflight_conn_req;
                } active;
                struct {
-               /* Socket status */
+               /*
+                * Socket status, needs to be 64-bit aligned due to the
+                * test_and_* functions which have this requirement on arm64.
+                */
 #define PVCALLS_STATUS_UNINITALIZED  0
 #define PVCALLS_STATUS_BIND          1
 #define PVCALLS_STATUS_LISTEN        2
-                       uint8_t status;
+                       uint8_t status __attribute__((aligned(8)));
                /*
                 * Internal state-machine flags.
                 * Only one accept operation can be inflight for a socket.
                 * Only one poll operation can be inflight for a given socket.
+                * flags needs to be 64-bit aligned due to the test_and_*
+                * functions which have this requirement on arm64.
                 */
 #define PVCALLS_FLAG_ACCEPT_INFLIGHT 0
 #define PVCALLS_FLAG_POLL_INFLIGHT   1
 #define PVCALLS_FLAG_POLL_RET        2
-                       uint8_t flags;
+                       uint8_t flags __attribute__((aligned(8)));
                        uint32_t inflight_req_id;
                        struct sock_mapping *accept_map;
                        wait_queue_head_t inflight_accept_req;
index 74888cacd0b0bdcd250135e6436e0c1e39fc330f..ec9eb4fba59c7e88f746f01aa9fe66c2ed590482 100644 (file)
@@ -466,8 +466,11 @@ int xenbus_probe_node(struct xen_bus_type *bus,
 
        /* Register with generic device framework. */
        err = device_register(&xendev->dev);
-       if (err)
+       if (err) {
+               put_device(&xendev->dev);
+               xendev = NULL;
                goto fail;
+       }
 
        return 0;
 fail:
index a062d75109cb380aeb819fe42d43644c8be33a91..6bcd3fb5265ad13fba958c2de06d0f54232a5eba 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -68,9 +68,9 @@ struct aio_ring {
 #define AIO_RING_PAGES 8
 
 struct kioctx_table {
-       struct rcu_head rcu;
-       unsigned        nr;
-       struct kioctx   *table[];
+       struct rcu_head         rcu;
+       unsigned                nr;
+       struct kioctx __rcu     *table[];
 };
 
 struct kioctx_cpu {
@@ -115,7 +115,8 @@ struct kioctx {
        struct page             **ring_pages;
        long                    nr_pages;
 
-       struct work_struct      free_work;
+       struct rcu_head         free_rcu;
+       struct work_struct      free_work;      /* see free_ioctx() */
 
        /*
         * signals when all in-flight requests are done
@@ -329,7 +330,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
        for (i = 0; i < table->nr; i++) {
                struct kioctx *ctx;
 
-               ctx = table->table[i];
+               ctx = rcu_dereference(table->table[i]);
                if (ctx && ctx->aio_ring_file == file) {
                        if (!atomic_read(&ctx->dead)) {
                                ctx->user_id = ctx->mmap_base = vma->vm_start;
@@ -588,6 +589,12 @@ static int kiocb_cancel(struct aio_kiocb *kiocb)
        return cancel(&kiocb->common);
 }
 
+/*
+ * free_ioctx() should be RCU delayed to synchronize against the RCU
+ * protected lookup_ioctx() and also needs process context to call
+ * aio_free_ring(), so the double bouncing through kioctx->free_rcu and
+ * ->free_work.
+ */
 static void free_ioctx(struct work_struct *work)
 {
        struct kioctx *ctx = container_of(work, struct kioctx, free_work);
@@ -601,6 +608,14 @@ static void free_ioctx(struct work_struct *work)
        kmem_cache_free(kioctx_cachep, ctx);
 }
 
+static void free_ioctx_rcufn(struct rcu_head *head)
+{
+       struct kioctx *ctx = container_of(head, struct kioctx, free_rcu);
+
+       INIT_WORK(&ctx->free_work, free_ioctx);
+       schedule_work(&ctx->free_work);
+}
+
 static void free_ioctx_reqs(struct percpu_ref *ref)
 {
        struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
@@ -609,8 +624,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
        if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
                complete(&ctx->rq_wait->comp);
 
-       INIT_WORK(&ctx->free_work, free_ioctx);
-       schedule_work(&ctx->free_work);
+       /* Synchronize against RCU protected table->table[] dereferences */
+       call_rcu(&ctx->free_rcu, free_ioctx_rcufn);
 }
 
 /*
@@ -651,9 +666,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
        while (1) {
                if (table)
                        for (i = 0; i < table->nr; i++)
-                               if (!table->table[i]) {
+                               if (!rcu_access_pointer(table->table[i])) {
                                        ctx->id = i;
-                                       table->table[i] = ctx;
+                                       rcu_assign_pointer(table->table[i], ctx);
                                        spin_unlock(&mm->ioctx_lock);
 
                                        /* While kioctx setup is in progress,
@@ -834,11 +849,11 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
        }
 
        table = rcu_dereference_raw(mm->ioctx_table);
-       WARN_ON(ctx != table->table[ctx->id]);
-       table->table[ctx->id] = NULL;
+       WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id]));
+       RCU_INIT_POINTER(table->table[ctx->id], NULL);
        spin_unlock(&mm->ioctx_lock);
 
-       /* percpu_ref_kill() will do the necessary call_rcu() */
+       /* free_ioctx_reqs() will do the necessary RCU synchronization */
        wake_up_all(&ctx->wait);
 
        /*
@@ -880,7 +895,8 @@ void exit_aio(struct mm_struct *mm)
 
        skipped = 0;
        for (i = 0; i < table->nr; ++i) {
-               struct kioctx *ctx = table->table[i];
+               struct kioctx *ctx =
+                       rcu_dereference_protected(table->table[i], true);
 
                if (!ctx) {
                        skipped++;
@@ -1069,7 +1085,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
        if (!table || id >= table->nr)
                goto out;
 
-       ctx = table->table[id];
+       ctx = rcu_dereference(table->table[id]);
        if (ctx && ctx->user_id == ctx_id) {
                percpu_ref_get(&ctx->users);
                ret = ctx;
index 4a181fcb51751dc2cbc8fda10930a47bc883380e..fe09ef9c21f349a55d31261b5dd416b1bde2a3aa 100644 (file)
@@ -1058,6 +1058,27 @@ static int bd_prepare_to_claim(struct block_device *bdev,
        return 0;
 }
 
+static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
+{
+       struct gendisk *disk = get_gendisk(bdev->bd_dev, partno);
+
+       if (!disk)
+               return NULL;
+       /*
+        * Now that we hold gendisk reference we make sure bdev we looked up is
+        * not stale. If it is, it means device got removed and created before
+        * we looked up gendisk and we fail open in such case. Associating
+        * unhashed bdev with newly created gendisk could lead to two bdevs
+        * (and thus two independent caches) being associated with one device
+        * which is bad.
+        */
+       if (inode_unhashed(bdev->bd_inode)) {
+               put_disk_and_module(disk);
+               return NULL;
+       }
+       return disk;
+}
+
 /**
  * bd_start_claiming - start claiming a block device
  * @bdev: block device of interest
@@ -1094,7 +1115,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
         * @bdev might not have been initialized properly yet, look up
         * and grab the outer block device the hard way.
         */
-       disk = get_gendisk(bdev->bd_dev, &partno);
+       disk = bdev_get_gendisk(bdev, &partno);
        if (!disk)
                return ERR_PTR(-ENXIO);
 
@@ -1111,8 +1132,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
        else
                whole = bdgrab(bdev);
 
-       module_put(disk->fops->owner);
-       put_disk(disk);
+       put_disk_and_module(disk);
        if (!whole)
                return ERR_PTR(-ENOMEM);
 
@@ -1407,10 +1427,10 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
 static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 {
        struct gendisk *disk;
-       struct module *owner;
        int ret;
        int partno;
        int perm = 0;
+       bool first_open = false;
 
        if (mode & FMODE_READ)
                perm |= MAY_READ;
@@ -1430,14 +1450,14 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
  restart:
 
        ret = -ENXIO;
-       disk = get_gendisk(bdev->bd_dev, &partno);
+       disk = bdev_get_gendisk(bdev, &partno);
        if (!disk)
                goto out;
-       owner = disk->fops->owner;
 
        disk_block_events(disk);
        mutex_lock_nested(&bdev->bd_mutex, for_part);
        if (!bdev->bd_openers) {
+               first_open = true;
                bdev->bd_disk = disk;
                bdev->bd_queue = disk->queue;
                bdev->bd_contains = bdev;
@@ -1463,8 +1483,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                                        bdev->bd_queue = NULL;
                                        mutex_unlock(&bdev->bd_mutex);
                                        disk_unblock_events(disk);
-                                       put_disk(disk);
-                                       module_put(owner);
+                                       put_disk_and_module(disk);
                                        goto restart;
                                }
                        }
@@ -1524,15 +1543,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                        if (ret)
                                goto out_unlock_bdev;
                }
-               /* only one opener holds refs to the module and disk */
-               put_disk(disk);
-               module_put(owner);
        }
        bdev->bd_openers++;
        if (for_part)
                bdev->bd_part_count++;
        mutex_unlock(&bdev->bd_mutex);
        disk_unblock_events(disk);
+       /* only one opener holds refs to the module and disk */
+       if (!first_open)
+               put_disk_and_module(disk);
        return 0;
 
  out_clear:
@@ -1546,8 +1565,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
  out_unlock_bdev:
        mutex_unlock(&bdev->bd_mutex);
        disk_unblock_events(disk);
-       put_disk(disk);
-       module_put(owner);
+       put_disk_and_module(disk);
  out:
        bdput(bdev);
 
@@ -1770,8 +1788,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
                        disk->fops->release(disk, mode);
        }
        if (!bdev->bd_openers) {
-               struct module *owner = disk->fops->owner;
-
                disk_put_part(bdev->bd_part);
                bdev->bd_part = NULL;
                bdev->bd_disk = NULL;
@@ -1779,8 +1795,7 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
                        victim = bdev->bd_contains;
                bdev->bd_contains = NULL;
 
-               put_disk(disk);
-               module_put(owner);
+               put_disk_and_module(disk);
        }
        mutex_unlock(&bdev->bd_mutex);
        bdput(bdev);
index f94b2d8c744a1bfee2dee175ee8f8453aa1e0e8f..26484648d0903298cbb68bff095232873e94673a 100644 (file)
@@ -1519,6 +1519,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
                if (!node)
                        break;
                bytenr = node->val;
+               shared.share_count = 0;
                cond_resched();
        }
 
index 1a462ab85c49888a3962b026a74ede8c47182fc1..da308774b8a4538c4bbea595a11b49a2a8c5ca5e 100644 (file)
@@ -2974,7 +2974,7 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
        kfree(fs_info->super_copy);
        kfree(fs_info->super_for_commit);
        security_free_mnt_opts(&fs_info->security_opts);
-       kfree(fs_info);
+       kvfree(fs_info);
 }
 
 /* tree mod log functions from ctree.c */
@@ -3095,7 +3095,10 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
                          u64 inode_objectid, u64 ref_objectid, int ins_len,
                          int cow);
 
-int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
+int btrfs_find_name_in_backref(struct extent_buffer *leaf, int slot,
+                              const char *name,
+                              int name_len, struct btrfs_inode_ref **ref_ret);
+int btrfs_find_name_in_ext_backref(struct extent_buffer *leaf, int slot,
                                   u64 ref_objectid, const char *name,
                                   int name_len,
                                   struct btrfs_inode_extref **extref_ret);
index 39c968f801572ec004223893d44525c64bb9ecc0..65e1a76bf7557441b22b16d4765a531b18b23562 100644 (file)
 #include "transaction.h"
 #include "print-tree.h"
 
-static int find_name_in_backref(struct btrfs_path *path, const char *name,
-                        int name_len, struct btrfs_inode_ref **ref_ret)
+int btrfs_find_name_in_backref(struct extent_buffer *leaf, int slot,
+                              const char *name,
+                              int name_len, struct btrfs_inode_ref **ref_ret)
 {
-       struct extent_buffer *leaf;
        struct btrfs_inode_ref *ref;
        unsigned long ptr;
        unsigned long name_ptr;
@@ -33,9 +33,8 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name,
        u32 cur_offset = 0;
        int len;
 
-       leaf = path->nodes[0];
-       item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-       ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+       item_size = btrfs_item_size_nr(leaf, slot);
+       ptr = btrfs_item_ptr_offset(leaf, slot);
        while (cur_offset < item_size) {
                ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
                len = btrfs_inode_ref_name_len(leaf, ref);
@@ -44,18 +43,19 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name,
                if (len != name_len)
                        continue;
                if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) {
-                       *ref_ret = ref;
+                       if (ref_ret)
+                               *ref_ret = ref;
                        return 1;
                }
        }
        return 0;
 }
 
-int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid,
+int btrfs_find_name_in_ext_backref(struct extent_buffer *leaf, int slot,
+                                  u64 ref_objectid,
                                   const char *name, int name_len,
                                   struct btrfs_inode_extref **extref_ret)
 {
-       struct extent_buffer *leaf;
        struct btrfs_inode_extref *extref;
        unsigned long ptr;
        unsigned long name_ptr;
@@ -63,9 +63,8 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid,
        u32 cur_offset = 0;
        int ref_name_len;
 
-       leaf = path->nodes[0];
-       item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-       ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+       item_size = btrfs_item_size_nr(leaf, slot);
+       ptr = btrfs_item_ptr_offset(leaf, slot);
 
        /*
         * Search all extended backrefs in this item. We're only
@@ -113,7 +112,9 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
                return ERR_PTR(ret);
        if (ret > 0)
                return NULL;
-       if (!btrfs_find_name_in_ext_backref(path, ref_objectid, name, name_len, &extref))
+       if (!btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
+                                           ref_objectid, name, name_len,
+                                           &extref))
                return NULL;
        return extref;
 }
@@ -155,7 +156,8 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
         * This should always succeed so error here will make the FS
         * readonly.
         */
-       if (!btrfs_find_name_in_ext_backref(path, ref_objectid,
+       if (!btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
+                                           ref_objectid,
                                            name, name_len, &extref)) {
                btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL);
                ret = -EROFS;
@@ -225,7 +227,8 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
        } else if (ret < 0) {
                goto out;
        }
-       if (!find_name_in_backref(path, name, name_len, &ref)) {
+       if (!btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
+                                       name, name_len, &ref)) {
                ret = -ENOENT;
                search_ext_refs = 1;
                goto out;
@@ -293,7 +296,9 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
        ret = btrfs_insert_empty_item(trans, root, path, &key,
                                      ins_len);
        if (ret == -EEXIST) {
-               if (btrfs_find_name_in_ext_backref(path, ref_objectid,
+               if (btrfs_find_name_in_ext_backref(path->nodes[0],
+                                                  path->slots[0],
+                                                  ref_objectid,
                                                   name, name_len, NULL))
                        goto out;
 
@@ -351,7 +356,8 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
        if (ret == -EEXIST) {
                u32 old_size;
 
-               if (find_name_in_backref(path, name, name_len, &ref))
+               if (btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
+                                              name, name_len, &ref))
                        goto out;
 
                old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
@@ -365,7 +371,9 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
                ret = 0;
        } else if (ret < 0) {
                if (ret == -EOVERFLOW) {
-                       if (find_name_in_backref(path, name, name_len, &ref))
+                       if (btrfs_find_name_in_backref(path->nodes[0],
+                                                      path->slots[0],
+                                                      name, name_len, &ref))
                                ret = -EEXIST;
                        else
                                ret = -EMLINK;
index a79299a89b7d0285328e29cecaaff51ff82c5be7..f53470112670b2de73343e635e4304fd08816468 100644 (file)
@@ -2043,12 +2043,15 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
                             struct inode *inode, struct list_head *list)
 {
        struct btrfs_ordered_sum *sum;
+       int ret;
 
        list_for_each_entry(sum, list, list) {
                trans->adding_csums = true;
-               btrfs_csum_file_blocks(trans,
+               ret = btrfs_csum_file_blocks(trans,
                       BTRFS_I(inode)->root->fs_info->csum_root, sum);
                trans->adding_csums = false;
+               if (ret)
+                       return ret;
        }
        return 0;
 }
@@ -3062,7 +3065,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                goto out;
        }
 
-       add_pending_csums(trans, inode, &ordered_extent->list);
+       ret = add_pending_csums(trans, inode, &ordered_extent->list);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto out;
+       }
 
        btrfs_ordered_update_i_size(inode, 0, ordered_extent);
        ret = btrfs_update_inode_fallback(trans, root, inode);
index dec0907dfb8a128fe368a951a76f25cc34563702..fcfc20de2df395bfd70aa5541ad34689e72f8deb 100644 (file)
@@ -1370,6 +1370,7 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
                stripe_start = stripe->physical;
                if (physical >= stripe_start &&
                    physical < stripe_start + rbio->stripe_len &&
+                   stripe->dev->bdev &&
                    bio->bi_disk == stripe->dev->bdev->bd_disk &&
                    bio->bi_partno == stripe->dev->bdev->bd_partno) {
                        return i;
index f0c3f00e97cbe76e1fa8484efc8933842856c8d5..cd2298d185dd121bd1412e571a07952c343b0ab5 100644 (file)
@@ -3268,8 +3268,22 @@ static int relocate_file_extent_cluster(struct inode *inode,
                        nr++;
                }
 
-               btrfs_set_extent_delalloc(inode, page_start, page_end, 0, NULL,
-                                         0);
+               ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
+                                               NULL, 0);
+               if (ret) {
+                       unlock_page(page);
+                       put_page(page);
+                       btrfs_delalloc_release_metadata(BTRFS_I(inode),
+                                                        PAGE_SIZE);
+                       btrfs_delalloc_release_extents(BTRFS_I(inode),
+                                                      PAGE_SIZE);
+
+                       clear_extent_bits(&BTRFS_I(inode)->io_tree,
+                                         page_start, page_end,
+                                         EXTENT_LOCKED | EXTENT_BOUNDARY);
+                       goto out;
+
+               }
                set_page_dirty(page);
 
                unlock_extent(&BTRFS_I(inode)->io_tree,
index f306c608dc2880f1811905e033c1219dd5e356cc..484e2af793de2a86fa206196661d47cda078f0c2 100644 (file)
@@ -5005,6 +5005,9 @@ static int send_hole(struct send_ctx *sctx, u64 end)
        u64 len;
        int ret = 0;
 
+       if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
+               return send_update_extent(sctx, offset, end - offset);
+
        p = fs_path_alloc();
        if (!p)
                return -ENOMEM;
index 6e71a2a783630ac15d2d910fd0613347c4a260ac..4b817947e00f39e327a244f67bbde7ebf2d6b7e0 100644 (file)
@@ -1545,7 +1545,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
         * it for searching for existing supers, so this lets us do that and
         * then open_ctree will properly initialize everything later.
         */
-       fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
+       fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
        if (!fs_info) {
                error = -ENOMEM;
                goto error_sec_opts;
index 4fd19b4d667557f8b45b1ec61ef48f79ebb5f1cf..434457794c279463872a593adf600783f82390cf 100644 (file)
@@ -967,7 +967,9 @@ static noinline int backref_in_log(struct btrfs_root *log,
        ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
 
        if (key->type == BTRFS_INODE_EXTREF_KEY) {
-               if (btrfs_find_name_in_ext_backref(path, ref_objectid,
+               if (btrfs_find_name_in_ext_backref(path->nodes[0],
+                                                  path->slots[0],
+                                                  ref_objectid,
                                                   name, namelen, NULL))
                        match = 1;
 
@@ -1191,7 +1193,8 @@ static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
        read_extent_buffer(eb, *name, (unsigned long)&extref->name,
                           *namelen);
 
-       *index = btrfs_inode_extref_index(eb, extref);
+       if (index)
+               *index = btrfs_inode_extref_index(eb, extref);
        if (parent_objectid)
                *parent_objectid = btrfs_inode_extref_parent(eb, extref);
 
@@ -1212,11 +1215,101 @@ static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
 
        read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen);
 
-       *index = btrfs_inode_ref_index(eb, ref);
+       if (index)
+               *index = btrfs_inode_ref_index(eb, ref);
 
        return 0;
 }
 
+/*
+ * Take an inode reference item from the log tree and iterate all names from the
+ * inode reference item in the subvolume tree with the same key (if it exists).
+ * Any name that is not in the log tree's item gets a proper unlink (its entry
+ * is removed from the inode reference item and both dir index keys), then the
+ * search restarts. Returns 0 on success or the error of the step that failed.
+ */
+static int unlink_old_inode_refs(struct btrfs_trans_handle *trans,
+                                struct btrfs_root *root,
+                                struct btrfs_path *path,
+                                struct btrfs_inode *inode,
+                                struct extent_buffer *log_eb,
+                                int log_slot,
+                                struct btrfs_key *key)
+{
+       int ret;
+       unsigned long ref_ptr;
+       unsigned long ref_end;
+       struct extent_buffer *eb;
+
+again:
+       btrfs_release_path(path);
+       ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+       if (ret > 0) {
+               ret = 0; /* nothing to unlink; not an error */
+               goto out;
+       }
+       if (ret < 0)
+               goto out;
+
+       eb = path->nodes[0];
+       ref_ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
+       ref_end = ref_ptr + btrfs_item_size_nr(eb, path->slots[0]);
+       while (ref_ptr < ref_end) {
+               char *name = NULL;
+               int namelen;
+               u64 parent_id;
+
+               if (key->type == BTRFS_INODE_EXTREF_KEY) {
+                       ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
+                                               NULL, &parent_id);
+               } else {
+                       parent_id = key->offset;
+                       ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
+                                            NULL);
+               }
+               if (ret)
+                       goto out;
+
+               if (key->type == BTRFS_INODE_EXTREF_KEY)
+                       ret = btrfs_find_name_in_ext_backref(log_eb, log_slot,
+                                                            parent_id, name,
+                                                            namelen, NULL);
+               else
+                       ret = btrfs_find_name_in_backref(log_eb, log_slot, name,
+                                                        namelen, NULL);
+
+               if (!ret) { /* name is not in the log tree's item */
+                       struct inode *dir;
+
+                       btrfs_release_path(path);
+                       dir = read_one_inode(root, parent_id);
+                       if (!dir) {
+                               ret = -ENOENT;
+                               kfree(name);
+                               goto out;
+                       }
+                       ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
+                                                inode, name, namelen);
+                       kfree(name);
+                       iput(dir);
+                       if (ret)
+                               goto out;
+                       goto again; /* path was released above; search again */
+               }
+
+               kfree(name);
+               ref_ptr += namelen; /* step over the name, then the ref header */
+               if (key->type == BTRFS_INODE_EXTREF_KEY)
+                       ref_ptr += sizeof(struct btrfs_inode_extref);
+               else
+                       ref_ptr += sizeof(struct btrfs_inode_ref);
+       }
+       ret = 0;
+ out:
+       btrfs_release_path(path);
+       return ret;
+}
+
 /*
  * replay one inode back reference item found in the log tree.
  * eb, slot and key refer to the buffer and key found in the log tree.
@@ -1345,6 +1438,19 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
                }
        }
 
+       /*
+        * Before we overwrite the inode reference item in the subvolume tree
+        * with the item from the log tree, we must unlink all names from the
+        * parent directory that are in the subvolume's tree inode reference
+        * item, otherwise we end up with an inconsistent subvolume tree where
+        * dir index entries exist for a name but there is no inode reference
+        * item with the same name.
+        */
+       ret = unlink_old_inode_refs(trans, root, path, BTRFS_I(inode), eb, slot,
+                                   key);
+       if (ret)
+               goto out;
+
        /* finally write the back reference in the inode */
        ret = overwrite_item(trans, root, path, eb, slot, key);
 out:
@@ -5853,7 +5959,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
         * this will force the logging code to walk the dentry chain
         * up for the file
         */
-       if (S_ISREG(inode->vfs_inode.i_mode))
+       if (!S_ISDIR(inode->vfs_inode.i_mode))
                inode->last_unlink_trans = trans->transid;
 
        /*
index 2ceb924ca0d630334ab81c33b24eef97194b5874..b2d05c6b1c5672a638ba8752333c10508e366885 100644 (file)
@@ -4829,10 +4829,13 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        ndevs = min(ndevs, devs_max);
 
        /*
-        * the primary goal is to maximize the number of stripes, so use as many
-        * devices as possible, even if the stripes are not maximum sized.
+        * The primary goal is to maximize the number of stripes, so use as
+        * many devices as possible, even if the stripes are not maximum sized.
+        *
+        * The DUP profile stores more than one stripe per device, the
+        * max_avail is the total size so we have to adjust.
         */
-       stripe_size = devices_info[ndevs-1].max_avail;
+       stripe_size = div_u64(devices_info[ndevs - 1].max_avail, dev_stripes);
        num_stripes = ndevs * dev_stripes;
 
        /*
@@ -4867,8 +4870,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                        stripe_size = devices_info[ndevs-1].max_avail;
        }
 
-       stripe_size = div_u64(stripe_size, dev_stripes);
-
        /* align to BTRFS_STRIPE_LEN */
        stripe_size = round_down(stripe_size, BTRFS_STRIPE_LEN);
 
index 6582c4507e6c9d1fdf2876c13c424f0c5830495c..0e5bd3e3344e7983e6bdf38dba1adfba17500eba 100644 (file)
@@ -3964,6 +3964,32 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
                ceph_check_caps(ci, 0, NULL);
 }
 
+/*
+ * Return the caps to drop for a soon-to-be unlinked file: always LINK_SHARED
+ * and LINK_EXCL, plus, if the link count looks like it will hit 0, any other
+ * caps (other than PIN) we don't want (the file may still be open).  In that
+ * case also set NODELAY and, if caps are dirty, __cap_delay_requeue_front().
+ */
+int ceph_drop_caps_for_unlink(struct inode *inode)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
+
+       spin_lock(&ci->i_ceph_lock);
+       if (inode->i_nlink == 1) {
+               drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
+
+               ci->i_ceph_flags |= CEPH_I_NODELAY;
+               if (__ceph_caps_dirty(ci)) {
+                       struct ceph_mds_client *mdsc =
+                               ceph_inode_to_client(inode)->mdsc;
+                       __cap_delay_requeue_front(mdsc, ci);
+               }
+       }
+       spin_unlock(&ci->i_ceph_lock);
+       return drop;
+}
+
 /*
  * Helpers for embedding cap and dentry lease releases into mds
  * requests.
index 0c4346806e17a6f9a70e35fdafab13440c426d65..f1d9c6cc0491d7f9f33e4073db9785afac669cc8 100644 (file)
@@ -1002,26 +1002,6 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
        return err;
 }
 
-/*
- * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps.  If it
- * looks like the link count will hit 0, drop any other caps (other
- * than PIN) we don't specifically want (due to the file still being
- * open).
- */
-static int drop_caps_for_unlink(struct inode *inode)
-{
-       struct ceph_inode_info *ci = ceph_inode(inode);
-       int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
-
-       spin_lock(&ci->i_ceph_lock);
-       if (inode->i_nlink == 1) {
-               drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
-               ci->i_ceph_flags |= CEPH_I_NODELAY;
-       }
-       spin_unlock(&ci->i_ceph_lock);
-       return drop;
-}
-
 /*
  * rmdir and unlink are differ only by the metadata op code
  */
@@ -1056,7 +1036,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
        set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
        req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
        req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
-       req->r_inode_drop = drop_caps_for_unlink(inode);
+       req->r_inode_drop = ceph_drop_caps_for_unlink(inode);
        err = ceph_mdsc_do_request(mdsc, dir, req);
        if (!err && !req->r_reply_info.head->is_dentry)
                d_delete(dentry);
@@ -1104,8 +1084,10 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
        req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
        /* release LINK_RDCACHE on source inode (mds will lock it) */
        req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
-       if (d_really_is_positive(new_dentry))
-               req->r_inode_drop = drop_caps_for_unlink(d_inode(new_dentry));
+       if (d_really_is_positive(new_dentry)) {
+               req->r_inode_drop =
+                       ceph_drop_caps_for_unlink(d_inode(new_dentry));
+       }
        err = ceph_mdsc_do_request(mdsc, old_dir, req);
        if (!err && !req->r_reply_info.head->is_dentry) {
                /*
index a62d2a9841dc2b0487181155373c03eac60f8a02..fb2bc9c15a2378ceb5712269ab93e677c360f5d0 100644 (file)
@@ -225,6 +225,7 @@ static int parse_fsopt_token(char *c, void *private)
                        return -ENOMEM;
                break;
        case Opt_mds_namespace:
+               kfree(fsopt->mds_namespace);
                fsopt->mds_namespace = kstrndup(argstr[0].from,
                                                argstr[0].to-argstr[0].from,
                                                GFP_KERNEL);
@@ -232,6 +233,7 @@ static int parse_fsopt_token(char *c, void *private)
                        return -ENOMEM;
                break;
        case Opt_fscache_uniq:
+               kfree(fsopt->fscache_uniq);
                fsopt->fscache_uniq = kstrndup(argstr[0].from,
                                               argstr[0].to-argstr[0].from,
                                               GFP_KERNEL);
@@ -711,14 +713,17 @@ static int __init init_caches(void)
                goto bad_dentry;
 
        ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD);
-
        if (!ceph_file_cachep)
                goto bad_file;
 
-       if ((error = ceph_fscache_register()))
-               goto bad_file;
+       error = ceph_fscache_register();
+       if (error)
+               goto bad_fscache;
 
        return 0;
+
+bad_fscache:
+       kmem_cache_destroy(ceph_file_cachep);
 bad_file:
        kmem_cache_destroy(ceph_dentry_cachep);
 bad_dentry:
@@ -836,7 +841,6 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
        int err;
        unsigned long started = jiffies;  /* note the start time */
        struct dentry *root;
-       int first = 0;   /* first vfsmount for this super_block */
 
        dout("mount start %p\n", fsc);
        mutex_lock(&fsc->client->mount_mutex);
@@ -861,17 +865,17 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
                        path = fsc->mount_options->server_path + 1;
                        dout("mount opening path %s\n", path);
                }
+
+               err = ceph_fs_debugfs_init(fsc);
+               if (err < 0)
+                       goto out;
+
                root = open_root_dentry(fsc, path, started);
                if (IS_ERR(root)) {
                        err = PTR_ERR(root);
                        goto out;
                }
                fsc->sb->s_root = dget(root);
-               first = 1;
-
-               err = ceph_fs_debugfs_init(fsc);
-               if (err < 0)
-                       goto fail;
        } else {
                root = dget(fsc->sb->s_root);
        }
@@ -881,11 +885,6 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
        mutex_unlock(&fsc->client->mount_mutex);
        return root;
 
-fail:
-       if (first) {
-               dput(fsc->sb->s_root);
-               fsc->sb->s_root = NULL;
-       }
 out:
        mutex_unlock(&fsc->client->mount_mutex);
        return ERR_PTR(err);
index 21b2e5b004eb72ba10057df37c6907766aed5993..1c2086e0fec27a60577c7a676b00fac67fb11102 100644 (file)
@@ -987,7 +987,7 @@ extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
                            struct ceph_mds_session *session);
 extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
 extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
-
+extern int  ceph_drop_caps_for_unlink(struct inode *inode);
 extern int ceph_encode_inode_release(void **p, struct inode *inode,
                                     int mds, int drop, int unless, int force);
 extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
index 7c38f39958bc371d0324197968a732d68da927d8..8945e6cabd93f7ce42709f93b90db94dceb74273 100644 (file)
@@ -647,11 +647,16 @@ static inline struct dentry *lock_parent(struct dentry *dentry)
                spin_unlock(&parent->d_lock);
                goto again;
        }
-       rcu_read_unlock();
-       if (parent != dentry)
+       if (parent != dentry) {
                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-       else
+               if (unlikely(dentry->d_lockref.count < 0)) {
+                       spin_unlock(&parent->d_lock);
+                       parent = NULL;
+               }
+       } else {
                parent = NULL;
+       }
+       rcu_read_unlock();
        return parent;
 }
 
@@ -2474,7 +2479,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
 
 retry:
        rcu_read_lock();
-       seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1;
+       seq = smp_load_acquire(&parent->d_inode->i_dir_seq);
        r_seq = read_seqbegin(&rename_lock);
        dentry = __d_lookup_rcu(parent, name, &d_seq);
        if (unlikely(dentry)) {
@@ -2495,8 +2500,14 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
                rcu_read_unlock();
                goto retry;
        }
+
+       if (unlikely(seq & 1)) {
+               rcu_read_unlock();
+               goto retry;
+       }
+
        hlist_bl_lock(b);
-       if (unlikely(parent->d_inode->i_dir_seq != seq)) {
+       if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) {
                hlist_bl_unlock(b);
                rcu_read_unlock();
                goto retry;
index a0ca9e48e9937da671739e24d6d7dc2a4867d7ac..1357ef563893a1a8f0d2967eeb4b6e7b0ee6444a 100644 (file)
@@ -1274,8 +1274,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
         */
        if (dio->is_async && iov_iter_rw(iter) == WRITE) {
                retval = 0;
-               if ((iocb->ki_filp->f_flags & O_DSYNC) ||
-                   IS_SYNC(iocb->ki_filp->f_mapping->host))
+               if (iocb->ki_flags & IOCB_DSYNC)
                        retval = dio_set_defer_completion(dio);
                else if (!dio->inode->i_sb->s_dio_done_wq) {
                        /*
index 86d6a4435c87c31fa27b1ed5b194c2aa626cc801..51f940e76c5e329b5d3e43b07a84485e0f512c6c 100644 (file)
@@ -807,9 +807,6 @@ int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
                        iomap->length = hole_size(inode, lblock, &mp);
                else
                        iomap->length = size - pos;
-       } else {
-               if (height <= ip->i_height)
-                       iomap->length = hole_size(inode, lblock, &mp);
        }
        goto out_release;
 }
index 8fe1b0aa2896bc547955b8760bffea0ec326d7e8..b9a254dcc0e77e72873b9b49cf49787bd2154678 100644 (file)
@@ -108,6 +108,16 @@ static void huge_pagevec_release(struct pagevec *pvec)
        pagevec_reinit(pvec);
 }
 
+/*
+ * Mask used when checking the page offset value passed in via system
+ * calls.  This value will be converted to a loff_t which is signed.
+ * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the
+ * value.  The extra bit (- 1 in the shift value) is to take the sign
+ * bit into account.
+ */
+#define PGOFF_LOFFT_MAX \
+       (((1UL << (PAGE_SHIFT + 1)) - 1) <<  (BITS_PER_LONG - (PAGE_SHIFT + 1)))
+
 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct inode *inode = file_inode(file);
@@ -127,12 +137,13 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
        vma->vm_ops = &hugetlb_vm_ops;
 
        /*
-        * Offset passed to mmap (before page shift) could have been
-        * negative when represented as a (l)off_t.
+        * page based offset in vm_pgoff could be sufficiently large to
+        * overflow a (l)off_t when converted to byte offset.
         */
-       if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
+       if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
                return -EINVAL;
 
+       /* must be huge page aligned */
        if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
                return -EINVAL;
 
index 9c36d614bf89602121427c27d443365689d31aac..2dee4e03ff1ccf468e0c8447c25197ceee69340f 100644 (file)
@@ -709,6 +709,7 @@ static struct pernet_operations lockd_net_ops = {
        .exit = lockd_exit_net,
        .id = &lockd_net_id,
        .size = sizeof(struct lockd_net),
+       .async = true,
 };
 
 
index 921ae32dbc8053813e70b2a6454bf7dc4f85306a..cafa365eeb70a55215806875685bd9ff047884cd 100644 (file)
@@ -559,9 +559,10 @@ static int __nd_alloc_stack(struct nameidata *nd)
 static bool path_connected(const struct path *path)
 {
        struct vfsmount *mnt = path->mnt;
+       struct super_block *sb = mnt->mnt_sb;
 
-       /* Only bind mounts can have disconnected paths */
-       if (mnt->mnt_root == mnt->mnt_sb->s_root)
+       /* Bind mounts and multi-root filesystems can have disconnected paths */
+       if (!(sb->s_iflags & SB_I_MULTIROOT) && (mnt->mnt_root == sb->s_root))
                return true;
 
        return is_subdir(path->dentry, mnt->mnt_root);
index 2435af56b87e4472a725f7ef1c6e3b252dd82ae9..a50d7813e3ea8dd03dd7c5e02b36574be815ad8b 100644 (file)
@@ -572,7 +572,7 @@ __be32 nfs4_callback_sequence(void *argp, void *resp,
 }
 
 static bool
-validate_bitmap_values(unsigned long mask)
+validate_bitmap_values(unsigned int mask)
 {
        return (mask & ~RCA4_TYPE_MASK_ALL) == 0;
 }
@@ -596,17 +596,15 @@ __be32 nfs4_callback_recallany(void *argp, void *resp,
                goto out;
 
        status = cpu_to_be32(NFS4_OK);
-       if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
-                    &args->craa_type_mask))
+       if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_RDATA_DLG))
                flags = FMODE_READ;
-       if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
-                    &args->craa_type_mask))
+       if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_WDATA_DLG))
                flags |= FMODE_WRITE;
-       if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
-                    &args->craa_type_mask))
-               pnfs_recall_all_layouts(cps->clp);
        if (flags)
                nfs_expire_unused_delegation_types(cps->clp, flags);
+
+       if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_FILE_LAYOUT))
+               pnfs_recall_all_layouts(cps->clp);
 out:
        dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
        return status;
index 8c10b0562e75d2fb0b3c92fe0628d3a79fede48b..621c517b325c664a81a609483ab7b9830f12e25a 100644 (file)
@@ -86,10 +86,10 @@ struct nfs_direct_req {
        struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX];
        int                     mirror_count;
 
+       loff_t                  io_start;       /* Start offset for I/O */
        ssize_t                 count,          /* bytes actually processed */
                                max_count,      /* max expected count */
                                bytes_left,     /* bytes left to be sent */
-                               io_start,       /* start of IO */
                                error;          /* any reported error */
        struct completion       completion;     /* wait for i/o completion */
 
index 7d893543cf3b37acdb7afe7ad423cecb63204697..6c3083c992e56a207f07226ef82febde24dac6f3 100644 (file)
@@ -2122,6 +2122,7 @@ static struct pernet_operations nfs_net_ops = {
        .exit = nfs_net_exit,
        .id   = &nfs_net_id,
        .size = sizeof(struct nfs_net),
+       .async = true,
 };
 
 /*
index 49f848fd1f04779108d86a5eee618e21d11304e6..7327930ad970ab98338e8f21a4fb363559205522 100644 (file)
@@ -873,7 +873,7 @@ static void nfs3_nlm_release_call(void *data)
        }
 }
 
-const struct nlmclnt_operations nlmclnt_fl_close_lock_ops = {
+static const struct nlmclnt_operations nlmclnt_fl_close_lock_ops = {
        .nlmclnt_alloc_call = nfs3_nlm_alloc_call,
        .nlmclnt_unlock_prepare = nfs3_nlm_unlock_prepare,
        .nlmclnt_release_call = nfs3_nlm_release_call,
index 04612c24d39431b15ff58027a7ebfe7b5450d774..979631411a0e4ea5a7eedc3c668d36a57c20ced6 100644 (file)
@@ -868,8 +868,10 @@ static int nfs4_set_client(struct nfs_server *server,
        if (IS_ERR(clp))
                return PTR_ERR(clp);
 
-       if (server->nfs_client == clp)
+       if (server->nfs_client == clp) {
+               nfs_put_client(clp);
                return -ELOOP;
+       }
 
        /*
         * Query for the lease time on clientid setup or renewal
@@ -1244,11 +1246,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
                                clp->cl_proto, clnt->cl_timeout,
                                clp->cl_minorversion, net);
        clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
-       nfs_put_client(clp);
        if (error != 0) {
                nfs_server_insert_lists(server);
                return error;
        }
+       nfs_put_client(clp);
 
        if (server->nfs_client->cl_hostname == NULL)
                server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL);
index c13e826614b5798ccd7628398944c140ae07f983..ee723aa153a3300633bc434f52156f3cf443f3dd 100644 (file)
@@ -292,8 +292,11 @@ pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
 void
 pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
 {
-       struct inode *inode = lo->plh_inode;
+       struct inode *inode;
 
+       if (!lo)
+               return;
+       inode = lo->plh_inode;
        pnfs_layoutreturn_before_put_layout_hdr(lo);
 
        if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
@@ -1241,10 +1244,12 @@ bool pnfs_roc(struct inode *ino,
        spin_lock(&ino->i_lock);
        lo = nfsi->layout;
        if (!lo || !pnfs_layout_is_valid(lo) ||
-           test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
+           test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+               lo = NULL;
                goto out_noroc;
+       }
+       pnfs_get_layout_hdr(lo);
        if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
-               pnfs_get_layout_hdr(lo);
                spin_unlock(&ino->i_lock);
                wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
                                TASK_UNINTERRUPTIBLE);
@@ -1312,10 +1317,12 @@ bool pnfs_roc(struct inode *ino,
                struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
                if (ld->prepare_layoutreturn)
                        ld->prepare_layoutreturn(args);
+               pnfs_put_layout_hdr(lo);
                return true;
        }
        if (layoutreturn)
                pnfs_send_layoutreturn(lo, &stateid, iomode, true);
+       pnfs_put_layout_hdr(lo);
        return false;
 }
 
index 29bacdc56f6a9fcf83225844360088e180d32ad7..5e470e233c83d242856322bb27c86e50d206cfe8 100644 (file)
@@ -2631,6 +2631,8 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
                /* initial superblock/root creation */
                mount_info->fill_super(s, mount_info);
                nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned);
+               if (!(server->flags & NFS_MOUNT_UNSHARED))
+                       s->s_iflags |= SB_I_MULTIROOT;
        }
 
        mntroot = nfs_get_root(s, mount_info->mntfh, dev_name);
index 7428a669d7a77b5fd82a7b8da348308ac058d155..e7d8ceae8f26b4ad5f6b9c68df14f13d858ab308 100644 (file)
@@ -1876,40 +1876,43 @@ int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
        return status;
 }
 
-int nfs_commit_inode(struct inode *inode, int how)
+static int __nfs_commit_inode(struct inode *inode, int how,
+               struct writeback_control *wbc)
 {
        LIST_HEAD(head);
        struct nfs_commit_info cinfo;
        int may_wait = how & FLUSH_SYNC;
-       int error = 0;
-       int res;
+       int ret, nscan;
 
        nfs_init_cinfo_from_inode(&cinfo, inode);
        nfs_commit_begin(cinfo.mds);
-       res = nfs_scan_commit(inode, &head, &cinfo);
-       if (res)
-               error = nfs_generic_commit_list(inode, &head, how, &cinfo);
+       for (;;) {
+               ret = nscan = nfs_scan_commit(inode, &head, &cinfo);
+               if (ret <= 0)
+                       break;
+               ret = nfs_generic_commit_list(inode, &head, how, &cinfo);
+               if (ret < 0)
+                       break;
+               ret = 0;
+               if (wbc && wbc->sync_mode == WB_SYNC_NONE) {
+                       if (nscan < wbc->nr_to_write)
+                               wbc->nr_to_write -= nscan;
+                       else
+                               wbc->nr_to_write = 0;
+               }
+               if (nscan < INT_MAX)
+                       break;
+               cond_resched();
+       }
        nfs_commit_end(cinfo.mds);
-       if (res == 0)
-               return res;
-       if (error < 0)
-               goto out_error;
-       if (!may_wait)
-               goto out_mark_dirty;
-       error = wait_on_commit(cinfo.mds);
-       if (error < 0)
-               return error;
-       return res;
-out_error:
-       res = error;
-       /* Note: If we exit without ensuring that the commit is complete,
-        * we must mark the inode as dirty. Otherwise, future calls to
-        * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
-        * that the data is on the disk.
-        */
-out_mark_dirty:
-       __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
-       return res;
+       if (ret || !may_wait)
+               return ret;
+       return wait_on_commit(cinfo.mds);
+}
+
+int nfs_commit_inode(struct inode *inode, int how)
+{
+       return __nfs_commit_inode(inode, how, NULL);
 }
 EXPORT_SYMBOL_GPL(nfs_commit_inode);
 
@@ -1919,11 +1922,11 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
        int flags = FLUSH_SYNC;
        int ret = 0;
 
-       /* no commits means nothing needs to be done */
-       if (!atomic_long_read(&nfsi->commit_info.ncommit))
-               return ret;
-
        if (wbc->sync_mode == WB_SYNC_NONE) {
+               /* no commits means nothing needs to be done */
+               if (!atomic_long_read(&nfsi->commit_info.ncommit))
+                       goto check_requests_outstanding;
+
                /* Don't commit yet if this is a non-blocking flush and there
                 * are a lot of outstanding writes for this mapping.
                 */
@@ -1934,16 +1937,16 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
                flags = 0;
        }
 
-       ret = nfs_commit_inode(inode, flags);
-       if (ret >= 0) {
-               if (wbc->sync_mode == WB_SYNC_NONE) {
-                       if (ret < wbc->nr_to_write)
-                               wbc->nr_to_write -= ret;
-                       else
-                               wbc->nr_to_write = 0;
-               }
-               return 0;
-       }
+       ret = __nfs_commit_inode(inode, flags, wbc);
+       if (!ret) {
+               if (flags & FLUSH_SYNC)
+                       return 0;
+       } else if (atomic_long_read(&nfsi->commit_info.ncommit))
+               goto out_mark_dirty;
+
+check_requests_outstanding:
+       if (!atomic_read(&nfsi->commit_info.rpcs_out))
+               return ret;
 out_mark_dirty:
        __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
        return ret;
index 5be08f02a76bcb7f405bce8169f53ebf34c452a2..8c743a405df69d007e235dc802aef3644cb90995 100644 (file)
@@ -118,6 +118,7 @@ static struct pernet_operations grace_net_ops = {
        .exit = grace_exit_net,
        .id   = &grace_net_id,
        .size = sizeof(struct list_head),
+       .async = true,
 };
 
 static int __init
index 150521c9671b9f5d163cb18b7b6135709c112738..61b770e398093ae6fcc13150c8ec6736df2bee21 100644 (file)
@@ -268,6 +268,35 @@ free_blocked_lock(struct nfsd4_blocked_lock *nbl)
        kfree(nbl);
 }
 
+static void
+remove_blocked_locks(struct nfs4_lockowner *lo)
+{
+       struct nfs4_client *clp = lo->lo_owner.so_client;
+       struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+       struct nfsd4_blocked_lock *nbl;
+       LIST_HEAD(reaplist);
+
+       /* Dequeue all blocked locks */
+       spin_lock(&nn->blocked_locks_lock);
+       while (!list_empty(&lo->lo_blocked)) {
+               nbl = list_first_entry(&lo->lo_blocked,
+                                       struct nfsd4_blocked_lock,
+                                       nbl_list);
+               list_del_init(&nbl->nbl_list);
+               list_move(&nbl->nbl_lru, &reaplist);
+       }
+       spin_unlock(&nn->blocked_locks_lock);
+
+       /* Now free them */
+       while (!list_empty(&reaplist)) {
+               nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock,
+                                       nbl_lru);
+               list_del_init(&nbl->nbl_lru);
+               posix_unblock_lock(&nbl->nbl_lock);
+               free_blocked_lock(nbl);
+       }
+}
+
 static int
 nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
 {
@@ -1866,6 +1895,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp)
 static void
 __destroy_client(struct nfs4_client *clp)
 {
+       int i;
        struct nfs4_openowner *oo;
        struct nfs4_delegation *dp;
        struct list_head reaplist;
@@ -1895,6 +1925,16 @@ __destroy_client(struct nfs4_client *clp)
                nfs4_get_stateowner(&oo->oo_owner);
                release_openowner(oo);
        }
+       for (i = 0; i < OWNER_HASH_SIZE; i++) {
+               struct nfs4_stateowner *so, *tmp;
+
+               list_for_each_entry_safe(so, tmp, &clp->cl_ownerstr_hashtbl[i],
+                                        so_strhash) {
+                       /* Should be no openowners at this point */
+                       WARN_ON_ONCE(so->so_is_open_owner);
+                       remove_blocked_locks(lockowner(so));
+               }
+       }
        nfsd4_return_all_client_layouts(clp);
        nfsd4_shutdown_callback(clp);
        if (clp->cl_cb_conn.cb_xprt)
@@ -6355,6 +6395,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
        }
        spin_unlock(&clp->cl_lock);
        free_ol_stateid_reaplist(&reaplist);
+       remove_blocked_locks(lo);
        nfs4_put_stateowner(&lo->lo_owner);
 
        return status;
@@ -7140,6 +7181,8 @@ nfs4_state_destroy_net(struct net *net)
                }
        }
 
+       WARN_ON(!list_empty(&nn->blocked_locks_lru));
+
        for (i = 0; i < CLIENT_HASH_SIZE; i++) {
                while (!list_empty(&nn->unconf_id_hashtbl[i])) {
                        clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
@@ -7206,7 +7249,6 @@ nfs4_state_shutdown_net(struct net *net)
        struct nfs4_delegation *dp = NULL;
        struct list_head *pos, *next, reaplist;
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-       struct nfsd4_blocked_lock *nbl;
 
        cancel_delayed_work_sync(&nn->laundromat_work);
        locks_end_grace(&nn->nfsd4_manager);
@@ -7227,24 +7269,6 @@ nfs4_state_shutdown_net(struct net *net)
                nfs4_put_stid(&dp->dl_stid);
        }
 
-       BUG_ON(!list_empty(&reaplist));
-       spin_lock(&nn->blocked_locks_lock);
-       while (!list_empty(&nn->blocked_locks_lru)) {
-               nbl = list_first_entry(&nn->blocked_locks_lru,
-                                       struct nfsd4_blocked_lock, nbl_lru);
-               list_move(&nbl->nbl_lru, &reaplist);
-               list_del_init(&nbl->nbl_list);
-       }
-       spin_unlock(&nn->blocked_locks_lock);
-
-       while (!list_empty(&reaplist)) {
-               nbl = list_first_entry(&reaplist,
-                                       struct nfsd4_blocked_lock, nbl_lru);
-               list_del_init(&nbl->nbl_lru);
-               posix_unblock_lock(&nbl->nbl_lock);
-               free_blocked_lock(nbl);
-       }
-
        nfsd4_client_tracking_exit(net);
        nfs4_state_destroy_net(net);
 }
index 406e72de88f6f893a4e81189e6b8e1a9751573ae..ce6ff5a0a6e4e8b75d4f588981d9c710432bb52b 100644 (file)
@@ -24,6 +24,8 @@ config OVERLAY_FS_REDIRECT_DIR
          an overlay which has redirects on a kernel that doesn't support this
          feature will have unexpected results.
 
+         If unsure, say N.
+
 config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
        bool "Overlayfs: follow redirects even if redirects are turned off"
        default y
@@ -32,8 +34,13 @@ config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
          Disable this to get a possibly more secure configuration, but that
          might not be backward compatible with previous kernels.
 
+         If backward compatibility is not an issue, then it is safe and
+         recommended to say N here.
+
          For more information, see Documentation/filesystems/overlayfs.txt
 
+         If unsure, say Y.
+
 config OVERLAY_FS_INDEX
        bool "Overlayfs: turn on inodes index feature by default"
        depends on OVERLAY_FS
@@ -51,6 +58,8 @@ config OVERLAY_FS_INDEX
          That is, mounting an overlay which has an inodes index on a kernel
          that doesn't support this feature will have unexpected results.
 
+         If unsure, say N.
+
 config OVERLAY_FS_NFS_EXPORT
        bool "Overlayfs: turn on NFS export feature by default"
        depends on OVERLAY_FS
@@ -72,3 +81,8 @@ config OVERLAY_FS_NFS_EXPORT
          Note, that the NFS export feature is not backward compatible.
          That is, mounting an overlay which has a full index on a kernel
          that doesn't support this feature will have unexpected results.
+
+         Most users should say N here and enable this feature on a case-by-
+         case basis with the "nfs_export=on" mount option.
+
+         Say N unless you fully understand the consequences.
index bb94ce9da5c8723b9f35a9195f5e8926c9e83b91..87bd4148f4fb5811547fa2b44a7965af7f22e52a 100644 (file)
 #include <linux/ratelimit.h>
 #include "overlayfs.h"
 
+static int ovl_encode_maybe_copy_up(struct dentry *dentry)
+{
+       int err;
+
+       if (ovl_dentry_upper(dentry))
+               return 0;
+
+       err = ovl_want_write(dentry);
+       if (!err) {
+               err = ovl_copy_up(dentry);
+               ovl_drop_write(dentry);
+       }
+
+       if (err) {
+               pr_warn_ratelimited("overlayfs: failed to copy up on encode (%pd2, err=%i)\n",
+                                   dentry, err);
+       }
+
+       return err;
+}
+
+/*
+ * Before encoding a non-upper directory file handle from real layer N, we need
+ * to check if it will be possible to reconnect an overlay dentry from the real
+ * lower decoded dentry. This is done by following the overlay ancestry up to a
+ * "layer N connected" ancestor and verifying that all parents along the way are
+ * "layer N connectable". If an ancestor that is NOT "layer N connectable" is
+ * found, we need to copy up an ancestor, which is "layer N connectable", thus
+ * making that ancestor "layer N connected". For example:
+ *
+ * layer 1: /a
+ * layer 2: /a/b/c
+ *
+ * The overlay dentry /a is NOT "layer 2 connectable", because if dir /a is
+ * copied up and renamed, upper dir /a will be indexed by lower dir /a from
+ * layer 1. The dir /a from layer 2 will never be indexed, so the algorithm (*)
+ * in ovl_lookup_real_ancestor() will not be able to lookup a connected overlay
+ * dentry from the connected lower dentry /a/b/c.
+ *
+ * To avoid this problem at decode time, we need to copy up an ancestor of
+ * /a/b/c, which is "layer 2 connectable", at encode time. That ancestor is
+ * /a/b. After copy up (and index) of /a/b, it will become "layer 2 connected"
+ * and when the time comes to decode the file handle from lower dentry /a/b/c,
+ * ovl_lookup_real_ancestor() will find the indexed ancestor /a/b and decoding
+ * a connected overlay dentry will be accomplished.
+ *
+ * (*) the algorithm in ovl_lookup_real_ancestor() can be improved to lookup an
+ * entry /a in the lower layers above layer N and find the indexed dir /a from
+ * layer 1. If that improvement is made, then the check for "layer N connected"
+ * will need to verify there are no redirects in lower layers above N. In the
+ * example above, /a will be "layer 2 connectable". However, if layer 2 dir /a
+ * is a target of a layer 1 redirect, then /a will NOT be "layer 2 connectable":
+ *
+ * layer 1: /A (redirect = /a)
+ * layer 2: /a/b/c
+ */
+
+/* Return the lowest layer for encoding a connectable file handle */
+static int ovl_connectable_layer(struct dentry *dentry)
+{
+       struct ovl_entry *oe = OVL_E(dentry);
+
+       /* We can get overlay root from root of any layer */
+       if (dentry == dentry->d_sb->s_root)
+               return oe->numlower;
+
+       /*
+        * If it's an unindexed merge dir, then it's not connectable with any
+        * lower layer
+        */
+       if (ovl_dentry_upper(dentry) &&
+           !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+               return 0;
+
+       /* We can get upper/overlay path from indexed/lower dentry */
+       return oe->lowerstack[0].layer->idx;
+}
+
+/*
+ * @dentry is "connected" if all ancestors up to root or a "connected" ancestor
+ * have the same uppermost lower layer as the origin's layer. We may need to
+ * copy up a "connectable" ancestor to make it "connected". A "connected" dentry
+ * cannot become non "connected", so cache positive result in dentry flags.
+ *
+ * Return the connected origin layer or < 0 on error.
+ */
+static int ovl_connect_layer(struct dentry *dentry)
+{
+       struct dentry *next, *parent = NULL;
+       int origin_layer;
+       int err = 0;
+
+       if (WARN_ON(dentry == dentry->d_sb->s_root) ||
+           WARN_ON(!ovl_dentry_lower(dentry)))
+               return -EIO;
+
+       origin_layer = OVL_E(dentry)->lowerstack[0].layer->idx;
+       if (ovl_dentry_test_flag(OVL_E_CONNECTED, dentry))
+               return origin_layer;
+
+       /* Find the topmost origin layer connectable ancestor of @dentry */
+       next = dget(dentry);
+       for (;;) {
+               parent = dget_parent(next);
+               if (WARN_ON(parent == next)) {
+                       err = -EIO;
+                       break;
+               }
+
+               /*
+                * If @parent is not origin layer connectable, then copy up
+                * @next which is origin layer connectable and we are done.
+                */
+               if (ovl_connectable_layer(parent) < origin_layer) {
+                       err = ovl_encode_maybe_copy_up(next);
+                       break;
+               }
+
+               /* If @parent is connected or indexed we are done */
+               if (ovl_dentry_test_flag(OVL_E_CONNECTED, parent) ||
+                   ovl_test_flag(OVL_INDEX, d_inode(parent)))
+                       break;
+
+               dput(next);
+               next = parent;
+       }
+
+       dput(parent);
+       dput(next);
+
+       if (!err)
+               ovl_dentry_set_flag(OVL_E_CONNECTED, dentry);
+
+       return err ?: origin_layer;
+}
+
 /*
  * We only need to encode origin if there is a chance that the same object was
  * encoded pre copy up and then we need to stay consistent with the same
  * L = lower file handle
  *
  * (*) Connecting an overlay dir from real lower dentry is not always
- * possible when there are redirects in lower layers. To mitigate this case,
- * we copy up the lower dir first and then encode an upper dir file handle.
+ * possible when there are redirects in lower layers and non-indexed merge dirs.
+ * To mitigate those cases, we may copy up the lower dir ancestor before
+ * encoding a lower dir file handle.
+ *
+ * Return 0 for upper file handle, > 0 for lower file handle or < 0 on error.
  */
-static bool ovl_should_encode_origin(struct dentry *dentry)
+static int ovl_check_encode_origin(struct dentry *dentry)
 {
        struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 
+       /* Upper file handle for pure upper */
        if (!ovl_dentry_lower(dentry))
-               return false;
+               return 0;
 
        /*
-        * Decoding a merge dir, whose origin's parent is under a redirected
-        * lower dir is not always possible. As a simple aproximation, we do
-        * not encode lower dir file handles when overlay has multiple lower
-        * layers and origin is below the topmost lower layer.
+        * Upper file handle for non-indexed upper.
         *
-        * TODO: copy up only the parent that is under redirected lower.
+        * Root is never indexed, so if there's an upper layer, encode upper for
+        * root.
         */
-       if (d_is_dir(dentry) && ofs->upper_mnt &&
-           OVL_E(dentry)->lowerstack[0].layer->idx > 1)
-               return false;
-
-       /* Decoding a non-indexed upper from origin is not implemented */
        if (ovl_dentry_upper(dentry) &&
            !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
-               return false;
-
-       return true;
-}
-
-static int ovl_encode_maybe_copy_up(struct dentry *dentry)
-{
-       int err;
-
-       if (ovl_dentry_upper(dentry))
                return 0;
 
-       err = ovl_want_write(dentry);
-       if (err)
-               return err;
-
-       err = ovl_copy_up(dentry);
+       /*
+        * Decoding a merge dir, whose origin's ancestor is under a redirected
+        * lower dir or under a non-indexed upper is not always possible.
+        * ovl_connect_layer() will try to make origin's layer "connected" by
+        * copying up a "connectable" ancestor.
+        */
+       if (d_is_dir(dentry) && ofs->upper_mnt)
+               return ovl_connect_layer(dentry);
 
-       ovl_drop_write(dentry);
-       return err;
+       /* Lower file handle for indexed and non-upper dir/non-dir */
+       return 1;
 }
 
 static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
 {
-       struct dentry *origin = ovl_dentry_lower(dentry);
        struct ovl_fh *fh = NULL;
-       int err;
+       int err, enc_lower;
 
        /*
-        * If we should not encode a lower dir file handle, copy up and encode
-        * an upper dir file handle.
+        * Check if we should encode a lower or upper file handle and maybe
+        * copy up an ancestor to make lower file handle connectable.
         */
-       if (!ovl_should_encode_origin(dentry)) {
-               err = ovl_encode_maybe_copy_up(dentry);
-               if (err)
-                       goto fail;
-
-               origin = NULL;
-       }
+       err = enc_lower = ovl_check_encode_origin(dentry);
+       if (enc_lower < 0)
+               goto fail;
 
-       /* Encode an upper or origin file handle */
-       fh = ovl_encode_fh(origin ?: ovl_dentry_upper(dentry), !origin);
+       /* Encode an upper or lower file handle */
+       fh = ovl_encode_fh(enc_lower ? ovl_dentry_lower(dentry) :
+                                      ovl_dentry_upper(dentry), !enc_lower);
        err = PTR_ERR(fh);
        if (IS_ERR(fh))
                goto fail;
@@ -355,8 +477,8 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
                dput(upper);
        }
 
-       if (!this)
-               return NULL;
+       if (IS_ERR_OR_NULL(this))
+               return this;
 
        if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) {
                dput(this);
@@ -498,7 +620,7 @@ static struct dentry *ovl_lookup_real(struct super_block *sb,
                        if (err == -ECHILD) {
                                this = ovl_lookup_real_ancestor(sb, real,
                                                                layer);
-                               err = IS_ERR(this) ? PTR_ERR(this) : 0;
+                               err = PTR_ERR_OR_ZERO(this);
                        }
                        if (!err) {
                                dput(connected);
index fcd97b783fa1ffa82dc7ec2eb4b82f03a0be50ef..3b1bd469accdfe767afc276def9f319f57ebde68 100644 (file)
@@ -669,38 +669,59 @@ struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
        return inode;
 }
 
+/*
+ * Does overlay inode need to be hashed by lower inode?
+ */
+static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
+                            struct dentry *lower, struct dentry *index)
+{
+       struct ovl_fs *ofs = sb->s_fs_info;
+
+       /* No, if pure upper */
+       if (!lower)
+               return false;
+
+       /* Yes, if already indexed */
+       if (index)
+               return true;
+
+       /* Yes, if won't be copied up */
+       if (!ofs->upper_mnt)
+               return true;
+
+       /* No, if lower hardlink is or will be broken on copy up */
+       if ((upper || !ovl_indexdir(sb)) &&
+           !d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
+               return false;
+
+       /* No, if non-indexed upper with NFS export */
+       if (sb->s_export_op && upper)
+               return false;
+
+       /* Otherwise, hash by lower inode for fsnotify */
+       return true;
+}
+
 struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
                            struct dentry *lowerdentry, struct dentry *index,
                            unsigned int numlower)
 {
-       struct ovl_fs *ofs = sb->s_fs_info;
        struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
        struct inode *inode;
-       /* Already indexed or could be indexed on copy up? */
-       bool indexed = (index || (ovl_indexdir(sb) && !upperdentry));
-       struct dentry *origin = indexed ? lowerdentry : NULL;
+       bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, index);
        bool is_dir;
 
-       if (WARN_ON(upperdentry && indexed && !lowerdentry))
-               return ERR_PTR(-EIO);
-
        if (!realinode)
                realinode = d_inode(lowerdentry);
 
        /*
-        * Copy up origin (lower) may exist for non-indexed non-dir upper, but
-        * we must not use lower as hash key in that case.
-        * Hash non-dir that is or could be indexed by origin inode.
-        * Hash dir that is or could be merged by origin inode.
-        * Hash pure upper and non-indexed non-dir by upper inode.
-        * Hash non-indexed dir by upper inode for NFS export.
+        * Copy up origin (lower) may exist for non-indexed upper, but we must
+        * not use lower as hash key if this is a broken hardlink.
         */
        is_dir = S_ISDIR(realinode->i_mode);
-       if (is_dir && (indexed || !sb->s_export_op || !ofs->upper_mnt))
-               origin = lowerdentry;
-
-       if (upperdentry || origin) {
-               struct inode *key = d_inode(origin ?: upperdentry);
+       if (upperdentry || bylower) {
+               struct inode *key = d_inode(bylower ? lowerdentry :
+                                                     upperdentry);
                unsigned int nlink = is_dir ? 1 : realinode->i_nlink;
 
                inode = iget5_locked(sb, (unsigned long) key,
@@ -728,6 +749,7 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
                        nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
                set_nlink(inode, nlink);
        } else {
+               /* Lower hardlink that will be broken on copy up */
                inode = new_inode(sb);
                if (!inode)
                        goto out_nomem;
index de3e6da1d5a51732f4e0ad3fbad3feb4b6664841..70fcfcc684cc0a07566aeacb7fe564f2156bc0d2 100644 (file)
@@ -913,9 +913,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
                stack[ctr].layer = lower.layer;
                ctr++;
 
-               if (d.stop)
-                       break;
-
                /*
                 * Following redirects can have security consequences: it's like
                 * a symlink into the lower layer without the permission checks.
@@ -933,6 +930,9 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
                        goto out_put;
                }
 
+               if (d.stop)
+                       break;
+
                if (d.redirect && d.redirect[0] == '/' && poe != roe) {
                        poe = roe;
                        /* Find the current layer on the root dentry */
index 0df25a9c94bd777f41f83ad278586732e3104126..225ff11711474fe80d9d4151f6267aaab587f034 100644 (file)
@@ -40,6 +40,7 @@ enum ovl_inode_flag {
 enum ovl_entry_flag {
        OVL_E_UPPER_ALIAS,
        OVL_E_OPAQUE,
+       OVL_E_CONNECTED,
 };
 
 /*
index 9ee37c76091d685275dcc39945b29186900c2669..7c24619ae7fc5229a5d3af6c9298b51064498325 100644 (file)
@@ -1359,6 +1359,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
        /* Root is always merge -> can have whiteouts */
        ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
+       ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
        ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
                       ovl_dentry_lower(root_dentry));
 
index 8664db25a9a6f013922e889aa110927d4e3096fa..215c225b2ca17e462da85528026d07cdf8647ce2 100644 (file)
@@ -106,6 +106,7 @@ int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target,
 {
        return sysfs_do_create_link(kobj, target, name, 0);
 }
+EXPORT_SYMBOL_GPL(sysfs_create_link_nowarn);
 
 /**
  *     sysfs_delete_link - remove symlink in object's directory.
index fd975524f4603387e28078d13b175b5df2443021..05c66e05ae20f0bbe08c713de9feddf13c813982 100644 (file)
@@ -767,7 +767,7 @@ int
 xfs_scrub_agfl(
        struct xfs_scrub_context        *sc)
 {
-       struct xfs_scrub_agfl_info      sai = { 0 };
+       struct xfs_scrub_agfl_info      sai;
        struct xfs_agf                  *agf;
        xfs_agnumber_t                  agno;
        unsigned int                    agflcount;
@@ -795,6 +795,7 @@ xfs_scrub_agfl(
                xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
                goto out;
        }
+       memset(&sai, 0, sizeof(sai));
        sai.sz_entries = agflcount;
        sai.entries = kmem_zalloc(sizeof(xfs_agblock_t) * agflcount, KM_NOFS);
        if (!sai.entries) {
index 66e1edbfb2b2bcd7d278226a33f0531bb0043b97..046469fcc1b8a66075806080ca175277e2716dcd 100644 (file)
@@ -955,15 +955,29 @@ static inline bool imap_needs_alloc(struct inode *inode,
                (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN);
 }
 
+static inline bool needs_cow_for_zeroing(struct xfs_bmbt_irec *imap, int nimaps)
+{
+       return nimaps &&
+               imap->br_startblock != HOLESTARTBLOCK &&
+               imap->br_state != XFS_EXT_UNWRITTEN;
+}
+
 static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags)
 {
        /*
-        * COW writes will allocate delalloc space, so we need to make sure
-        * to take the lock exclusively here.
+        * COW writes may allocate delalloc space or convert unwritten COW
+        * extents, so we need to make sure to take the lock exclusively here.
         */
        if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO)))
                return true;
-       if ((flags & IOMAP_DIRECT) && (flags & IOMAP_WRITE))
+
+       /*
+        * Extents not yet cached require exclusive access, don't block.
+        * This is an opencoded xfs_ilock_data_map_shared() to cater for the
+        * non-blocking behaviour.
+        */
+       if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
+           !(ip->i_df.if_flags & XFS_IFEXTENTS))
                return true;
        return false;
 }
@@ -993,16 +1007,18 @@ xfs_file_iomap_begin(
                return xfs_file_iomap_begin_delay(inode, offset, length, iomap);
        }
 
-       if (need_excl_ilock(ip, flags)) {
+       if (need_excl_ilock(ip, flags))
                lockmode = XFS_ILOCK_EXCL;
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-       } else {
-               lockmode = xfs_ilock_data_map_shared(ip);
-       }
+       else
+               lockmode = XFS_ILOCK_SHARED;
 
-       if ((flags & IOMAP_NOWAIT) && !(ip->i_df.if_flags & XFS_IFEXTENTS)) {
-               error = -EAGAIN;
-               goto out_unlock;
+       if (flags & IOMAP_NOWAIT) {
+               if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
+                       return -EAGAIN;
+               if (!xfs_ilock_nowait(ip, lockmode))
+                       return -EAGAIN;
+       } else {
+               xfs_ilock(ip, lockmode);
        }
 
        ASSERT(offset <= mp->m_super->s_maxbytes);
@@ -1024,7 +1040,9 @@ xfs_file_iomap_begin(
                        goto out_unlock;
        }
 
-       if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
+       if (xfs_is_reflink_inode(ip) &&
+           ((flags & IOMAP_WRITE) ||
+            ((flags & IOMAP_ZERO) && needs_cow_for_zeroing(&imap, nimaps)))) {
                if (flags & IOMAP_DIRECT) {
                        /*
                         * A reflinked inode will result in CoW alloc.
index 3a55d6fc271b1e6d50aa6ce96c9e84b7c5886e43..7a39f40645f7dddbd41740bce4404dbf36fd635b 100644 (file)
@@ -23,6 +23,7 @@
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
+#include "xfs_shared.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_trans.h"
@@ -456,10 +457,12 @@ xfs_cui_recover(
         * transaction.  Normally, any work that needs to be deferred
         * gets attached to the same defer_ops that scheduled the
         * refcount update.  However, we're in log recovery here, so we
-        * we create our own defer_ops and use that to finish up any
-        * work that doesn't fit.
+        * use the passed in defer_ops to finish up any work that
+        * doesn't fit.  We need to reserve enough blocks to handle a
+        * full btree split on either end of the refcount range.
         */
-       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+                       mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
        if (error)
                return error;
        cudp = xfs_trans_get_cud(tp, cuip);
index f3b139c9aa1674a3f652ec622286e9aba616d5c3..49d3124863a81f719efd1774b0b6ac7f651b0a26 100644 (file)
@@ -23,6 +23,7 @@
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
+#include "xfs_shared.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_trans.h"
@@ -470,7 +471,8 @@ xfs_rui_recover(
                }
        }
 
-       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+                       mp->m_rmap_maxlevels, 0, XFS_TRANS_RESERVE, &tp);
        if (error)
                return error;
        rudp = xfs_trans_get_rud(tp, ruip);
index 7aba628dc5279858a7608a78839b0e32753bbee6..93588ea3d3d2ca2d664edb4e1b853f5d0dad6a73 100644 (file)
@@ -250,6 +250,7 @@ xfs_parseargs(
                                return -EINVAL;
                        break;
                case Opt_logdev:
+                       kfree(mp->m_logname);
                        mp->m_logname = match_strdup(args);
                        if (!mp->m_logname)
                                return -ENOMEM;
@@ -258,6 +259,7 @@ xfs_parseargs(
                        xfs_warn(mp, "%s option not allowed on this system", p);
                        return -EINVAL;
                case Opt_rtdev:
+                       kfree(mp->m_rtname);
                        mp->m_rtname = match_strdup(args);
                        if (!mp->m_rtname)
                                return -ENOMEM;
index 2cfa3075d148b60196c2d63ede83ea4e182ab1fe..bfbb44a5ad38965f00c82b19135b3d81ab20142d 100644 (file)
@@ -983,6 +983,8 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
 int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
 int pud_clear_huge(pud_t *pud);
 int pmd_clear_huge(pmd_t *pmd);
+int pud_free_pmd_page(pud_t *pud);
+int pmd_free_pte_page(pmd_t *pmd);
 #else  /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
 static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
 {
@@ -1008,6 +1010,14 @@ static inline int pmd_clear_huge(pmd_t *pmd)
 {
        return 0;
 }
+static inline int pud_free_pmd_page(pud_t *pud)
+{
+       return 0;
+}
+static inline int pmd_free_pte_page(pmd_t *pmd)
+{
+       return 0;
+}
 #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
 
 #ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
index d32b688eb346c9252eb66d73be49fdd477e290de..d23dcdd1bd95a20ba98e4ae65b57d6f225cad7c3 100644 (file)
@@ -56,6 +56,7 @@ struct drm_printer;
 #define DRIVER_ATOMIC                  0x10000
 #define DRIVER_KMS_LEGACY_CONTEXT      0x20000
 #define DRIVER_SYNCOBJ                  0x40000
+#define DRIVER_PREFER_XBGR_30BPP        0x80000
 
 /**
  * struct drm_driver - DRM driver structure
index cdbd142ca7f2ea4513a7bcf44f877f9b3bbbebc3..02924ae2527e6580a553814d30fab930af071a19 100644 (file)
@@ -360,6 +360,7 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
+void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid);
 
 void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
 
index d0eb659fa733eb91b57a135932f45b1eea8d9975..ce547a25e8aed5d61efd5c0ce9b5c450af4fabcc 100644 (file)
@@ -511,6 +511,7 @@ void zero_fill_bio(struct bio *bio);
 extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *);
 extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int);
 extern unsigned int bvec_nr_vecs(unsigned short idx);
+extern const char *bio_devname(struct bio *bio, char *buffer);
 
 #define bio_set_dev(bio, bdev)                         \
 do {                                           \
@@ -529,9 +530,6 @@ do {                                                \
 #define bio_dev(bio) \
        disk_devt((bio)->bi_disk)
 
-#define bio_devname(bio, buf) \
-       __bdevname(bio_dev(bio), (buf))
-
 #ifdef CONFIG_BLK_CGROUP
 int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
 void bio_disassociate_task(struct bio *bio);
index a7f16e0f8d68b94bc5743a75c41b538794725724..8a4566691c8f7009bc02d31c039c123b4b2fcac3 100644 (file)
@@ -96,7 +96,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)                                     \
 ({                                                                            \
        int __ret = 0;                                                         \
-       if (cgroup_bpf_enabled && sk) {                                        \
+       if (cgroup_bpf_enabled) {                                              \
                __ret = __cgroup_bpf_run_filter_sk(sk,                         \
                                                 BPF_CGROUP_INET_SOCK_CREATE); \
        }                                                                      \
index 66df387106de4bcb62547b9698cd8872269e7f37..819229c80ecaed8343cf12f3ec663af7ba031353 100644 (file)
@@ -21,6 +21,7 @@ struct bpf_verifier_env;
 struct perf_event;
 struct bpf_prog;
 struct bpf_map;
+struct sock;
 
 /* map is generic key/value storage optionally accesible by eBPF programs */
 struct bpf_map_ops {
index 19b8349a38094cbfcb52ff3ade1e8893018bb0c0..5e2e8a49fb21fc16dbffae29987f75e55385f725 100644 (file)
@@ -13,6 +13,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
 #endif
 #ifdef CONFIG_BPF_EVENTS
 BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
index 9f242b876fde2c29b1d3c023a3934ef01d04cfa9..f8e76d01a5ade411c231c926e55e59a71dde6cc5 100644 (file)
@@ -755,13 +755,13 @@ struct sock_cgroup_data {
  * updaters and return part of the previous pointer as the prioidx or
  * classid.  Such races are short-lived and the result isn't critical.
  */
-static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd)
+static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd)
 {
        /* fallback to 1 which is always the ID of the root cgroup */
        return (skcd->is_data & 1) ? skcd->prioidx : 1;
 }
 
-static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd)
+static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd)
 {
        /* fallback to 0 which is the unconfigured default classid */
        return (skcd->is_data & 1) ? skcd->classid : 0;
index 8a9643857c4a13be60726a18bfe51ccfb1557c46..16c3027074a22f6aea08e0157a5848f824e08afd 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/if.h>
 #include <linux/fs.h>
 #include <linux/aio_abi.h>     /* for aio_context_t */
+#include <linux/uaccess.h>
 #include <linux/unistd.h>
 
 #include <asm/compat.h>
@@ -229,13 +230,13 @@ typedef struct compat_siginfo {
                                short int _addr_lsb;    /* Valid LSB of the reported address. */
                                /* used when si_code=SEGV_BNDERR */
                                struct {
-                                       short _dummy_bnd;
+                                       compat_uptr_t _dummy_bnd;
                                        compat_uptr_t _lower;
                                        compat_uptr_t _upper;
                                } _addr_bnd;
                                /* used when si_code=SEGV_PKUERR */
                                struct {
-                                       short _dummy_pkey;
+                                       compat_uptr_t _dummy_pkey;
                                        u32 _pkey;
                                } _addr_pkey;
                        };
@@ -550,8 +551,29 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
 asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp);
 
 extern int get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat);
-extern int put_compat_sigset(compat_sigset_t __user *compat,
-                            const sigset_t *set, unsigned int size);
+
+/*
+ * Defined inline such that size can be compile time constant, which avoids
+ * CONFIG_HARDENED_USERCOPY complaining about copies from task_struct
+ */
+static inline int
+put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set,
+                 unsigned int size)
+{
+       /* size <= sizeof(compat_sigset_t) <= sizeof(sigset_t) */
+#ifdef __BIG_ENDIAN
+       compat_sigset_t v;
+       switch (_NSIG_WORDS) {
+       case 4: v.sig[7] = (set->sig[3] >> 32); v.sig[6] = set->sig[3];
+       case 3: v.sig[5] = (set->sig[2] >> 32); v.sig[4] = set->sig[2];
+       case 2: v.sig[3] = (set->sig[1] >> 32); v.sig[2] = set->sig[1];
+       case 1: v.sig[1] = (set->sig[0] >> 32); v.sig[0] = set->sig[0];
+       }
+       return copy_to_user(compat, &v, size) ? -EFAULT : 0;
+#else
+       return copy_to_user(compat, set, size) ? -EFAULT : 0;
+#endif
+}
 
 asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
                compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes,
index d02a4df3f4737a2ae98cce7928c3dcf4c4167164..d3f264a5b04d9c999a94273c18272416576b17c1 100644 (file)
@@ -27,3 +27,8 @@
 #if __has_feature(address_sanitizer)
 #define __SANITIZE_ADDRESS__
 #endif
+
+/* Clang doesn't have a way to turn it off per-function, yet. */
+#ifdef __noretpoline
+#undef __noretpoline
+#endif
index 901c1ccb3374aaeba42ab3cda3ed4c3e46d75085..e2c7f4369effdbcf9cb46b1904c3cbe84debd2ca 100644 (file)
 #define __weak         __attribute__((weak))
 #define __alias(symbol)        __attribute__((alias(#symbol)))
 
+#ifdef RETPOLINE
+#define __noretpoline __attribute__((indirect_branch("keep")))
+#endif
+
 /*
  * it doesn't make sense on ARM (currently the only user of __naked)
  * to trace naked functions because then mcount is called without
index 2ec41a7eb54f54bf9ed104f69294bccbd8cd6a30..ebe41811ed34164a9a7000fee9a3a45f55d2480c 100644 (file)
@@ -371,6 +371,11 @@ struct ethtool_ops {
                            u8 *hfunc);
        int     (*set_rxfh)(struct net_device *, const u32 *indir,
                            const u8 *key, const u8 hfunc);
+       int     (*get_rxfh_context)(struct net_device *, u32 *indir, u8 *key,
+                                   u8 *hfunc, u32 rss_context);
+       int     (*set_rxfh_context)(struct net_device *, const u32 *indir,
+                                   const u8 *key, const u8 hfunc,
+                                   u32 *rss_context, bool delete);
        void    (*get_channels)(struct net_device *, struct ethtool_channels *);
        int     (*set_channels)(struct net_device *, struct ethtool_channels *);
        int     (*get_dump_flag)(struct net_device *, struct ethtool_dump *);
index 276932d759750dea5d0865308f589fcb7c61b191..109d05ccea9a4f2cc6de63871f473378088c4a8f 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/set_memory.h>
 #include <linux/kallsyms.h>
 
-#include <net/xdp.h>
 #include <net/sch_generic.h>
 
 #include <uapi/linux/filter.h>
@@ -30,6 +29,7 @@ struct sk_buff;
 struct sock;
 struct seccomp_data;
 struct bpf_prog_aux;
+struct xdp_rxq_info;
 
 /* ArgX, context and stack frame pointer register positions. Note,
  * Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -507,6 +507,22 @@ struct xdp_buff {
        struct xdp_rxq_info *rxq;
 };
 
+struct sk_msg_buff {
+       void *data;
+       void *data_end;
+       __u32 apply_bytes;
+       __u32 cork_bytes;
+       int sg_copybreak;
+       int sg_start;
+       int sg_curr;
+       int sg_end;
+       struct scatterlist sg_data[MAX_SKB_FRAGS];
+       bool sg_copy[MAX_SKB_FRAGS];
+       __u32 key;
+       __u32 flags;
+       struct bpf_map *map;
+};
+
 /* Compute the linear packet data range [data, data_end) which
  * will be accessed by various program types (cls_bpf, act_bpf,
  * lwt, ...). Subsystems allowing direct data access must (!)
@@ -771,6 +787,7 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp)
 void bpf_warn_invalid_xdp_action(u32 act);
 
 struct sock *do_sk_redirect_map(struct sk_buff *skb);
+struct sock *do_msg_redirect_map(struct sk_msg_buff *md);
 
 #ifdef CONFIG_BPF_JIT
 extern int bpf_jit_enable;
index 2a815560fda0e162c5c27da8249b9f3921328c21..c6baf767619ed3fdd819308031554a54bc60e630 100644 (file)
@@ -1317,6 +1317,7 @@ extern int send_sigurg(struct fown_struct *fown);
 #define SB_I_CGROUPWB  0x00000001      /* cgroup-aware writeback enabled */
 #define SB_I_NOEXEC    0x00000002      /* Ignore executables on this fs */
 #define SB_I_NODEV     0x00000004      /* Ignore devices on this fs */
+#define SB_I_MULTIROOT 0x00000008      /* Multiple roots to the dentry tree */
 
 /* sb->s_iflags to limit user namespace mounts */
 #define SB_I_USERNS_VISIBLE            0x00000010 /* fstype already mounted */
@@ -3198,7 +3199,7 @@ static inline bool vma_is_fsdax(struct vm_area_struct *vma)
        if (!vma_is_dax(vma))
                return false;
        inode = file_inode(vma->vm_file);
-       if (inode->i_mode == S_IFCHR)
+       if (S_ISCHR(inode->i_mode))
                return false; /* device-dax */
        return true;
 }
index 5e3531027b5113cc7a9fde0ec0ffc0cdf3389b97..c826b0b5232aff63877bb441fb62e43128b4e976 100644 (file)
@@ -198,6 +198,7 @@ struct gendisk {
        void *private_data;
 
        int flags;
+       struct rw_semaphore lookup_sem;
        struct kobject *slave_dir;
 
        struct timer_rand_state *random;
@@ -600,8 +601,9 @@ extern void delete_partition(struct gendisk *, int);
 extern void printk_all_partitions(void);
 
 extern struct gendisk *__alloc_disk_node(int minors, int node_id);
-extern struct kobject *get_disk(struct gendisk *disk);
+extern struct kobject *get_disk_and_module(struct gendisk *disk);
 extern void put_disk(struct gendisk *disk);
+extern void put_disk_and_module(struct gendisk *disk);
 extern void blk_register_region(dev_t devt, unsigned long range,
                        struct module *module,
                        struct kobject *(*probe)(dev_t, int *, void *),
index c5b0a75a78121935e3ab2874c03a1b9bc123f1cf..fd00170b494f7efee6f4a58ef19a4da10321564d 100644 (file)
@@ -25,6 +25,7 @@ struct ptr_ring *tun_get_tx_ring(struct file *file);
 bool tun_is_xdp_buff(void *ptr);
 void *tun_xdp_to_ptr(void *ptr);
 void *tun_ptr_to_xdp(void *ptr);
+void tun_ptr_free(void *ptr);
 #else
 #include <linux/err.h>
 #include <linux/errno.h>
@@ -50,5 +51,8 @@ static inline void *tun_ptr_to_xdp(void *ptr)
 {
        return NULL;
 }
+static inline void tun_ptr_free(void *ptr)
+{
+}
 #endif /* CONFIG_TUN */
 #endif /* __IF_TUN_H */
index 5e6a2d4dc366033772027940b5c4b742e17d0173..c4a1cff9c76861b3eef7b1eea97d8591532bf798 100644 (file)
@@ -300,30 +300,34 @@ static inline bool vlan_hw_offload_capable(netdev_features_t features,
 }
 
 /**
- * __vlan_insert_tag - regular VLAN tag inserting
+ * __vlan_insert_inner_tag - inner VLAN tag inserting
  * @skb: skbuff to tag
  * @vlan_proto: VLAN encapsulation protocol
  * @vlan_tci: VLAN TCI to insert
+ * @mac_len: MAC header length including outer vlan headers
  *
- * Inserts the VLAN tag into @skb as part of the payload
+ * Inserts the VLAN tag into @skb as part of the payload at offset mac_len
  * Returns error if skb_cow_head failes.
  *
  * Does not change skb->protocol so this function can be used during receive.
  */
-static inline int __vlan_insert_tag(struct sk_buff *skb,
-                                   __be16 vlan_proto, u16 vlan_tci)
+static inline int __vlan_insert_inner_tag(struct sk_buff *skb,
+                                         __be16 vlan_proto, u16 vlan_tci,
+                                         unsigned int mac_len)
 {
        struct vlan_ethhdr *veth;
 
        if (skb_cow_head(skb, VLAN_HLEN) < 0)
                return -ENOMEM;
 
-       veth = skb_push(skb, VLAN_HLEN);
+       skb_push(skb, VLAN_HLEN);
 
-       /* Move the mac addresses to the beginning of the new header. */
-       memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN);
+       /* Move the mac header sans proto to the beginning of the new header. */
+       memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN);
        skb->mac_header -= VLAN_HLEN;
 
+       veth = (struct vlan_ethhdr *)(skb->data + mac_len - ETH_HLEN);
+
        /* first, the ethernet type */
        veth->h_vlan_proto = vlan_proto;
 
@@ -334,12 +338,30 @@ static inline int __vlan_insert_tag(struct sk_buff *skb,
 }
 
 /**
- * vlan_insert_tag - regular VLAN tag inserting
+ * __vlan_insert_tag - regular VLAN tag inserting
  * @skb: skbuff to tag
  * @vlan_proto: VLAN encapsulation protocol
  * @vlan_tci: VLAN TCI to insert
  *
  * Inserts the VLAN tag into @skb as part of the payload
+ * Returns error if skb_cow_head fails.
+ *
+ * Does not change skb->protocol so this function can be used during receive.
+ */
+static inline int __vlan_insert_tag(struct sk_buff *skb,
+                                   __be16 vlan_proto, u16 vlan_tci)
+{
+       return __vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, ETH_HLEN);
+}
+
+/**
+ * vlan_insert_inner_tag - inner VLAN tag inserting
+ * @skb: skbuff to tag
+ * @vlan_proto: VLAN encapsulation protocol
+ * @vlan_tci: VLAN TCI to insert
+ * @mac_len: MAC header length including outer vlan headers
+ *
+ * Inserts the VLAN tag into @skb as part of the payload at offset mac_len
  * Returns a VLAN tagged skb. If a new skb is created, @skb is freed.
  *
  * Following the skb_unshare() example, in case of error, the calling function
@@ -347,12 +369,14 @@ static inline int __vlan_insert_tag(struct sk_buff *skb,
  *
  * Does not change skb->protocol so this function can be used during receive.
  */
-static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb,
-                                             __be16 vlan_proto, u16 vlan_tci)
+static inline struct sk_buff *vlan_insert_inner_tag(struct sk_buff *skb,
+                                                   __be16 vlan_proto,
+                                                   u16 vlan_tci,
+                                                   unsigned int mac_len)
 {
        int err;
 
-       err = __vlan_insert_tag(skb, vlan_proto, vlan_tci);
+       err = __vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, mac_len);
        if (err) {
                dev_kfree_skb_any(skb);
                return NULL;
@@ -360,6 +384,26 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb,
        return skb;
 }
 
+/**
+ * vlan_insert_tag - regular VLAN tag inserting
+ * @skb: skbuff to tag
+ * @vlan_proto: VLAN encapsulation protocol
+ * @vlan_tci: VLAN TCI to insert
+ *
+ * Inserts the VLAN tag into @skb as part of the payload
+ * Returns a VLAN tagged skb. If a new skb is created, @skb is freed.
+ *
+ * Following the skb_unshare() example, in case of error, the calling function
+ * doesn't have to worry about freeing the original skb.
+ *
+ * Does not change skb->protocol so this function can be used during receive.
+ */
+static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb,
+                                             __be16 vlan_proto, u16 vlan_tci)
+{
+       return vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, ETH_HLEN);
+}
+
 /**
  * vlan_insert_tag_set_proto - regular VLAN tag inserting
  * @skb: skbuff to tag
index 506a9815113159651da4b98ec5d72700dbe6271f..bc27cf03c41ea5a4b19d87bb82da0f3d36e9390e 100644 (file)
@@ -6,10 +6,10 @@
 #include <linux/types.h>
 
 /* Built-in __init functions needn't be compiled with retpoline */
-#if defined(RETPOLINE) && !defined(MODULE)
-#define __noretpoline __attribute__((indirect_branch("keep")))
+#if defined(__noretpoline) && !defined(MODULE)
+#define __noinitretpoline __noretpoline
 #else
-#define __noretpoline
+#define __noinitretpoline
 #endif
 
 /* These macros are used to mark some functions or 
@@ -47,7 +47,7 @@
 
 /* These are for everybody (although not all archs will actually
    discard it in modules) */
-#define __init         __section(.init.text) __cold  __latent_entropy __noretpoline
+#define __init         __section(.init.text) __cold  __latent_entropy __noinitretpoline
 #define __initdata     __section(.init.data)
 #define __initconst    __section(.init.rodata)
 #define __exitdata     __section(.exit.data)
index c00c4c33e432e0bd7e6a1ddf2775e5a45b24fa4f..b26eccc78fb1d708c7dc1ed1506770826a5214d0 100644 (file)
 
 #define ICH_HCR_EN                     (1 << 0)
 #define ICH_HCR_UIE                    (1 << 1)
+#define ICH_HCR_NPIE                   (1 << 3)
 #define ICH_HCR_TC                     (1 << 10)
 #define ICH_HCR_TALL0                  (1 << 11)
 #define ICH_HCR_TALL1                  (1 << 12)
index d3453ee072fc8aa859544e07598398884239d56f..68d8b1f73682be899af097adf1d95f6452c8a0c8 100644 (file)
@@ -84,6 +84,7 @@
 
 #define GICH_HCR_EN                    (1 << 0)
 #define GICH_HCR_UIE                   (1 << 1)
+#define GICH_HCR_NPIE                  (1 << 3)
 
 #define GICH_LR_VIRTUALID              (0x3ff << 0)
 #define GICH_LR_PHYSID_CPUID_SHIFT     (10)
index b6a29c126cc49285f62995b59dc517f17cdc3af6..2168cc6b8b301dffb8a8333fc699a516c6ab51c7 100644 (file)
@@ -151,6 +151,7 @@ extern struct jump_entry __start___jump_table[];
 extern struct jump_entry __stop___jump_table[];
 
 extern void jump_label_init(void);
+extern void jump_label_invalidate_init(void);
 extern void jump_label_lock(void);
 extern void jump_label_unlock(void);
 extern void arch_jump_label_transform(struct jump_entry *entry,
@@ -198,6 +199,8 @@ static __always_inline void jump_label_init(void)
        static_key_initialized = true;
 }
 
+static inline void jump_label_invalidate_init(void) {}
+
 static __always_inline bool static_key_false(struct static_key *key)
 {
        if (unlikely(static_key_count(key) > 0))
index ce51455e2adf631229d21b43e6afb76b2496790c..3fd291503576f1407e633851bec2b785152e6576 100644 (file)
@@ -472,6 +472,7 @@ extern bool parse_option_str(const char *str, const char *option);
 extern char *next_arg(char *args, char **param, char **val);
 
 extern int core_kernel_text(unsigned long addr);
+extern int init_kernel_text(unsigned long addr);
 extern int core_kernel_data(unsigned long addr);
 extern int __kernel_text_address(unsigned long addr);
 extern int kernel_text_address(unsigned long addr);
index ac0062b74aed048923c9f08b4a9d74d176ff6c96..6930c63126c78a9ef665b5b5653a60a8773b4d4c 100644 (file)
@@ -1105,7 +1105,6 @@ static inline void kvm_irq_routing_update(struct kvm *kvm)
 {
 }
 #endif
-void kvm_arch_irq_routing_update(struct kvm *kvm);
 
 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
@@ -1114,6 +1113,8 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 #endif /* CONFIG_HAVE_KVM_EVENTFD */
 
+void kvm_arch_irq_routing_update(struct kvm *kvm);
+
 static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
 {
        /*
@@ -1272,4 +1273,7 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
 }
 #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
 
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+               unsigned long start, unsigned long end);
+
 #endif
index 8be5077efb5fb246042d2f126348906b04e61c37..f92ea77836526666b1c410423a3c15a50e1eab85 100644 (file)
@@ -187,7 +187,6 @@ int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
                            unsigned long  *end_pfn);
 void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
                          unsigned long *out_end_pfn, int *out_nid);
-unsigned long memblock_next_valid_pfn(unsigned long pfn, unsigned long max_pfn);
 
 /**
  * for_each_mem_pfn_range - early memory pfn range iterator
diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h
new file mode 100644 (file)
index 0000000..70e7e56
--- /dev/null
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_ACCEL_H__
+#define __MLX5_ACCEL_H__
+
+#include <linux/mlx5/driver.h>
+
+enum mlx5_accel_esp_aes_gcm_keymat_iv_algo {
+       MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ,
+};
+
+enum mlx5_accel_esp_flags {
+       MLX5_ACCEL_ESP_FLAGS_TUNNEL            = 0,    /* Default */
+       MLX5_ACCEL_ESP_FLAGS_TRANSPORT         = 1UL << 0,
+       MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED     = 1UL << 1,
+       MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP = 1UL << 2,
+};
+
+enum mlx5_accel_esp_action {
+       MLX5_ACCEL_ESP_ACTION_DECRYPT,
+       MLX5_ACCEL_ESP_ACTION_ENCRYPT,
+};
+
+enum mlx5_accel_esp_keymats {
+       MLX5_ACCEL_ESP_KEYMAT_AES_NONE,
+       MLX5_ACCEL_ESP_KEYMAT_AES_GCM,
+};
+
+enum mlx5_accel_esp_replay {
+       MLX5_ACCEL_ESP_REPLAY_NONE,
+       MLX5_ACCEL_ESP_REPLAY_BMP,
+};
+
+struct aes_gcm_keymat {
+       u64   seq_iv;
+       enum mlx5_accel_esp_aes_gcm_keymat_iv_algo iv_algo;
+
+       u32   salt;
+       u32   icv_len;
+
+       u32   key_len;
+       u32   aes_key[256 / 32];
+};
+
+struct mlx5_accel_esp_xfrm_attrs {
+       enum mlx5_accel_esp_action action;
+       u32   esn;
+       u32   spi;
+       u32   seq;
+       u32   tfc_pad;
+       u32   flags;
+       u32   sa_handle;
+       enum mlx5_accel_esp_replay replay_type;
+       union {
+               struct {
+                       u32 size;
+
+               } bmp;
+       } replay;
+       enum mlx5_accel_esp_keymats keymat_type;
+       union {
+               struct aes_gcm_keymat aes_gcm;
+       } keymat;
+};
+
+struct mlx5_accel_esp_xfrm {
+       struct mlx5_core_dev  *mdev;
+       struct mlx5_accel_esp_xfrm_attrs attrs;
+};
+
+enum {
+       MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA = 1UL << 0,
+};
+
+enum mlx5_accel_ipsec_cap {
+       MLX5_ACCEL_IPSEC_CAP_DEVICE             = 1 << 0,
+       MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA  = 1 << 1,
+       MLX5_ACCEL_IPSEC_CAP_ESP                = 1 << 2,
+       MLX5_ACCEL_IPSEC_CAP_IPV6               = 1 << 3,
+       MLX5_ACCEL_IPSEC_CAP_LSO                = 1 << 4,
+       MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER      = 1 << 5,
+       MLX5_ACCEL_IPSEC_CAP_ESN                = 1 << 6,
+       MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN       = 1 << 7,
+};
+
+#ifdef CONFIG_MLX5_ACCEL
+
+u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev);
+
+struct mlx5_accel_esp_xfrm *
+mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
+                          const struct mlx5_accel_esp_xfrm_attrs *attrs,
+                          u32 flags);
+void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm);
+int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+                              const struct mlx5_accel_esp_xfrm_attrs *attrs);
+
+#else
+
+static inline u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; }
+
+static inline struct mlx5_accel_esp_xfrm *
+mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
+                          const struct mlx5_accel_esp_xfrm_attrs *attrs,
+                          u32 flags) { return ERR_PTR(-EOPNOTSUPP); }
+static inline void
+mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) {}
+static inline int
+mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+                          const struct mlx5_accel_esp_xfrm_attrs *attrs) { return -EOPNOTSUPP; }
+
+#endif
+#endif
index bfea26af6de53c9169b48e058ce16cc80a7d72a4..cded85ab6fe4556b2279df83084da174cce1c3d0 100644 (file)
@@ -462,8 +462,8 @@ struct mlx5_core_srq {
        struct mlx5_core_rsc_common     common; /* must be first */
        u32             srqn;
        int             max;
-       int             max_gs;
-       int             max_avail_gather;
+       size_t          max_gs;
+       size_t          max_avail_gather;
        int             wqe_shift;
        void (*event)   (struct mlx5_core_srq *, enum mlx5_event);
 
@@ -1224,6 +1224,12 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev)
        return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF);
 }
 
+#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev))
+#define MLX5_VPORT_MANAGER(mdev) \
+       (MLX5_CAP_GEN(mdev, vport_group_manager) && \
+        (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
+        mlx5_core_is_pf(mdev))
+
 static inline int mlx5_get_gid_table_len(u16 param)
 {
        if (param > 4) {
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
new file mode 100644 (file)
index 0000000..d3c9db4
--- /dev/null
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _MLX5_ESWITCH_
+#define _MLX5_ESWITCH_
+
+#include <linux/mlx5/driver.h>
+
+enum {
+       SRIOV_NONE,
+       SRIOV_LEGACY,
+       SRIOV_OFFLOADS
+};
+
+enum {
+       REP_ETH,
+       REP_IB,
+       NUM_REP_TYPES,
+};
+
+struct mlx5_eswitch_rep;
+struct mlx5_eswitch_rep_if {
+       int                    (*load)(struct mlx5_core_dev *dev,
+                                      struct mlx5_eswitch_rep *rep);
+       void                   (*unload)(struct mlx5_eswitch_rep *rep);
+       void                   *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
+       void                    *priv;
+       bool                   valid;
+};
+
+struct mlx5_eswitch_rep {
+       struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
+       u16                    vport;
+       u8                     hw_id[ETH_ALEN];
+       u16                    vlan;
+       u32                    vlan_refcount;
+};
+
+void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
+                                    int vport_index,
+                                    struct mlx5_eswitch_rep_if *rep_if,
+                                    u8 rep_type);
+void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
+                                      int vport_index,
+                                      u8 rep_type);
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+                                int vport,
+                                u8 rep_type);
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+                                               int vport);
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
+                                   int vport, u32 sqn);
+#endif
index a0b48afcb4222aa548b62d8d4e8d87f605273bf8..b957e52434f8f7ceb78d7b8aecafc5ad2d563476 100644 (file)
@@ -40,6 +40,8 @@
 
 enum {
        MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO  = 1 << 16,
+       MLX5_FLOW_CONTEXT_ACTION_ENCRYPT        = 1 << 17,
+       MLX5_FLOW_CONTEXT_ACTION_DECRYPT        = 1 << 18,
 };
 
 enum {
@@ -69,6 +71,7 @@ enum mlx5_flow_namespace_type {
        MLX5_FLOW_NAMESPACE_ESW_INGRESS,
        MLX5_FLOW_NAMESPACE_SNIFFER_RX,
        MLX5_FLOW_NAMESPACE_SNIFFER_TX,
+       MLX5_FLOW_NAMESPACE_EGRESS,
 };
 
 struct mlx5_flow_table;
@@ -141,9 +144,11 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg);
 
 struct mlx5_flow_act {
        u32 action;
+       bool has_flow_tag;
        u32 flow_tag;
        u32 encap_id;
        u32 modify_id;
+       uintptr_t esp_id;
 };
 
 #define MLX5_DECLARE_FLOW_ACT(name) \
diff --git a/include/linux/mlx5/fs_helpers.h b/include/linux/mlx5/fs_helpers.h
new file mode 100644 (file)
index 0000000..7b476bb
--- /dev/null
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _MLX5_FS_HELPERS_
+#define _MLX5_FS_HELPERS_
+
+#include <linux/mlx5/mlx5_ifc.h>
+
+#define MLX5_FS_IPV4_VERSION 4
+#define MLX5_FS_IPV6_VERSION 6
+
+static inline bool _mlx5_fs_is_outer_ipproto_flow(const u32 *match_c,
+                                                 const u32 *match_v, u8 match)
+{
+       const void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+                                            outer_headers);
+       const void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+                                            outer_headers);
+
+       return MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_protocol) == 0xff &&
+               MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol) == match;
+}
+
+static inline bool mlx5_fs_is_outer_tcp_flow(const u32 *match_c,
+                                            const u32 *match_v)
+{
+       return _mlx5_fs_is_outer_ipproto_flow(match_c, match_v, IPPROTO_TCP);
+}
+
+static inline bool mlx5_fs_is_outer_udp_flow(const u32 *match_c,
+                                            const u32 *match_v)
+{
+       return _mlx5_fs_is_outer_ipproto_flow(match_c, match_v, IPPROTO_UDP);
+}
+
+static inline bool mlx5_fs_is_vxlan_flow(const u32 *match_c)
+{
+       void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
+                                          misc_parameters);
+
+       return MLX5_GET(fte_match_set_misc, misc_params_c, vxlan_vni);
+}
+
+static inline bool _mlx5_fs_is_outer_ipv_flow(struct mlx5_core_dev *mdev,
+                                             const u32 *match_c,
+                                             const u32 *match_v, int version)
+{
+       int match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+                                                 ft_field_support.outer_ip_version);
+       const void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+                                            outer_headers);
+       const void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+                                            outer_headers);
+
+       if (!match_ipv) {
+               u16 ethertype;
+
+               switch (version) {
+               case MLX5_FS_IPV4_VERSION:
+                       ethertype = ETH_P_IP;
+                       break;
+               case MLX5_FS_IPV6_VERSION:
+                       ethertype = ETH_P_IPV6;
+                       break;
+               default:
+                       return false;
+               }
+
+               return MLX5_GET(fte_match_set_lyr_2_4, headers_c,
+                               ethertype) == 0xffff &&
+                       MLX5_GET(fte_match_set_lyr_2_4, headers_v,
+                                ethertype) == ethertype;
+       }
+
+       return MLX5_GET(fte_match_set_lyr_2_4, headers_c,
+                       ip_version) == 0xf &&
+               MLX5_GET(fte_match_set_lyr_2_4, headers_v,
+                        ip_version) == version;
+}
+
+static inline bool
+mlx5_fs_is_outer_ipv4_flow(struct mlx5_core_dev *mdev, const u32 *match_c,
+                          const u32 *match_v)
+{
+       return _mlx5_fs_is_outer_ipv_flow(mdev, match_c, match_v,
+                                         MLX5_FS_IPV4_VERSION);
+}
+
+static inline bool
+mlx5_fs_is_outer_ipv6_flow(struct mlx5_core_dev *mdev, const u32 *match_c,
+                          const u32 *match_v)
+{
+       return _mlx5_fs_is_outer_ipv_flow(mdev, match_c, match_v,
+                                         MLX5_FS_IPV6_VERSION);
+}
+
+static inline bool mlx5_fs_is_outer_ipsec_flow(const u32 *match_c)
+{
+       void *misc_params_c =
+                       MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters);
+
+       return MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi);
+}
+
+#endif
index f4e417686f620cb2926461631d313d4aef868a54..14ad84afe8babec960d406ce646a03f3cfc7dc9e 100644 (file)
@@ -295,7 +295,9 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
        u8         inner_tcp_dport[0x1];
        u8         inner_tcp_flags[0x1];
        u8         reserved_at_37[0x9];
-       u8         reserved_at_40[0x1a];
+       u8         reserved_at_40[0x17];
+       u8         outer_esp_spi[0x1];
+       u8         reserved_at_58[0x2];
        u8         bth_dst_qp[0x1];
 
        u8         reserved_at_5b[0x25];
@@ -437,7 +439,9 @@ struct mlx5_ifc_fte_match_set_misc_bits {
 
        u8         reserved_at_120[0x28];
        u8         bth_dst_qp[0x18];
-       u8         reserved_at_160[0xa0];
+       u8         reserved_at_160[0x20];
+       u8         outer_esp_spi[0x20];
+       u8         reserved_at_1a0[0x60];
 };
 
 struct mlx5_ifc_cmd_pas_bits {
@@ -1091,6 +1095,7 @@ enum mlx5_flow_destination_type {
        MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE   = 0x1,
        MLX5_FLOW_DESTINATION_TYPE_TIR          = 0x2,
 
+       MLX5_FLOW_DESTINATION_TYPE_PORT         = 0x99,
        MLX5_FLOW_DESTINATION_TYPE_COUNTER      = 0x100,
 };
 
index 255a88d08078e8a33b5163d3d5747f7b6185bd78..ec052491ba3d7a38cb84ca0109d8820eb9b07bdb 100644 (file)
@@ -373,7 +373,10 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits {
 struct mlx5_ifc_ipsec_extended_cap_bits {
        u8         encapsulation[0x20];
 
-       u8         reserved_0[0x15];
+       u8         reserved_0[0x12];
+       u8         v2_command[0x1];
+       u8         udp_encap[0x1];
+       u8         rx_no_trailer[0x1];
        u8         ipv4_fragment[0x1];
        u8         ipv6[0x1];
        u8         esn[0x1];
@@ -429,4 +432,91 @@ struct mlx5_ifc_ipsec_counters_bits {
        u8         dropped_cmd[0x40];
 };
 
+enum mlx5_ifc_fpga_ipsec_response_syndrome {
+       MLX5_FPGA_IPSEC_RESPONSE_SUCCESS = 0,
+       MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1,
+       MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE = 2,
+       MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3,
+};
+
+struct mlx5_ifc_fpga_ipsec_cmd_resp {
+       __be32 syndrome;
+       union {
+               __be32 sw_sa_handle;
+               __be32 flags;
+       };
+       u8 reserved[24];
+} __packed;
+
+enum mlx5_ifc_fpga_ipsec_cmd_opcode {
+       MLX5_FPGA_IPSEC_CMD_OP_ADD_SA = 0,
+       MLX5_FPGA_IPSEC_CMD_OP_DEL_SA = 1,
+       MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 = 2,
+       MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 = 3,
+       MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2 = 4,
+       MLX5_FPGA_IPSEC_CMD_OP_SET_CAP = 5,
+};
+
+enum mlx5_ifc_fpga_ipsec_cap {
+       MLX5_FPGA_IPSEC_CAP_NO_TRAILER = BIT(0),
+};
+
+struct mlx5_ifc_fpga_ipsec_cmd_cap {
+       __be32 cmd;
+       __be32 flags;
+       u8 reserved[24];
+} __packed;
+
+enum mlx5_ifc_fpga_ipsec_sa_flags {
+       MLX5_FPGA_IPSEC_SA_ESN_EN = BIT(0),
+       MLX5_FPGA_IPSEC_SA_ESN_OVERLAP = BIT(1),
+       MLX5_FPGA_IPSEC_SA_IPV6 = BIT(2),
+       MLX5_FPGA_IPSEC_SA_DIR_SX = BIT(3),
+       MLX5_FPGA_IPSEC_SA_SPI_EN = BIT(4),
+       MLX5_FPGA_IPSEC_SA_SA_VALID = BIT(5),
+       MLX5_FPGA_IPSEC_SA_IP_ESP = BIT(6),
+       MLX5_FPGA_IPSEC_SA_IP_AH = BIT(7),
+};
+
+enum mlx5_ifc_fpga_ipsec_sa_enc_mode {
+       MLX5_FPGA_IPSEC_SA_ENC_MODE_NONE = 0,
+       MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128 = 1,
+       MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128 = 3,
+};
+
+struct mlx5_ifc_fpga_ipsec_sa_v1 {
+       __be32 cmd;
+       u8 key_enc[32];
+       u8 key_auth[32];
+       __be32 sip[4];
+       __be32 dip[4];
+       union {
+               struct {
+                       __be32 reserved;
+                       u8 salt_iv[8];
+                       __be32 salt;
+               } __packed gcm;
+               struct {
+                       u8 salt[16];
+               } __packed cbc;
+       };
+       __be32 spi;
+       __be32 sw_sa_handle;
+       __be16 tfclen;
+       u8 enc_mode;
+       u8 reserved1[2];
+       u8 flags;
+       u8 reserved2[2];
+};
+
+struct mlx5_ifc_fpga_ipsec_sa {
+       struct mlx5_ifc_fpga_ipsec_sa_v1 ipsec_sa_v1;
+       __be16 udp_sp;
+       __be16 udp_dp;
+       u8 reserved1[4];
+       __be32 esn;
+       __be16 vid;     /* only 12 bits, rest is reserved */
+       __be16 reserved2;
+} __packed;
+
 #endif /* MLX5_IFC_FPGA_H */
index 5396521a776ac48a8673f9528426a87215e36dc4..7ed82e4f11b3536bdb9b4113cbb46a6ecdde84b1 100644 (file)
@@ -4,11 +4,10 @@
 
 #include <linux/in.h>
 #include <linux/pim.h>
-#include <linux/rhashtable.h>
-#include <net/sock.h>
 #include <net/fib_rules.h>
 #include <net/fib_notifier.h>
 #include <uapi/linux/mroute.h>
+#include <linux/mroute_base.h>
 
 #ifdef CONFIG_IP_MROUTE
 static inline int ip_mroute_opt(int opt)
@@ -56,18 +55,6 @@ static inline bool ipmr_rule_default(const struct fib_rule *rule)
 }
 #endif
 
-struct vif_device {
-       struct net_device       *dev;                   /* Device we are using */
-       struct netdev_phys_item_id dev_parent_id;       /* Device parent ID    */
-       unsigned long   bytes_in,bytes_out;
-       unsigned long   pkt_in,pkt_out;         /* Statistics                   */
-       unsigned long   rate_limit;             /* Traffic shaping (NI)         */
-       unsigned char   threshold;              /* TTL threshold                */
-       unsigned short  flags;                  /* Control flags                */
-       __be32          local,remote;           /* Addresses(remote for tunnels)*/
-       int             link;                   /* Physical interface index     */
-};
-
 struct vif_entry_notifier_info {
        struct fib_notifier_info info;
        struct net_device *dev;
@@ -78,34 +65,6 @@ struct vif_entry_notifier_info {
 
 #define VIFF_STATIC 0x8000
 
-#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
-
-struct mr_table {
-       struct list_head        list;
-       possible_net_t          net;
-       u32                     id;
-       struct sock __rcu       *mroute_sk;
-       struct timer_list       ipmr_expire_timer;
-       struct list_head        mfc_unres_queue;
-       struct vif_device       vif_table[MAXVIFS];
-       struct rhltable         mfc_hash;
-       struct list_head        mfc_cache_list;
-       int                     maxvif;
-       atomic_t                cache_resolve_queue_len;
-       bool                    mroute_do_assert;
-       bool                    mroute_do_pim;
-       int                     mroute_reg_vif_num;
-};
-
-/* mfc_flags:
- * MFC_STATIC - the entry was added statically (not by a routing daemon)
- * MFC_OFFLOAD - the entry was offloaded to the hardware
- */
-enum {
-       MFC_STATIC = BIT(0),
-       MFC_OFFLOAD = BIT(1),
-};
-
 struct mfc_cache_cmp_arg {
        __be32 mfc_mcastgrp;
        __be32 mfc_origin;
@@ -113,28 +72,13 @@ struct mfc_cache_cmp_arg {
 
 /**
  * struct mfc_cache - multicast routing entries
- * @mnode: rhashtable list
+ * @_c: Common multicast routing information; has to be first [for casting]
  * @mfc_mcastgrp: destination multicast group address
  * @mfc_origin: source address
  * @cmparg: used for rhashtable comparisons
- * @mfc_parent: source interface (iif)
- * @mfc_flags: entry flags
- * @expires: unresolved entry expire time
- * @unresolved: unresolved cached skbs
- * @last_assert: time of last assert
- * @minvif: minimum VIF id
- * @maxvif: maximum VIF id
- * @bytes: bytes that have passed for this entry
- * @pkt: packets that have passed for this entry
- * @wrong_if: number of wrong source interface hits
- * @lastuse: time of last use of the group (traffic or update)
- * @ttls: OIF TTL threshold array
- * @refcount: reference count for this entry
- * @list: global entry list
- * @rcu: used for entry destruction
  */
 struct mfc_cache {
-       struct rhlist_head mnode;
+       struct mr_mfc _c;
        union {
                struct {
                        __be32 mfc_mcastgrp;
@@ -142,28 +86,6 @@ struct mfc_cache {
                };
                struct mfc_cache_cmp_arg cmparg;
        };
-       vifi_t mfc_parent;
-       int mfc_flags;
-
-       union {
-               struct {
-                       unsigned long expires;
-                       struct sk_buff_head unresolved;
-               } unres;
-               struct {
-                       unsigned long last_assert;
-                       int minvif;
-                       int maxvif;
-                       unsigned long bytes;
-                       unsigned long pkt;
-                       unsigned long wrong_if;
-                       unsigned long lastuse;
-                       unsigned char ttls[MAXVIFS];
-                       refcount_t refcount;
-               } res;
-       } mfc_un;
-       struct list_head list;
-       struct rcu_head rcu;
 };
 
 struct mfc_entry_notifier_info {
@@ -187,12 +109,12 @@ static inline void ipmr_cache_free(struct mfc_cache *mfc_cache)
 
 static inline void ipmr_cache_put(struct mfc_cache *c)
 {
-       if (refcount_dec_and_test(&c->mfc_un.res.refcount))
+       if (refcount_dec_and_test(&c->_c.mfc_un.res.refcount))
                ipmr_cache_free(c);
 }
 static inline void ipmr_cache_hold(struct mfc_cache *c)
 {
-       refcount_inc(&c->mfc_un.res.refcount);
+       refcount_inc(&c->_c.mfc_un.res.refcount);
 }
 
 #endif
index 3014c52bfd86e6f971ec9e569732d34bf8702282..1ac38e6819f5408cafe44b5f925586ad9c587ad3 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/skbuff.h>      /* for struct sk_buff_head */
 #include <net/net_namespace.h>
 #include <uapi/linux/mroute6.h>
+#include <linux/mroute_base.h>
 
 #ifdef CONFIG_IPV6_MROUTE
 static inline int ip6_mroute_opt(int opt)
@@ -62,57 +63,24 @@ static inline void ip6_mr_cleanup(void)
 }
 #endif
 
-struct mif_device {
-       struct net_device       *dev;                   /* Device we are using */
-       unsigned long   bytes_in,bytes_out;
-       unsigned long   pkt_in,pkt_out;         /* Statistics                   */
-       unsigned long   rate_limit;             /* Traffic shaping (NI)         */
-       unsigned char   threshold;              /* TTL threshold                */
-       unsigned short  flags;                  /* Control flags                */
-       int             link;                   /* Physical interface index     */
-};
-
 #define VIFF_STATIC 0x8000
 
-struct mfc6_cache {
-       struct list_head list;
-       struct in6_addr mf6c_mcastgrp;                  /* Group the entry belongs to   */
-       struct in6_addr mf6c_origin;                    /* Source of packet             */
-       mifi_t mf6c_parent;                     /* Source interface             */
-       int mfc_flags;                          /* Flags on line                */
+struct mfc6_cache_cmp_arg {
+       struct in6_addr mf6c_mcastgrp;
+       struct in6_addr mf6c_origin;
+};
 
+struct mfc6_cache {
+       struct mr_mfc _c;
        union {
                struct {
-                       unsigned long expires;
-                       struct sk_buff_head unresolved; /* Unresolved buffers           */
-               } unres;
-               struct {
-                       unsigned long last_assert;
-                       int minvif;
-                       int maxvif;
-                       unsigned long bytes;
-                       unsigned long pkt;
-                       unsigned long wrong_if;
-                       unsigned long lastuse;
-                       unsigned char ttls[MAXMIFS];    /* TTL thresholds               */
-               } res;
-       } mfc_un;
+                       struct in6_addr mf6c_mcastgrp;
+                       struct in6_addr mf6c_origin;
+               };
+               struct mfc6_cache_cmp_arg cmparg;
+       };
 };
 
-#define MFC_STATIC             1
-#define MFC_NOTIFY             2
-
-#define MFC6_LINES             64
-
-#define MFC6_HASH(a, g) (((__force u32)(a)->s6_addr32[0] ^ \
-                         (__force u32)(a)->s6_addr32[1] ^ \
-                         (__force u32)(a)->s6_addr32[2] ^ \
-                         (__force u32)(a)->s6_addr32[3] ^ \
-                         (__force u32)(g)->s6_addr32[0] ^ \
-                         (__force u32)(g)->s6_addr32[1] ^ \
-                         (__force u32)(g)->s6_addr32[2] ^ \
-                         (__force u32)(g)->s6_addr32[3]) % MFC6_LINES)
-
 #define MFC_ASSERT_THRESH (3*HZ)               /* Maximal freq. of asserts */
 
 struct rtmsg;
@@ -120,12 +88,12 @@ extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
                           struct rtmsg *rtm, u32 portid);
 
 #ifdef CONFIG_IPV6_MROUTE
-extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb);
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb);
 extern int ip6mr_sk_done(struct sock *sk);
 #else
-static inline struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+static inline bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
 {
-       return NULL;
+       return false;
 }
 static inline int ip6mr_sk_done(struct sock *sk)
 {
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
new file mode 100644 (file)
index 0000000..c2560cb
--- /dev/null
@@ -0,0 +1,346 @@
+#ifndef __LINUX_MROUTE_BASE_H
+#define __LINUX_MROUTE_BASE_H
+
+#include <linux/netdevice.h>
+#include <linux/rhashtable.h>
+#include <linux/spinlock.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+/**
+ * struct vif_device - interface representor for multicast routing
+ * @dev: network device being used
+ * @bytes_in: statistic; bytes ingressing
+ * @bytes_out: statistic; bytes egressing
+ * @pkt_in: statistic; packets ingressing
+ * @pkt_out: statistic; packets egressing
+ * @rate_limit: Traffic shaping (NI)
+ * @threshold: TTL threshold
+ * @flags: Control flags
+ * @link: Physical interface index
+ * @dev_parent_id: device parent id
+ * @local: Local address
+ * @remote: Remote address for tunnels
+ */
+struct vif_device {
+       struct net_device *dev;
+       unsigned long bytes_in, bytes_out;
+       unsigned long pkt_in, pkt_out;
+       unsigned long rate_limit;
+       unsigned char threshold;
+       unsigned short flags;
+       int link;
+
+       /* Currently only used by ipmr */
+       struct netdev_phys_item_id dev_parent_id;
+       __be32 local, remote;
+};
+
+#ifndef MAXVIFS
+/* This one is nasty; value is defined in uapi using different symbols for
+ * mroute and mroute6 but both map to the same value, 32.
+ */
+#define MAXVIFS        32
+#endif
+
+#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev))
+
+/* mfc_flags:
+ * MFC_STATIC - the entry was added statically (not by a routing daemon)
+ * MFC_OFFLOAD - the entry was offloaded to the hardware
+ */
+enum {
+       MFC_STATIC = BIT(0),
+       MFC_OFFLOAD = BIT(1),
+};
+
+/**
+ * struct mr_mfc - common multicast routing entries
+ * @mnode: rhashtable list
+ * @mfc_parent: source interface (iif)
+ * @mfc_flags: entry flags
+ * @expires: unresolved entry expire time
+ * @unresolved: unresolved cached skbs
+ * @last_assert: time of last assert
+ * @minvif: minimum VIF id
+ * @maxvif: maximum VIF id
+ * @bytes: bytes that have passed for this entry
+ * @pkt: packets that have passed for this entry
+ * @wrong_if: number of wrong source interface hits
+ * @lastuse: time of last use of the group (traffic or update)
+ * @ttls: OIF TTL threshold array
+ * @refcount: reference count for this entry
+ * @list: global entry list
+ * @rcu: used for entry destruction
+ */
+struct mr_mfc {
+       struct rhlist_head mnode;
+       unsigned short mfc_parent;
+       int mfc_flags;
+
+       union {
+               struct {
+                       unsigned long expires;
+                       struct sk_buff_head unresolved;
+               } unres;
+               struct {
+                       unsigned long last_assert;
+                       int minvif;
+                       int maxvif;
+                       unsigned long bytes;
+                       unsigned long pkt;
+                       unsigned long wrong_if;
+                       unsigned long lastuse;
+                       unsigned char ttls[MAXVIFS];
+                       refcount_t refcount;
+               } res;
+       } mfc_un;
+       struct list_head list;
+       struct rcu_head rcu;
+};
+
+struct mr_table;
+
+/**
+ * struct mr_table_ops - callbacks and info for protocol-specific ops
+ * @rht_params: parameters for accessing the MFC hash
+ * @cmparg_any: a hash key to be used for matching on (*,*) routes
+ */
+struct mr_table_ops {
+       const struct rhashtable_params *rht_params;
+       void *cmparg_any;
+};
+
+/**
+ * struct mr_table - a multicast routing table
+ * @list: entry within a list of multicast routing tables
+ * @net: net where this table belongs
+ * @ops: protocol specific operations
+ * @id: identifier of the table
+ * @mroute_sk: socket associated with the table
+ * @ipmr_expire_timer: timer for handling unresolved routes
+ * @mfc_unres_queue: list of unresolved MFC entries
+ * @vif_table: array containing all possible vifs
+ * @mfc_hash: Hash table of all resolved routes for easy lookup
+ * @mfc_cache_list: list of resolved routes for possible traversal
+ * @maxvif: Identifier of highest value vif currently in use
+ * @cache_resolve_queue_len: current size of unresolved queue
+ * @mroute_do_assert: Whether to inform userspace on wrong ingress
+ * @mroute_do_pim: Whether to receive IGMP PIMv1
+ * @mroute_reg_vif_num: PIM-device vif index
+ */
+struct mr_table {
+       struct list_head        list;
+       possible_net_t          net;
+       struct mr_table_ops     ops;
+       u32                     id;
+       struct sock __rcu       *mroute_sk;
+       struct timer_list       ipmr_expire_timer;
+       struct list_head        mfc_unres_queue;
+       struct vif_device       vif_table[MAXVIFS];
+       struct rhltable         mfc_hash;
+       struct list_head        mfc_cache_list;
+       int                     maxvif;
+       atomic_t                cache_resolve_queue_len;
+       bool                    mroute_do_assert;
+       bool                    mroute_do_pim;
+       int                     mroute_reg_vif_num;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+void vif_device_init(struct vif_device *v,
+                    struct net_device *dev,
+                    unsigned long rate_limit,
+                    unsigned char threshold,
+                    unsigned short flags,
+                    unsigned short get_iflink_mask);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+              struct mr_table_ops *ops,
+              void (*expire_func)(struct timer_list *t),
+              void (*table_set)(struct mr_table *mrt,
+                                struct net *net));
+
+/* These actually return 'struct mr_mfc *', but to avoid need for explicit
+ * castings they simply return 'void *'.
+ */
+void *mr_mfc_find_parent(struct mr_table *mrt,
+                        void *hasharg, int parent);
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi);
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg);
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+                  struct mr_mfc *c, struct rtmsg *rtm);
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+                    struct mr_table *(*iter)(struct net *net,
+                                             struct mr_table *mrt),
+                    int (*fill)(struct mr_table *mrt,
+                                struct sk_buff *skb,
+                                u32 portid, u32 seq, struct mr_mfc *c,
+                                int cmd, int flags),
+                    spinlock_t *lock);
+#else
+static inline void vif_device_init(struct vif_device *v,
+                                  struct net_device *dev,
+                                  unsigned long rate_limit,
+                                  unsigned char threshold,
+                                  unsigned short flags,
+                                  unsigned short get_iflink_mask)
+{
+}
+
+static inline void *
+mr_table_alloc(struct net *net, u32 id,
+              struct mr_table_ops *ops,
+              void (*expire_func)(struct timer_list *t),
+              void (*table_set)(struct mr_table *mrt,
+                                struct net *net))
+{
+       return NULL;
+}
+
+static inline void *mr_mfc_find_parent(struct mr_table *mrt,
+                                      void *hasharg, int parent)
+{
+       return NULL;
+}
+
+static inline void *mr_mfc_find_any_parent(struct mr_table *mrt,
+                                          int vifi)
+{
+       return NULL;
+}
+
+static inline struct mr_mfc *mr_mfc_find_any(struct mr_table *mrt,
+                                            int vifi, void *hasharg)
+{
+       return NULL;
+}
+
+static inline int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+                                struct mr_mfc *c, struct rtmsg *rtm)
+{
+       return -EINVAL;
+}
+
+static inline int
+mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+                struct mr_table *(*iter)(struct net *net,
+                                         struct mr_table *mrt),
+                int (*fill)(struct mr_table *mrt,
+                            struct sk_buff *skb,
+                            u32 portid, u32 seq, struct mr_mfc *c,
+                            int cmd, int flags),
+                spinlock_t *lock)
+{
+       return -EINVAL;
+}
+#endif
+
+static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg)
+{
+       return mr_mfc_find_parent(mrt, hasharg, -1);
+}
+
+#ifdef CONFIG_PROC_FS
+struct mr_vif_iter {
+       struct seq_net_private p;
+       struct mr_table *mrt;
+       int ct;
+};
+
+struct mr_mfc_iter {
+       struct seq_net_private p;
+       struct mr_table *mrt;
+       struct list_head *cache;
+
+       /* Lock protecting the mr_table's unresolved queue */
+       spinlock_t *lock;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos);
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       return *pos ? mr_vif_seq_idx(seq_file_net(seq),
+                                    seq->private, *pos - 1)
+                   : SEQ_START_TOKEN;
+}
+
+/* These actually return 'struct mr_mfc *', but to avoid need for explicit
+ * castings they simply return 'void *'.
+ */
+void *mr_mfc_seq_idx(struct net *net,
+                    struct mr_mfc_iter *it, loff_t pos);
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+                     loff_t *pos);
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+                                    struct mr_table *mrt, spinlock_t *lock)
+{
+       struct mr_mfc_iter *it = seq->private;
+
+       it->mrt = mrt;
+       it->cache = NULL;
+       it->lock = lock;
+
+       return *pos ? mr_mfc_seq_idx(seq_file_net(seq),
+                                    seq->private, *pos - 1)
+                   : SEQ_START_TOKEN;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+       struct mr_mfc_iter *it = seq->private;
+       struct mr_table *mrt = it->mrt;
+
+       if (it->cache == &mrt->mfc_unres_queue)
+               spin_unlock_bh(it->lock);
+       else if (it->cache == &mrt->mfc_cache_list)
+               rcu_read_unlock();
+}
+#else
+static inline void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter,
+                                  loff_t pos)
+{
+       return NULL;
+}
+
+static inline void *mr_vif_seq_next(struct seq_file *seq,
+                                   void *v, loff_t *pos)
+{
+       return NULL;
+}
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       return NULL;
+}
+
+static inline void *mr_mfc_seq_idx(struct net *net,
+                                  struct mr_mfc_iter *it, loff_t pos)
+{
+       return NULL;
+}
+
+static inline void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+                                   loff_t *pos)
+{
+       return NULL;
+}
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+                                    struct mr_table *mrt, spinlock_t *lock)
+{
+       return NULL;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+}
+#endif
+#endif
+#endif
index f25c13423bd4774d3d602d2ec7afb561a39614d5..cb3bbed4e6339cfd89586c15a1d65b1d7af61106 100644 (file)
@@ -66,6 +66,11 @@ struct mutex {
 #endif
 };
 
+/*
+ * Internal helper function; C doesn't allow us to hide it :/
+ *
+ * DO NOT USE (outside of mutex code).
+ */
 static inline struct task_struct *__mutex_owner(struct mutex *lock)
 {
        return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x07);
index 000d1aada74fea597be27d3fe68d9268b837dfe0..2248a052061d8aeb0ae08d233f181f09cba6384b 100644 (file)
@@ -222,6 +222,7 @@ enum {
 int sock_wake_async(struct socket_wq *sk_wq, int how, int band);
 int sock_register(const struct net_proto_family *fam);
 void sock_unregister(int family);
+bool sock_is_registered(int family);
 int __sock_create(struct net *net, int family, int type, int proto,
                  struct socket **res, int kern);
 int sock_create(int family, int type, int proto, struct socket **res);
index dbe6344b727ac60cdc6bf85a38632a1a5a6f890d..913b1cc882cf0d4d18058673a11f86509b5b98d9 100644 (file)
@@ -585,6 +585,15 @@ struct netdev_queue {
 #endif
 } ____cacheline_aligned_in_smp;
 
+extern int sysctl_fb_tunnels_only_for_init_net;
+
+static inline bool net_has_fallback_tunnels(const struct net *net)
+{
+       return net == &init_net ||
+              !IS_ENABLED(CONFIG_SYSCTL) ||
+              !sysctl_fb_tunnels_only_for_init_net;
+}
+
 static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
 {
 #if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
@@ -1381,8 +1390,6 @@ struct net_device_ops {
  * @IFF_MACVLAN: Macvlan device
  * @IFF_XMIT_DST_RELEASE_PERM: IFF_XMIT_DST_RELEASE not taking into account
  *     underlying stacked devices
- * @IFF_IPVLAN_MASTER: IPvlan master device
- * @IFF_IPVLAN_SLAVE: IPvlan slave device
  * @IFF_L3MDEV_MASTER: device is an L3 master device
  * @IFF_NO_QUEUE: device can run without qdisc attached
  * @IFF_OPENVSWITCH: device is a Open vSwitch master
@@ -1392,6 +1399,7 @@ struct net_device_ops {
  * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external
  *     entity (i.e. the master device for bridged veth)
  * @IFF_MACSEC: device is a MACsec device
+ * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook
  */
 enum netdev_priv_flags {
        IFF_802_1Q_VLAN                 = 1<<0,
@@ -1412,16 +1420,15 @@ enum netdev_priv_flags {
        IFF_LIVE_ADDR_CHANGE            = 1<<15,
        IFF_MACVLAN                     = 1<<16,
        IFF_XMIT_DST_RELEASE_PERM       = 1<<17,
-       IFF_IPVLAN_MASTER               = 1<<18,
-       IFF_IPVLAN_SLAVE                = 1<<19,
-       IFF_L3MDEV_MASTER               = 1<<20,
-       IFF_NO_QUEUE                    = 1<<21,
-       IFF_OPENVSWITCH                 = 1<<22,
-       IFF_L3MDEV_SLAVE                = 1<<23,
-       IFF_TEAM                        = 1<<24,
-       IFF_RXFH_CONFIGURED             = 1<<25,
-       IFF_PHONY_HEADROOM              = 1<<26,
-       IFF_MACSEC                      = 1<<27,
+       IFF_L3MDEV_MASTER               = 1<<18,
+       IFF_NO_QUEUE                    = 1<<19,
+       IFF_OPENVSWITCH                 = 1<<20,
+       IFF_L3MDEV_SLAVE                = 1<<21,
+       IFF_TEAM                        = 1<<22,
+       IFF_RXFH_CONFIGURED             = 1<<23,
+       IFF_PHONY_HEADROOM              = 1<<24,
+       IFF_MACSEC                      = 1<<25,
+       IFF_NO_RX_HANDLER               = 1<<26,
 };
 
 #define IFF_802_1Q_VLAN                        IFF_802_1Q_VLAN
@@ -1442,8 +1449,6 @@ enum netdev_priv_flags {
 #define IFF_LIVE_ADDR_CHANGE           IFF_LIVE_ADDR_CHANGE
 #define IFF_MACVLAN                    IFF_MACVLAN
 #define IFF_XMIT_DST_RELEASE_PERM      IFF_XMIT_DST_RELEASE_PERM
-#define IFF_IPVLAN_MASTER              IFF_IPVLAN_MASTER
-#define IFF_IPVLAN_SLAVE               IFF_IPVLAN_SLAVE
 #define IFF_L3MDEV_MASTER              IFF_L3MDEV_MASTER
 #define IFF_NO_QUEUE                   IFF_NO_QUEUE
 #define IFF_OPENVSWITCH                        IFF_OPENVSWITCH
@@ -1451,6 +1456,7 @@ enum netdev_priv_flags {
 #define IFF_TEAM                       IFF_TEAM
 #define IFF_RXFH_CONFIGURED            IFF_RXFH_CONFIGURED
 #define IFF_MACSEC                     IFF_MACSEC
+#define IFF_NO_RX_HANDLER              IFF_NO_RX_HANDLER
 
 /**
  *     struct net_device - The DEVICE structure.
@@ -4223,16 +4229,6 @@ static inline bool netif_is_macvlan_port(const struct net_device *dev)
        return dev->priv_flags & IFF_MACVLAN_PORT;
 }
 
-static inline bool netif_is_ipvlan(const struct net_device *dev)
-{
-       return dev->priv_flags & IFF_IPVLAN_SLAVE;
-}
-
-static inline bool netif_is_ipvlan_port(const struct net_device *dev)
-{
-       return dev->priv_flags & IFF_IPVLAN_MASTER;
-}
-
 static inline bool netif_is_bond_master(const struct net_device *dev)
 {
        return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING;
index 1313b35c3ab7914a26c463aa9113756dd4e4ceae..14529511c4b8466ab81986ee9d874538bbe0e09a 100644 (file)
@@ -285,6 +285,8 @@ unsigned int *xt_alloc_entry_offsets(unsigned int size);
 bool xt_find_jump_offset(const unsigned int *offsets,
                         unsigned int target, unsigned int size);
 
+int xt_check_proc_name(const char *name, unsigned int size);
+
 int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto,
                   bool inv_proto);
 int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto,
index fbc98e2c8228d0b8ca17e4af9be1cb1cddaccd27..e791ebc65c9c0776325cdc5e72de5d995038d7e0 100644 (file)
@@ -5,6 +5,7 @@
 
 #ifndef _LINUX_NOSPEC_H
 #define _LINUX_NOSPEC_H
+#include <asm/barrier.h>
 
 /**
  * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
@@ -29,26 +30,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 }
 #endif
 
-/*
- * Warn developers about inappropriate array_index_nospec() usage.
- *
- * Even if the CPU speculates past the WARN_ONCE branch, the
- * sign bit of @index is taken into account when generating the
- * mask.
- *
- * This warning is compiled out when the compiler can infer that
- * @index and @size are less than LONG_MAX.
- */
-#define array_index_mask_nospec_check(index, size)                             \
-({                                                                             \
-       if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX,                      \
-           "array_index_nospec() limited to range of [0, LONG_MAX]\n"))        \
-               _mask = 0;                                                      \
-       else                                                                    \
-               _mask = array_index_mask_nospec(index, size);                   \
-       _mask;                                                                  \
-})
-
 /*
  * array_index_nospec - sanitize an array index after a bounds check
  *
@@ -67,12 +48,11 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 ({                                                                     \
        typeof(index) _i = (index);                                     \
        typeof(size) _s = (size);                                       \
-       unsigned long _mask = array_index_mask_nospec_check(_i, _s);    \
+       unsigned long _mask = array_index_mask_nospec(_i, _s);          \
                                                                        \
        BUILD_BUG_ON(sizeof(_i) > sizeof(long));                        \
        BUILD_BUG_ON(sizeof(_s) > sizeof(long));                        \
                                                                        \
-       _i &= _mask;                                                    \
-       _i;                                                             \
+       (typeof(_i)) (_i & _mask);                                      \
 })
 #endif /* _LINUX_NOSPEC_H */
index 88865e0ebf4dd551f8c41bd9fbc0f78d7899b552..091033a6b836fd78bd4a22dfdb622eb4b39c32b8 100644 (file)
@@ -13,7 +13,6 @@ struct device_node;
 struct device_node *of_pci_find_child_device(struct device_node *parent,
                                             unsigned int devfn);
 int of_pci_get_devfn(struct device_node *np);
-int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin);
 int of_pci_parse_bus_range(struct device_node *node, struct resource *res);
 int of_get_pci_domain_nr(struct device_node *node);
 int of_pci_get_max_link_speed(struct device_node *node);
@@ -33,12 +32,6 @@ static inline int of_pci_get_devfn(struct device_node *np)
        return -EINVAL;
 }
 
-static inline int
-of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin)
-{
-       return 0;
-}
-
 static inline int
 of_pci_parse_bus_range(struct device_node *node, struct resource *res)
 {
@@ -67,6 +60,16 @@ of_pci_get_max_link_speed(struct device_node *node)
 static inline void of_pci_check_probe_only(void) { }
 #endif
 
+#if IS_ENABLED(CONFIG_OF_IRQ)
+int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin);
+#else
+static inline int
+of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+       return 0;
+}
+#endif
+
 #if defined(CONFIG_OF_ADDRESS)
 int of_pci_get_host_bridge_resources(struct device_node *dev,
                        unsigned char busno, unsigned char bus_max,
index 864d167a10739e464ec534ad3b32a96757e316ac..009cdf3d65b63e0db3ddcf243d2ddfdff7be14fd 100644 (file)
  * calls io_destroy() or the process exits.
  *
  * In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it
- * calls percpu_ref_kill(), then hlist_del_rcu() and synchronize_rcu() to remove
- * the kioctx from the proccess's list of kioctxs - after that, there can't be
- * any new users of the kioctx (from lookup_ioctx()) and it's then safe to drop
- * the initial ref with percpu_ref_put().
+ * removes the kioctx from the process's table of kioctxs and kills percpu_ref.
+ * After that, there can't be any new users of the kioctx (from lookup_ioctx())
+ * and it's then safe to drop the initial ref with percpu_ref_put().
+ *
+ * Note that the free path, free_ioctx(), needs to go through explicit call_rcu()
+ * to synchronize with RCU protected lookup_ioctx().  percpu_ref operations don't
+ * imply RCU grace periods of any kind and if a user wants to combine percpu_ref
+ * with RCU protection, it must be done explicitly.
  *
  * Code that does a two stage shutdown like this often needs some kind of
  * explicit synchronization to ensure the initial refcount can only be dropped
@@ -113,8 +117,10 @@ void percpu_ref_reinit(struct percpu_ref *ref);
  * Must be used to drop the initial ref on a percpu refcount; must be called
  * precisely once before shutdown.
  *
- * Puts @ref in non percpu mode, then does a call_rcu() before gathering up the
- * percpu counters and dropping the initial ref.
+ * Switches @ref into atomic mode before gathering up the percpu counters
+ * and dropping the initial ref.
+ *
+ * There are no implied RCU grace periods between kill and release.
  */
 static inline void percpu_ref_kill(struct percpu_ref *ref)
 {
index 5a0c3e53e7c2057e24e8b652ce223b376cd734ad..f0b5870a6d40b66437e8f3f9bf5d894c677fbc58 100644 (file)
@@ -924,6 +924,7 @@ void phy_device_remove(struct phy_device *phydev);
 int phy_init_hw(struct phy_device *phydev);
 int phy_suspend(struct phy_device *phydev);
 int phy_resume(struct phy_device *phydev);
+int __phy_resume(struct phy_device *phydev);
 int phy_loopback(struct phy_device *phydev, bool enable);
 struct phy_device *phy_attach(struct net_device *dev, const char *bus_id,
                              phy_interface_t interface);
@@ -983,6 +984,10 @@ static inline int genphy_no_soft_reset(struct phy_device *phydev)
 {
        return 0;
 }
+int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad,
+                               u16 regnum);
+int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum,
+                                u16 regnum, u16 val);
 
 /* Clause 45 PHY */
 int genphy_c45_restart_aneg(struct phy_device *phydev);
@@ -994,6 +999,14 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev);
 int genphy_c45_an_disable_aneg(struct phy_device *phydev);
 int genphy_c45_read_mdix(struct phy_device *phydev);
 
+/* The gen10g_* functions are the old Clause 45 stub */
+int gen10g_config_aneg(struct phy_device *phydev);
+int gen10g_read_status(struct phy_device *phydev);
+int gen10g_no_soft_reset(struct phy_device *phydev);
+int gen10g_config_init(struct phy_device *phydev);
+int gen10g_suspend(struct phy_device *phydev);
+int gen10g_resume(struct phy_device *phydev);
+
 static inline int phy_read_status(struct phy_device *phydev)
 {
        if (!phydev->drv)
@@ -1011,7 +1024,6 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner);
 int phy_drivers_register(struct phy_driver *new_driver, int n,
                         struct module *owner);
 void phy_state_machine(struct work_struct *work);
-void phy_change(struct phy_device *phydev);
 void phy_change_work(struct work_struct *work);
 void phy_mac_interrupt(struct phy_device *phydev);
 void phy_start_machine(struct phy_device *phydev);
index c9df2527e0cdd60cef38e7d614dd72f5df21a4fc..668a21f04b09665018ffaf91989f32995fbbdfc4 100644 (file)
@@ -766,8 +766,10 @@ static inline void *__rhashtable_insert_fast(
                if (!key ||
                    (params.obj_cmpfn ?
                     params.obj_cmpfn(&arg, rht_obj(ht, head)) :
-                    rhashtable_compare(&arg, rht_obj(ht, head))))
+                    rhashtable_compare(&arg, rht_obj(ht, head)))) {
+                       pprev = &head->next;
                        continue;
+               }
 
                data = rht_obj(ht, head);
 
index 3573b4bf2fdf68e6625b14a0b3c623b57485c508..562a175c35a9a9af40268d81c15013d1d6500d4a 100644 (file)
@@ -33,6 +33,7 @@ extern void rtnl_lock(void);
 extern void rtnl_unlock(void);
 extern int rtnl_trylock(void);
 extern int rtnl_is_locked(void);
+extern int rtnl_lock_killable(void);
 
 extern wait_queue_head_t netdev_unregistering_wq;
 extern struct rw_semaphore net_sem;
index e724d5a3dd80e5cee3bb4f31d06d76b3a9cf901e..ebce9e24906a7121c6c24c806be252dbd7887334 100644 (file)
@@ -422,10 +422,11 @@ struct sfp_upstream_ops {
 #if IS_ENABLED(CONFIG_SFP)
 int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
                   unsigned long *support);
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
-                                   const struct sfp_eeprom_id *id);
 void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
                       unsigned long *support);
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+                                    const struct sfp_eeprom_id *id,
+                                    unsigned long *link_modes);
 
 int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo);
 int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
@@ -444,18 +445,19 @@ static inline int sfp_parse_port(struct sfp_bus *bus,
        return PORT_OTHER;
 }
 
-static inline phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
-                                               const struct sfp_eeprom_id *id)
-{
-       return PHY_INTERFACE_MODE_NA;
-}
-
 static inline void sfp_parse_support(struct sfp_bus *bus,
                                     const struct sfp_eeprom_id *id,
                                     unsigned long *support)
 {
 }
 
+static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+                                                  const struct sfp_eeprom_id *id,
+                                                  unsigned long *link_modes)
+{
+       return PHY_INTERFACE_MODE_NA;
+}
+
 static inline int sfp_get_module_info(struct sfp_bus *bus,
                                      struct ethtool_modinfo *modinfo)
 {
index 9bc1750ca3d38b23b7c8226c8d2ec14aba036f52..47082f54ec1f645bed95a37d8c101efd160c60dc 100644 (file)
@@ -3288,8 +3288,7 @@ int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
 int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
 void skb_scrub_packet(struct sk_buff *skb, bool xnet);
-unsigned int skb_gso_transport_seglen(const struct sk_buff *skb);
-bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu);
+bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu);
 bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len);
 struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
 struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
@@ -4041,6 +4040,12 @@ static inline bool skb_is_gso_v6(const struct sk_buff *skb)
        return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
 }
 
+/* Note: Should be called only if skb_is_gso(skb) is true */
+static inline bool skb_is_gso_sctp(const struct sk_buff *skb)
+{
+       return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP;
+}
+
 static inline void skb_gso_reset(struct sk_buff *skb)
 {
        skb_shinfo(skb)->gso_size = 0;
@@ -4048,6 +4053,22 @@ static inline void skb_gso_reset(struct sk_buff *skb)
        skb_shinfo(skb)->gso_type = 0;
 }
 
+static inline void skb_increase_gso_size(struct skb_shared_info *shinfo,
+                                        u16 increment)
+{
+       if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS))
+               return;
+       shinfo->gso_size += increment;
+}
+
+static inline void skb_decrease_gso_size(struct skb_shared_info *shinfo,
+                                        u16 decrement)
+{
+       if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS))
+               return;
+       shinfo->gso_size -= decrement;
+}
+
 void __skb_warn_lro_forwarding(const struct sk_buff *skb);
 
 static inline bool skb_warn_if_lro(const struct sk_buff *skb)
@@ -4107,38 +4128,6 @@ static inline bool skb_head_is_locked(const struct sk_buff *skb)
        return !skb->head_frag || skb_cloned(skb);
 }
 
-/**
- * skb_gso_network_seglen - Return length of individual segments of a gso packet
- *
- * @skb: GSO skb
- *
- * skb_gso_network_seglen is used to determine the real size of the
- * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP).
- *
- * The MAC/L2 header is not accounted for.
- */
-static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
-{
-       unsigned int hdr_len = skb_transport_header(skb) -
-                              skb_network_header(skb);
-       return hdr_len + skb_gso_transport_seglen(skb);
-}
-
-/**
- * skb_gso_mac_seglen - Return length of individual segments of a gso packet
- *
- * @skb: GSO skb
- *
- * skb_gso_mac_seglen is used to determine the real size of the
- * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4
- * headers (TCP/UDP).
- */
-static inline unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
-{
-       unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
-       return hdr_len + skb_gso_transport_seglen(skb);
-}
-
 /* Local Checksum Offload.
  * Compute outer checksum based on the assumption that the
  * inner checksum will be offloaded later.
index 1ce1f768a58ca5a75e9cd6a0e2f3a2dd385d3910..60e01482a9c4a290f9a1265b2e0681d03911511f 100644 (file)
@@ -287,6 +287,7 @@ struct ucred {
 #define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */
 #define MSG_BATCH      0x40000 /* sendmmsg(): more messages coming */
 #define MSG_EOF         MSG_FIN
+#define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */
 
 #define MSG_ZEROCOPY   0x4000000       /* Use user data in kernel path */
 #define MSG_FASTOPEN   0x20000000      /* Send data in TCP SYN */
index 0a6c71e0ad01ec4f17845b0496b4a88a94f33949..47f8af22f2168f0376d77068ecbc2df930ae8f79 100644 (file)
@@ -364,6 +364,7 @@ struct tty_file_private {
 #define TTY_PTY_LOCK           16      /* pty private */
 #define TTY_NO_WRITE_SPLIT     17      /* Preserve write boundaries to driver */
 #define TTY_HUPPED             18      /* Post driver->hangup() */
+#define TTY_HUPPING            19      /* Hangup in progress */
 #define TTY_LDISC_HALTED       22      /* Line discipline is halted */
 
 /* Values for tty->flow_change */
index 5bdbd9f49395f883ca2dc5aa0d7bbde11f379063..07ee0f84a46caa9e2b1c446f96009f63b3b99f50 100644 (file)
@@ -90,6 +90,28 @@ static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
 #endif
 }
 
+static inline unsigned long
+u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
+{
+       unsigned long flags = 0;
+
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+       local_irq_save(flags);
+       write_seqcount_begin(&syncp->seq);
+#endif
+       return flags;
+}
+
+static inline void
+u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
+                               unsigned long flags)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+       write_seqcount_end(&syncp->seq);
+       local_irq_restore(flags);
+#endif
+}
+
 static inline void u64_stats_update_begin_raw(struct u64_stats_sync *syncp)
 {
 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
index f1fcec2fd5f87f45530a70d082a70c9feb67ea3e..b7a99ce56bc9ad6cd4c8ff2c5681d6fbb781b875 100644 (file)
@@ -63,4 +63,7 @@
  */
 #define USB_QUIRK_DISCONNECT_SUSPEND           BIT(12)
 
+/* Device needs a pause after every control message. */
+#define USB_QUIRK_DELAY_CTRL_MSG               BIT(13)
+
 #endif /* __LINUX_USB_QUIRKS_H */
index bc0cda180c8b701a154182b5dbc6a7f5b9da840a..0c3301421c57746c086996e67401a167c82fe98c 100644 (file)
@@ -456,7 +456,6 @@ extern int schedule_on_each_cpu(work_func_t func);
 int execute_in_process_context(work_func_t fn, struct execute_work *);
 
 extern bool flush_work(struct work_struct *work);
-extern bool cancel_work(struct work_struct *work);
 extern bool cancel_work_sync(struct work_struct *work);
 
 extern bool flush_delayed_work(struct delayed_work *dwork);
index c4df6cee48e6ab41520d73d4a9e12a7be76b0f07..bf00a5a41a90643437e3a750456ff534e9fa7945 100644 (file)
@@ -117,7 +117,7 @@ struct dmx_ts_feed {
  *               specified by @filter_value that will be used on the filter
  *               match logic.
  * @filter_mode:  Contains a 16 bytes (128 bits) filter mode.
- * @parent:      Pointer to struct dmx_section_feed.
+ * @parent:      Back-pointer to struct dmx_section_feed.
  * @priv:        Pointer to private data of the API client.
  *
  *
@@ -130,8 +130,9 @@ struct dmx_section_filter {
        u8 filter_value[DMX_MAX_FILTER_SIZE];
        u8 filter_mask[DMX_MAX_FILTER_SIZE];
        u8 filter_mode[DMX_MAX_FILTER_SIZE];
-       struct dmx_section_feed *parent; /* Back-pointer */
-       void *priv; /* Pointer to private data of the API client */
+       struct dmx_section_feed *parent;
+
+       void *priv;
 };
 
 /**
@@ -193,6 +194,10 @@ struct dmx_section_feed {
  * @buffer2:           Pointer to the tail of the filtered TS packets, or NULL.
  * @buffer2_length:    Length of the TS data in buffer2.
  * @source:            Indicates which TS feed is the source of the callback.
+ * @buffer_flags:      Address where buffer flags are stored. Those are
+ *                     used to report discontinuity to users via DVB
+ *                     memory mapped API, as defined by
+ *                     &enum dmx_buffer_flags.
  *
  * This function callback prototype, provided by the client of the demux API,
  * is called from the demux code. The function is only called when filtering
@@ -245,7 +250,8 @@ typedef int (*dmx_ts_cb)(const u8 *buffer1,
                         size_t buffer1_length,
                         const u8 *buffer2,
                         size_t buffer2_length,
-                        struct dmx_ts_feed *source);
+                        struct dmx_ts_feed *source,
+                        u32 *buffer_flags);
 
 /**
  * typedef dmx_section_cb - DVB demux TS filter callback function prototype
@@ -261,6 +267,10 @@ typedef int (*dmx_ts_cb)(const u8 *buffer1,
  *                     including headers and CRC.
  * @source:            Indicates which section feed is the source of the
  *                     callback.
+ * @buffer_flags:      Address where buffer flags are stored. Those are
+ *                     used to report discontinuity to users via DVB
+ *                     memory mapped API, as defined by
+ *                     &enum dmx_buffer_flags.
  *
  * This function callback prototype, provided by the client of the demux API,
  * is called from the demux code. The function is only called when
@@ -286,7 +296,8 @@ typedef int (*dmx_section_cb)(const u8 *buffer1,
                              size_t buffer1_len,
                              const u8 *buffer2,
                              size_t buffer2_len,
-                             struct dmx_section_filter *source);
+                             struct dmx_section_filter *source,
+                             u32 *buffer_flags);
 
 /*
  * DVB Front-End
index 2f5cb2c7b6a75dcf582d9f8fb6e97985c8b47888..baafa3b8aca4140ec3fe426138bada16e0a1da6f 100644 (file)
@@ -163,6 +163,7 @@ struct dmxdev_filter {
  * @demux:             pointer to &struct dmx_demux.
  * @filternum:         number of filters.
  * @capabilities:      demux capabilities as defined by &enum dmx_demux_caps.
+ * @may_do_mmap:       flag used to indicate if the device may do mmap.
  * @exit:              flag to indicate that the demux is being released.
  * @dvr_orig_fe:       pointer to &struct dmx_frontend.
  * @dvr_buffer:                embedded &struct dvb_ringbuffer for DVB output.
@@ -180,6 +181,7 @@ struct dmxdev {
        int filternum;
        int capabilities;
 
+       unsigned int may_do_mmap:1;
        unsigned int exit:1;
 #define DMXDEV_CAP_DUPLEX 1
        struct dmx_frontend *dvr_orig_fe;
index b07092038f4bde977cd50a40c16e263d2f4293ed..3b6aeca7a49e44f5dd2a602842d14362951404ae 100644 (file)
@@ -115,6 +115,8 @@ struct dvb_demux_filter {
  * @pid:       PID to be filtered.
  * @timeout:   feed timeout.
  * @filter:    pointer to &struct dvb_demux_filter.
+ * @buffer_flags: Buffer flags used to report discontinuity to users via DVB
+ *               memory mapped API, as defined by &enum dmx_buffer_flags.
  * @ts_type:   type of TS, as defined by &enum ts_filter_type.
  * @pes_type:  type of PES, as defined by &enum dmx_ts_pes.
  * @cc:                MPEG-TS packet continuity counter
@@ -145,6 +147,8 @@ struct dvb_demux_feed {
        ktime_t timeout;
        struct dvb_demux_filter *filter;
 
+       u32 buffer_flags;
+
        enum ts_filter_type ts_type;
        enum dmx_ts_pes pes_type;
 
index 01d1202d1a55b8ed30d92e4ad46b20cbd17409a0..8cb88452cd6c287ced6cea133419096ef1a22955 100644 (file)
@@ -85,6 +85,12 @@ struct dvb_buffer {
  * @nonblocking:
  *             If different than zero, device is operating on non-blocking
  *             mode.
+ * @flags:     buffer flags as defined by &enum dmx_buffer_flags.
+ *             Filled only at &DMX_DQBUF. &DMX_QBUF should zero this field.
+ * @count:     monotonic counter for filled buffers. Helps to identify
+ *             data stream losses. Filled only at &DMX_DQBUF. &DMX_QBUF should
+ *             zero this field.
+ *
  * @name:      name of the device type. Currently, it can either be
  *             "dvr" or "demux_filter".
  */
@@ -100,10 +106,14 @@ struct dvb_vb2_ctx {
        int     buf_siz;
        int     buf_cnt;
        int     nonblocking;
+
+       enum dmx_buffer_flags flags;
+       u32     count;
+
        char    name[DVB_VB2_NAME_MAX + 1];
 };
 
-#ifndef DVB_MMAP
+#ifndef CONFIG_DVB_MMAP
 static inline int dvb_vb2_init(struct dvb_vb2_ctx *ctx,
                               const char *name, int non_blocking)
 {
@@ -114,7 +124,7 @@ static inline int dvb_vb2_release(struct dvb_vb2_ctx *ctx)
        return 0;
 };
 #define dvb_vb2_is_streaming(ctx) (0)
-#define dvb_vb2_fill_buffer(ctx, file, wait) (0)
+#define dvb_vb2_fill_buffer(ctx, file, wait, flags) (0)
 
 static inline __poll_t dvb_vb2_poll(struct dvb_vb2_ctx *ctx,
                                    struct file *file,
@@ -153,9 +163,13 @@ int dvb_vb2_is_streaming(struct dvb_vb2_ctx *ctx);
  * @ctx:       control struct for VB2 handler
  * @src:       place where the data is stored
  * @len:       number of bytes to be copied from @src
+ * @buffer_flags:
+ *             pointer to buffer flags as defined by &enum dmx_buffer_flags.
+ *             can be NULL.
  */
 int dvb_vb2_fill_buffer(struct dvb_vb2_ctx *ctx,
-                       const unsigned char *src, int len);
+                       const unsigned char *src, int len,
+                       enum dmx_buffer_flags *buffer_flags);
 
 /**
  * dvb_vb2_poll - Wrapper to vb2_core_streamon() for Digital TV
index 336da258885a5515b70835e77f752354cb8e2946..9cce0d80d37aed6c428830f8662c011a1d5ebe92 100644 (file)
@@ -20,7 +20,6 @@ struct net_device *cs89x0_probe(int unit);
 struct net_device *mvme147lance_probe(int unit);
 struct net_device *tc515_probe(int unit);
 struct net_device *lance_probe(int unit);
-struct net_device *mac89x0_probe(int unit);
 struct net_device *cops_probe(int unit);
 struct net_device *ltpc_probe(void);
 
index 9c2f22695025e612e42a1e0e650cf64943b87a2a..9e59ebfded625426c2c3764225d58081b704cf0a 100644 (file)
@@ -97,6 +97,7 @@ struct tc_action_ops {
                        const struct tc_action_ops *,
                        struct netlink_ext_ack *);
        void    (*stats_update)(struct tc_action *, u64, u32, u64);
+       size_t  (*get_fill_size)(const struct tc_action *act);
        struct net_device *(*get_dev)(const struct tc_action *a);
 };
 
@@ -148,7 +149,6 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
 int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
                   struct tc_action **a, const struct tc_action_ops *ops,
                   int bind, bool cpustats);
-void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est);
 void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
 
 int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
@@ -166,7 +166,8 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
                    int nr_actions, struct tcf_result *res);
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
                    struct nlattr *est, char *name, int ovr, int bind,
-                   struct list_head *actions, struct netlink_ext_ack *extack);
+                   struct list_head *actions, size_t *attr_size,
+                   struct netlink_ext_ack *extack);
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                                    struct nlattr *nla, struct nlattr *est,
                                    char *name, int ovr, int bind,
index c4185a7b0e9029aea30da1a40649fa97de0fe0d6..132e5b95167ad617e5511bac831cc5fb206f247f 100644 (file)
@@ -69,8 +69,8 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg);
 int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
                  const struct net_device *dev, int strict);
 int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
-                           const struct net_device *dev, int strict,
-                           u32 banned_flags);
+                           const struct net_device *dev, bool skip_dev_check,
+                           int strict, u32 banned_flags);
 
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr);
index 56e905cd4b07f0403f5495b313574b7ffd976458..fc40843baed3b23956dcae307855cc46a7a2ae94 100644 (file)
@@ -4410,10 +4410,12 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr);
  *     of it being pushed into the SKB
  * @addr: the device MAC address
  * @iftype: the virtual interface type
+ * @data_offset: offset of payload after the 802.11 header
  * Return: 0 on success. Non-zero on error.
  */
 int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
-                                 const u8 *addr, enum nl80211_iftype iftype);
+                                 const u8 *addr, enum nl80211_iftype iftype,
+                                 u8 data_offset);
 
 /**
  * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3
@@ -4425,7 +4427,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
                                         enum nl80211_iftype iftype)
 {
-       return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype);
+       return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0);
 }
 
 /**
index 8d1c3f276dea76f00c0d702463aa1f10c6ccb301..e21d8cadd48096f56956903e108467b9bd25edab 100644 (file)
@@ -253,6 +253,18 @@ struct devlink_resource_size_params {
        enum devlink_resource_unit unit;
 };
 
+static inline void
+devlink_resource_size_params_init(struct devlink_resource_size_params *size_params,
+                                 u64 size_min, u64 size_max,
+                                 u64 size_granularity,
+                                 enum devlink_resource_unit unit)
+{
+       size_params->size_min = size_min;
+       size_params->size_max = size_max;
+       size_params->size_granularity = size_granularity;
+       size_params->unit = unit;
+}
+
 /**
  * struct devlink_resource - devlink resource
  * @name: name of the resource
@@ -274,7 +286,7 @@ struct devlink_resource {
        u64 size_new;
        bool size_valid;
        struct devlink_resource *parent;
-       struct devlink_resource_size_params *size_params;
+       struct devlink_resource_size_params size_params;
        struct list_head list;
        struct list_head resource_list;
        const struct devlink_resource_ops *resource_ops;
@@ -394,11 +406,10 @@ extern struct devlink_dpipe_header devlink_dpipe_header_ipv6;
 
 int devlink_resource_register(struct devlink *devlink,
                              const char *resource_name,
-                             bool top_hierarchy,
                              u64 resource_size,
                              u64 resource_id,
                              u64 parent_resource_id,
-                             struct devlink_resource_size_params *size_params,
+                             const struct devlink_resource_size_params *size_params,
                              const struct devlink_resource_ops *resource_ops);
 void devlink_resources_unregister(struct devlink *devlink,
                                  struct devlink_resource *resource);
@@ -548,11 +559,10 @@ devlink_dpipe_match_put(struct sk_buff *skb,
 static inline int
 devlink_resource_register(struct devlink *devlink,
                          const char *resource_name,
-                         bool top_hierarchy,
                          u64 resource_size,
                          u64 resource_id,
                          u64 parent_resource_id,
-                         struct devlink_resource_size_params *size_params,
+                         const struct devlink_resource_size_params *size_params,
                          const struct devlink_resource_ops *resource_ops)
 {
        return 0;
index 0ad17b63684d1b93583209b431065f20b5f9bb60..60fb4ec8ba616060f17963caa7cb2ac23847917b 100644 (file)
@@ -359,7 +359,7 @@ struct dsa_switch_ops {
        void    (*get_strings)(struct dsa_switch *ds, int port, uint8_t *data);
        void    (*get_ethtool_stats)(struct dsa_switch *ds,
                                     int port, uint64_t *data);
-       int     (*get_sset_count)(struct dsa_switch *ds);
+       int     (*get_sset_count)(struct dsa_switch *ds, int port);
 
        /*
         * ethtool Wake-on-LAN
index c63d2c37f6e92bb57c055b37d463c7dc6ecaa70a..b3219cd8a5a1eef9e1f19004397aa1cd6ae0948e 100644 (file)
@@ -356,6 +356,7 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
  *     skb_tunnel_rx - prepare skb for rx reinsert
  *     @skb: buffer
  *     @dev: tunnel device
+ *     @net: netns for packet i/o
  *
  *     After decapsulation, packet is going to re-enter (netif_rx()) our stack,
  *     so make some cleanups, and perform accounting.
index 72fd5067c35349a7a2cb76f42b2dd4c218e0c091..67634675e9197cdbd8225e0e4aa1547d8f09f036 100644 (file)
@@ -54,7 +54,7 @@ void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
  *     local BH must be disabled.
  */
 void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
-                      const struct in6_addr *addr);
+                      const struct in6_addr *saddr);
 
 /**
  *     dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address
@@ -71,7 +71,7 @@ struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
  *     dst_cache_reset - invalidate the cache contents
  *     @dst_cache: the cache
  *
- *     This do not free the cached dst to avoid races and contentions.
+ *     This does not free the cached dst to avoid races and contentions.
  *     the dst will be freed on later cache lookup.
  */
 static inline void dst_cache_reset(struct dst_cache *dst_cache)
index bb7f467da7fcd6957ada0111b9d412f298ebc729..29ba069a1d93689497ece08c2a0201620c96466c 100644 (file)
@@ -21,4 +21,3 @@ struct ethoc_platform_data {
 };
 
 #endif /* !LINUX_NET_ETHOC_H */
-
index b3d216249240b6b0e9370c6b00b171ab179bcde4..e5cfcfc7dd93c96b08c57458b71baf15418ff8ab 100644 (file)
@@ -27,7 +27,7 @@ struct fib_rule {
        u8                      action;
        u8                      l3mdev;
        u8                      proto;
-       /* 1 byte hole, try to use */
+       u8                      ip_proto;
        u32                     target;
        __be64                  tun_id;
        struct fib_rule __rcu   *ctarget;
@@ -40,11 +40,14 @@ struct fib_rule {
        char                    iifname[IFNAMSIZ];
        char                    oifname[IFNAMSIZ];
        struct fib_kuid_range   uid_range;
+       struct fib_rule_port_range      sport_range;
+       struct fib_rule_port_range      dport_range;
        struct rcu_head         rcu;
 };
 
 struct fib_lookup_arg {
        void                    *lookup_ptr;
+       const void              *lookup_data;
        void                    *result;
        struct fib_rule         *rule;
        u32                     table;
@@ -110,7 +113,11 @@ struct fib_rule_notifier_info {
        [FRA_GOTO]      = { .type = NLA_U32 }, \
        [FRA_L3MDEV]    = { .type = NLA_U8 }, \
        [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \
-       [FRA_PROTOCOL]  = { .type = NLA_U8 }
+       [FRA_PROTOCOL]  = { .type = NLA_U8 }, \
+       [FRA_IP_PROTO]  = { .type = NLA_U8 }, \
+       [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \
+       [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }
+
 
 static inline void fib_rule_get(struct fib_rule *rule)
 {
@@ -144,6 +151,38 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
        return frh->table;
 }
 
+static inline bool fib_rule_port_range_set(const struct fib_rule_port_range *range)
+{
+       return range->start != 0 && range->end != 0;
+}
+
+static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a,
+                                        __be16 port)
+{
+       return ntohs(port) >= a->start &&
+               ntohs(port) <= a->end;
+}
+
+static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a)
+{
+       return a->start != 0 && a->end != 0 && a->end < 0xffff &&
+               a->start <= a->end;
+}
+
+static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a,
+                                              struct fib_rule_port_range *b)
+{
+       return a->start == b->start &&
+               a->end == b->end;
+}
+
+static inline bool fib_rule_requires_fldissect(struct fib_rule *rule)
+{
+       return rule->ip_proto ||
+               fib_rule_port_range_set(&rule->sport_range) ||
+               fib_rule_port_range_set(&rule->dport_range);
+}
+
 struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *,
                                         struct net *);
 void fib_rules_unregister(struct fib_rules_ops *);
index f1624fd5b1d03630c09f34106ba4aa2de45d7e5c..8ce21793094e32248ac2656ad5c7a9d986b35970 100644 (file)
@@ -125,7 +125,7 @@ static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __u8 tos,
        fl4->daddr = daddr;
        fl4->saddr = saddr;
 }
-                                     
+
 
 struct flowi6 {
        struct flowi_common     __fl_common;
@@ -222,20 +222,4 @@ static inline unsigned int flow_key_size(u16 family)
 
 __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
 
-static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6)
-{
-       struct flow_keys keys;
-
-       return __get_hash_from_flowi6(fl6, &keys);
-}
-
-__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys);
-
-static inline __u32 get_hash_from_flowi4(const struct flowi4 *fl4)
-{
-       struct flow_keys keys;
-
-       return __get_hash_from_flowi4(fl4, &keys);
-}
-
 #endif
index f90585decbce647789d145d48c9ccd6615dc8a49..797142eee9cdec2c6cbc67fdaeaaa98a5aeddd24 100644 (file)
@@ -37,6 +37,9 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
 int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
                     bool *csum_err, __be16 proto, int nhs);
 
+bool is_gretap_dev(const struct net_device *dev);
+bool is_ip6gretap_dev(const struct net_device *dev);
+
 static inline int gre_calc_hlen(__be16 o_flags)
 {
        int addend = 4;
index c1a93ce35e6239f3b4f0bd560ffe2fcd8baf41d7..b68fea022a82e19976a9dfbaa1cdc44192e1fc08 100644 (file)
@@ -49,9 +49,9 @@ struct inet_connection_sock_af_ops {
        u16         net_header_len;
        u16         net_frag_header_len;
        u16         sockaddr_len;
-       int         (*setsockopt)(struct sock *sk, int level, int optname, 
+       int         (*setsockopt)(struct sock *sk, int level, int optname,
                                  char __user *optval, unsigned int optlen);
-       int         (*getsockopt)(struct sock *sk, int level, int optname, 
+       int         (*getsockopt)(struct sock *sk, int level, int optname,
                                  char __user *optval, int __user *optlen);
 #ifdef CONFIG_COMPAT
        int         (*compat_setsockopt)(struct sock *sk,
@@ -67,7 +67,7 @@ struct inet_connection_sock_af_ops {
 
 /** inet_connection_sock - INET connection oriented sock
  *
- * @icsk_accept_queue:    FIFO of established children 
+ * @icsk_accept_queue:    FIFO of established children
  * @icsk_bind_hash:       Bind node
  * @icsk_timeout:         Timeout
  * @icsk_retransmit_timer: Resend (no ack)
@@ -122,7 +122,7 @@ struct inet_connection_sock {
                unsigned long     timeout;       /* Currently scheduled timeout            */
                __u32             lrcvtime;      /* timestamp of last received data packet */
                __u16             last_seg_size; /* Size of last incoming segment          */
-               __u16             rcv_mss;       /* MSS used for delayed ACK decisions     */ 
+               __u16             rcv_mss;       /* MSS used for delayed ACK decisions     */
        } icsk_ack;
        struct {
                int               enabled;
@@ -201,7 +201,7 @@ extern const char inet_csk_timer_bug_msg[];
 static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
-       
+
        if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
                icsk->icsk_pending = 0;
 #ifdef INET_CSK_CLEAR_TIMERS
index 746abff9ce517ecbe1d580e5f53edfc93383da1a..36f8f7811093c37de06194dc7410b7596f8bf9fa 100644 (file)
@@ -91,6 +91,17 @@ static inline int inet_sdif(struct sk_buff *skb)
        return 0;
 }
 
+/* Special input handler for packets caught by router alert option.
+   They are selected only by protocol field, and then processed like
+   local ones; but only if someone wants them! Otherwise, a router
+   not running rsvpd will kill RSVP.
+
+   It is a user-level problem what it will do with them.
+   I have no idea how it will masquerade or NAT them (it is a joke, joke :-)),
+   but the receiver should be clever enough, e.g., to forward mtrace requests
+   sent to a multicast group to reach the destination designated router.
+ */
+
 struct ip_ra_chain {
        struct ip_ra_chain __rcu *next;
        struct sock             *sk;
@@ -101,8 +112,6 @@ struct ip_ra_chain {
        struct rcu_head         rcu;
 };
 
-extern struct ip_ra_chain __rcu *ip_ra_chain;
-
 /* IP flags. */
 #define IP_CE          0x8000          /* Flag: "Congestion"           */
 #define IP_DF          0x4000          /* Flag: "Don't Fragment"       */
@@ -186,15 +195,15 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
 void ip4_datagram_release_cb(struct sock *sk);
 
 struct ip_reply_arg {
-       struct kvec iov[1];   
+       struct kvec iov[1];
        int         flags;
        __wsum      csum;
        int         csumoffset; /* u16 offset of csum in iov[0].iov_base */
-                               /* -1 if not needed */ 
+                               /* -1 if not needed */
        int         bound_dev_if;
        u8          tos;
        kuid_t      uid;
-}; 
+};
 
 #define IP_REPLY_ARG_NOSRCCHECK 1
 
@@ -328,6 +337,13 @@ int ip_decrease_ttl(struct iphdr *iph)
        return --iph->ttl;
 }
 
+static inline int ip_mtu_locked(const struct dst_entry *dst)
+{
+       const struct rtable *rt = (const struct rtable *)dst;
+
+       return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU);
+}
+
 static inline
 int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst)
 {
@@ -335,7 +351,7 @@ int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst)
 
        return  pmtudisc == IP_PMTUDISC_DO ||
                (pmtudisc == IP_PMTUDISC_WANT &&
-                !(dst_metric_locked(dst, RTAX_MTU)));
+                !ip_mtu_locked(dst));
 }
 
 static inline bool ip_sk_accept_pmtu(const struct sock *sk)
@@ -361,7 +377,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
        struct net *net = dev_net(dst->dev);
 
        if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
-           dst_metric_locked(dst, RTAX_MTU) ||
+           ip_mtu_locked(dst) ||
            !forwarding)
                return dst_mtu(dst);
 
@@ -577,13 +593,13 @@ int ip_frag_mem(struct net *net);
 /*
  *     Functions provided by ip_forward.c
  */
+
 int ip_forward(struct sk_buff *skb);
+
 /*
  *     Functions provided by ip_options.c
  */
+
 void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
                      __be32 daddr, struct rtable *rt, int is_frag);
 
index 34ec321d6a03baaf1efdfe591a5ca1f27b1d276c..5e86fd9dc8573856a6e4f259ea1504054f25d9c1 100644 (file)
@@ -350,7 +350,8 @@ struct fib6_table {
 
 typedef struct rt6_info *(*pol_lookup_t)(struct net *,
                                         struct fib6_table *,
-                                        struct flowi6 *, int);
+                                        struct flowi6 *,
+                                        const struct sk_buff *, int);
 
 struct fib6_entry_notifier_info {
        struct fib_notifier_info info; /* must be first */
@@ -364,6 +365,7 @@ struct fib6_entry_notifier_info {
 struct fib6_table *fib6_get_table(struct net *net, u32 id);
 struct fib6_table *fib6_new_table(struct net *net, u32 id);
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+                                  const struct sk_buff *skb,
                                   int flags, pol_lookup_t lookup);
 
 struct fib6_node *fib6_lookup(struct fib6_node *root,
@@ -415,6 +417,24 @@ void fib6_rules_cleanup(void);
 bool fib6_rule_default(const struct fib_rule *rule);
 int fib6_rules_dump(struct net *net, struct notifier_block *nb);
 unsigned int fib6_rules_seq_read(struct net *net);
+
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+                                                struct sk_buff *skb,
+                                                struct flowi6 *fl6,
+                                                struct flow_keys *flkeys)
+{
+       unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+       if (!net->ipv6.fib6_rules_require_fldissect)
+               return false;
+
+       skb_flow_dissect_flow_keys(skb, flkeys, flag);
+       fl6->fl6_sport = flkeys->ports.src;
+       fl6->fl6_dport = flkeys->ports.dst;
+       fl6->flowi6_proto = flkeys->basic.ip_proto;
+
+       return true;
+}
 #else
 static inline int               fib6_rules_init(void)
 {
@@ -436,5 +456,12 @@ static inline unsigned int fib6_rules_seq_read(struct net *net)
 {
        return 0;
 }
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+                                                struct sk_buff *skb,
+                                                struct flowi6 *fl6,
+                                                struct flow_keys *flkeys)
+{
+       return false;
+}
 #endif
 #endif
index 27d23a65f3cd0be2255859614690151e2d01b352..0084013d6bedb4a7a014b1c5811820d02e79789f 100644 (file)
@@ -75,7 +75,8 @@ static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
 void ip6_route_input(struct sk_buff *skb);
 struct dst_entry *ip6_route_input_lookup(struct net *net,
                                         struct net_device *dev,
-                                        struct flowi6 *fl6, int flags);
+                                        struct flowi6 *fl6,
+                                        const struct sk_buff *skb, int flags);
 
 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
                                         struct flowi6 *fl6, int flags);
@@ -88,9 +89,10 @@ static inline struct dst_entry *ip6_route_output(struct net *net,
 }
 
 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
-                                  int flags);
+                                  const struct sk_buff *skb, int flags);
 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
-                              int ifindex, struct flowi6 *fl6, int flags);
+                              int ifindex, struct flowi6 *fl6,
+                              const struct sk_buff *skb, int flags);
 
 void ip6_route_init_special_entries(void);
 int ip6_route_init(void);
@@ -126,8 +128,10 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
 }
 
 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
-                           const struct in6_addr *saddr, int oif, int flags);
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb);
+                           const struct in6_addr *saddr, int oif,
+                           const struct sk_buff *skb, int flags);
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+                      const struct sk_buff *skb, struct flow_keys *hkeys);
 
 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
 
@@ -179,6 +183,9 @@ void rt6_disable_ip(struct net_device *dev, unsigned long event);
 void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
 void rt6_multipath_rebalance(struct rt6_info *rt);
 
+void rt6_uncached_list_add(struct rt6_info *rt);
+void rt6_uncached_list_del(struct rt6_info *rt);
+
 static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb)
 {
        const struct dst_entry *dst = skb_dst(skb);
@@ -266,4 +273,5 @@ static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b)
               ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
               !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
 }
+
 #endif
index f80524396c062db27dd4907a79a451fd9535e172..81d0f2107ff1a2e1d475712369452783145eac2b 100644 (file)
@@ -59,6 +59,7 @@ struct fib_nh_exception {
        int                             fnhe_genid;
        __be32                          fnhe_daddr;
        u32                             fnhe_pmtu;
+       bool                            fnhe_mtu_locked;
        __be32                          fnhe_gw;
        unsigned long                   fnhe_expires;
        struct rtable __rcu             *fnhe_rth_input;
@@ -157,7 +158,7 @@ struct fib_result_nl {
        unsigned char   nh_sel;
        unsigned char   type;
        unsigned char   scope;
-       int             err;      
+       int             err;
 };
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -293,6 +294,13 @@ static inline unsigned int fib4_rules_seq_read(struct net *net)
        return 0;
 }
 
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+                                                struct sk_buff *skb,
+                                                struct flowi4 *fl4,
+                                                struct flow_keys *flkeys)
+{
+       return false;
+}
 #else /* CONFIG_IP_MULTIPLE_TABLES */
 int __net_init fib4_rules_init(struct net *net);
 void __net_exit fib4_rules_exit(struct net *net);
@@ -341,6 +349,24 @@ bool fib4_rule_default(const struct fib_rule *rule);
 int fib4_rules_dump(struct net *net, struct notifier_block *nb);
 unsigned int fib4_rules_seq_read(struct net *net);
 
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+                                                struct sk_buff *skb,
+                                                struct flowi4 *fl4,
+                                                struct flow_keys *flkeys)
+{
+       unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+       if (!net->ipv4.fib_rules_require_fldissect)
+               return false;
+
+       skb_flow_dissect_flow_keys(skb, flkeys, flag);
+       fl4->fl4_sport = flkeys->ports.src;
+       fl4->fl4_dport = flkeys->ports.dst;
+       fl4->flowi4_proto = flkeys->basic.ip_proto;
+
+       return true;
+}
+
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
 /* Exported by fib_frontend.c */
@@ -370,8 +396,8 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local);
 int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
-                      const struct sk_buff *skb);
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
+                      const struct sk_buff *skb, struct flow_keys *flkeys);
 #endif
 void fib_select_multipath(struct fib_result *res, int hash);
 void fib_select_path(struct net *net, struct fib_result *res,
index 1f16773cfd76b9bcd2deed0973eda1336e02317c..540a4b4417bfbe4654a6da34ed2ee6d7543dbc62 100644 (file)
@@ -180,8 +180,10 @@ struct tnl_ptk_info {
 
 struct ip_tunnel_net {
        struct net_device *fb_tunnel_dev;
+       struct rtnl_link_ops *rtnl_link_ops;
        struct hlist_head tunnels[IP_TNL_HASH_SIZE];
        struct ip_tunnel __rcu *collect_md_tun;
+       int type;
 };
 
 static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
@@ -254,6 +256,22 @@ static inline __be32 tunnel_id_to_key32(__be64 tun_id)
 
 #ifdef CONFIG_INET
 
+static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
+                                      int proto,
+                                      __be32 daddr, __be32 saddr,
+                                      __be32 key, __u8 tos, int oif,
+                                      __u32 mark)
+{
+       memset(fl4, 0, sizeof(*fl4));
+       fl4->flowi4_oif = oif;
+       fl4->daddr = daddr;
+       fl4->saddr = saddr;
+       fl4->flowi4_tos = tos;
+       fl4->flowi4_proto = proto;
+       fl4->fl4_gre_key = key;
+       fl4->flowi4_mark = mark;
+}
+
 int ip_tunnel_init(struct net_device *dev);
 void ip_tunnel_uninit(struct net_device *dev);
 void  ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
index 7a98cd583c73ca858342a230daf371f5169f23ff..50a6f0ddb8780f6c9169f4ae0b3b35af2d66cd4b 100644 (file)
 
 #define IPV6_ADDR_ANY          0x0000U
 
-#define IPV6_ADDR_UNICAST              0x0001U 
-#define IPV6_ADDR_MULTICAST            0x0002U 
+#define IPV6_ADDR_UNICAST      0x0001U
+#define IPV6_ADDR_MULTICAST    0x0002U
 
 #define IPV6_ADDR_LOOPBACK     0x0010U
 #define IPV6_ADDR_LINKLOCAL    0x0020U
@@ -447,7 +447,7 @@ ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m,
 #endif
 }
 
-static inline void ipv6_addr_prefix(struct in6_addr *pfx, 
+static inline void ipv6_addr_prefix(struct in6_addr *pfx,
                                    const struct in6_addr *addr,
                                    int plen)
 {
@@ -496,7 +496,7 @@ static inline void __ipv6_addr_set_half(__be32 *addr,
        addr[1] = wl;
 }
 
-static inline void ipv6_addr_set(struct in6_addr *addr, 
+static inline void ipv6_addr_set(struct in6_addr *addr,
                                     __be32 w1, __be32 w2,
                                     __be32 w3, __be32 w4)
 {
@@ -732,7 +732,7 @@ static inline int __ipv6_addr_diff32(const void *token1, const void *token2, int
        }
 
        /*
-        *      we should *never* get to this point since that 
+        *      we should *never* get to this point since that
         *      would mean the addrs are equal
         *
         *      However, we do get to it 8) And exacly, when
@@ -888,6 +888,17 @@ static inline int ip6_default_np_autolabel(struct net *net)
 }
 #endif
 
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int ip6_multipath_hash_policy(const struct net *net)
+{
+       return net->ipv6.sysctl.multipath_hash_policy;
+}
+#else
+static inline int ip6_multipath_hash_policy(const struct net *net)
+{
+       return 0;
+}
+#endif
 
 /*
  *     Header manipulation
index 2fd59ed3be00b5b1e9d6749494819c88be242130..2449982daf753033d7d041b4fa75825f8f65ea7c 100644 (file)
@@ -2077,6 +2077,9 @@ struct ieee80211_txq {
  *     virtual interface might not be given air time for the transmission of
  *     the frame, as it is not synced with the AP/P2P GO yet, and thus the
  *     deauthentication frame might not be transmitted.
+ *
+ * @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't
+ *     support QoS NDP for AP probing - that's most likely a driver bug.
  *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
@@ -2122,6 +2125,7 @@ enum ieee80211_hw_flags {
        IEEE80211_HW_SUPPORTS_TX_FRAG,
        IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA,
        IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP,
+       IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP,
 
        /* keep last, obviously */
        NUM_IEEE80211_HW_FLAGS
index d4417495773a7e7220da21f4310e4d85bbe3e7da..09e30bdc78767cbf37472a5494839c44069bbb1e 100644 (file)
@@ -40,7 +40,7 @@ struct net_device;
 struct sock;
 struct ctl_table_header;
 struct net_generic;
-struct sock;
+struct uevent_sock;
 struct netns_ipvs;
 
 
@@ -83,6 +83,8 @@ struct net {
        struct sock             *rtnl;                  /* rtnetlink socket */
        struct sock             *genl_sock;
 
+       struct uevent_sock      *uevent_sock;           /* uevent socket */
+
        struct list_head        dev_base_head;
        struct hlist_head       *dev_name_head;
        struct hlist_head       *dev_index_head;
@@ -312,6 +314,20 @@ struct net *get_net_ns_by_id(struct net *net, int id);
 
 struct pernet_operations {
        struct list_head list;
+       /*
+        * Below methods are called without any exclusive locks.
+        * More than one net may be constructed and destructed
+        * in parallel on several cpus. Every pernet_operations
+        * have to keep in mind all other pernet_operations and
+        * to introduce a locking, if they share common resources.
+        *
+        * Exit methods using blocking RCU primitives, such as
+        * synchronize_rcu(), should be implemented via exit_batch.
+        * Then, destruction of a group of net requires single
+        * synchronize_rcu() related to these pernet_operations,
+        * instead of separate synchronize_rcu() for every net.
+        * Please, avoid synchronize_rcu() at all, where it's possible.
+        */
        int (*init)(struct net *net);
        void (*exit)(struct net *net);
        void (*exit_batch)(struct list_head *net_exit_list);
index 40e7bab684905c2a5c924837064f4a4b0e18b1da..d9918261701c95ca1a2f598268549afdc286c6d6 100644 (file)
@@ -26,7 +26,8 @@ enum netevent_notif_type {
        NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */
        NETEVENT_REDIRECT,         /* arg is struct netevent_redirect ptr */
        NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
-       NETEVENT_MULTIPATH_HASH_UPDATE, /* arg is struct net ptr */
+       NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */
+       NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */
 };
 
 int register_netevent_notifier(struct notifier_block *nb);
index 44668c29701a72a5116781d4b76267c0068f1143..8491bc9c86b1553ab603e4363e8e38ca7ff547e0 100644 (file)
@@ -49,9 +49,12 @@ struct netns_ipv4 {
 #endif
        struct ipv4_devconf     *devconf_all;
        struct ipv4_devconf     *devconf_dflt;
+       struct ip_ra_chain __rcu *ra_chain;
+       struct mutex            ra_mutex;
 #ifdef CONFIG_IP_MULTIPLE_TABLES
        struct fib_rules_ops    *rules_ops;
        bool                    fib_has_custom_rules;
+       unsigned int            fib_rules_require_fldissect;
        struct fib_table __rcu  *fib_main;
        struct fib_table __rcu  *fib_default;
 #endif
@@ -167,6 +170,9 @@ struct netns_ipv4 {
        atomic_t tfo_active_disable_times;
        unsigned long tfo_active_disable_stamp;
 
+       int sysctl_udp_wmem_min;
+       int sysctl_udp_rmem_min;
+
 #ifdef CONFIG_NET_L3_MASTER_DEV
        int sysctl_udp_l3mdev_accept;
 #endif
index 987cc4569cb8ba6159946f9e87feb04cb681ac8c..5b51110435fccb1a45bccdf1faa7d4a5c1c022b7 100644 (file)
@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 {
        int ip6_rt_gc_elasticity;
        int ip6_rt_mtu_expires;
        int ip6_rt_min_advmss;
+       int multipath_hash_policy;
        int flowlabel_consistency;
        int auto_flowlabels;
        int icmpv6_time;
@@ -71,7 +72,8 @@ struct netns_ipv6 {
        unsigned int             ip6_rt_gc_expire;
        unsigned long            ip6_rt_last_gc;
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-       bool                     fib6_has_custom_rules;
+       unsigned int            fib6_rules_require_fldissect;
+       bool                    fib6_has_custom_rules;
        struct rt6_info         *ip6_prohibit_entry;
        struct rt6_info         *ip6_blk_hole_entry;
        struct fib6_table       *fib6_local_tbl;
@@ -84,7 +86,7 @@ struct netns_ipv6 {
        struct sock             *mc_autojoin_sk;
 #ifdef CONFIG_IPV6_MROUTE
 #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
-       struct mr6_table        *mrt6;
+       struct mr_table         *mrt6;
 #else
        struct list_head        mr6_tables;
        struct fib_rules_ops    *mr6_rules_ops;
index 87406252f0a36cdeefb20c81a972c602341dce26..e828d31be5dae0ae8c69016dfde50379296484aa 100644 (file)
@@ -806,6 +806,7 @@ enum tc_prio_command {
        TC_PRIO_REPLACE,
        TC_PRIO_DESTROY,
        TC_PRIO_STATS,
+       TC_PRIO_GRAFT,
 };
 
 struct tc_prio_qopt_offload_params {
@@ -818,6 +819,11 @@ struct tc_prio_qopt_offload_params {
        struct gnet_stats_queue *qstats;
 };
 
+struct tc_prio_qopt_offload_graft_params {
+       u8 band;
+       u32 child_handle;
+};
+
 struct tc_prio_qopt_offload {
        enum tc_prio_command command;
        u32 handle;
@@ -825,6 +831,8 @@ struct tc_prio_qopt_offload {
        union {
                struct tc_prio_qopt_offload_params replace_params;
                struct tc_qopt_offload_stats stats;
+               struct tc_prio_qopt_offload_graft_params graft_params;
        };
 };
+
 #endif
index 158833ea7988b82f5eabc8a7eb907ef2ca1cecef..dbb032d5921b4b4816ac8a469107e52c857f172c 100644 (file)
@@ -63,7 +63,8 @@ struct rtable {
        __be32                  rt_gateway;
 
        /* Miscellaneous cached information */
-       u32                     rt_pmtu;
+       u32                     rt_mtu_locked:1,
+                               rt_pmtu:31;
 
        struct list_head        rt_uncached;
        struct uncached_list    *rt_uncached_list;
@@ -225,6 +226,9 @@ struct in_ifaddr;
 void fib_add_ifaddr(struct in_ifaddr *);
 void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *);
 
+void rt_add_uncached_list(struct rtable *rt);
+void rt_del_uncached_list(struct rtable *rt);
+
 static inline void ip_rt_put(struct rtable *rt)
 {
        /* dst_release() accepts a NULL parameter.
diff --git a/include/net/rsi_91x.h b/include/net/rsi_91x.h
new file mode 100644 (file)
index 0000000..040f07b
--- /dev/null
@@ -0,0 +1,56 @@
+/**
+ * Copyright (c) 2017 Redpine Signals Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __RSI_HEADER_H__
+#define __RSI_HEADER_H__
+
+#include <linux/skbuff.h>
+
+/* HAL queue information */
+#define RSI_COEX_Q                     0x0
+#define RSI_BT_Q                       0x2
+#define RSI_WLAN_Q                      0x3
+#define RSI_WIFI_MGMT_Q                 0x4
+#define RSI_WIFI_DATA_Q                 0x5
+#define RSI_BT_MGMT_Q                  0x6
+#define RSI_BT_DATA_Q                  0x7
+
+enum rsi_coex_queues {
+       RSI_COEX_Q_INVALID = -1,
+       RSI_COEX_Q_COMMON = 0,
+       RSI_COEX_Q_BT,
+       RSI_COEX_Q_WLAN
+};
+
+enum rsi_host_intf {
+       RSI_HOST_INTF_SDIO = 0,
+       RSI_HOST_INTF_USB
+};
+
+struct rsi_proto_ops {
+       int (*coex_send_pkt)(void *priv, struct sk_buff *skb, u8 hal_queue);
+       enum rsi_host_intf (*get_host_intf)(void *priv);
+       void (*set_bt_context)(void *priv, void *context);
+};
+
+struct rsi_mod_ops {
+       int (*attach)(void *priv, struct rsi_proto_ops *ops);
+       void (*detach)(void *priv);
+       int (*recv_pkt)(void *priv, const u8 *msg);
+};
+
+extern const struct rsi_mod_ops rsi_bt_ops;
+#endif
index e2ab13687fb9741f3977b5452a958f8e886189f4..493e311bbe93ab89ff50612f78e013c1b7fd17e0 100644 (file)
@@ -540,7 +540,7 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb)
        return false;
 }
 
-/* Reset all TX qdiscs greater then index of a device.  */
+/* Reset all TX qdiscs greater than index of a device.  */
 static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
 {
        struct Qdisc *qdisc;
@@ -824,6 +824,16 @@ static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free)
        *to_free = skb;
 }
 
+static inline void __qdisc_drop_all(struct sk_buff *skb,
+                                   struct sk_buff **to_free)
+{
+       if (skb->prev)
+               skb->prev->next = *to_free;
+       else
+               skb->next = *to_free;
+       *to_free = skb;
+}
+
 static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
                                                   struct qdisc_skb_head *qh,
                                                   struct sk_buff **to_free)
@@ -956,6 +966,15 @@ static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,
        return NET_XMIT_DROP;
 }
 
+static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch,
+                                struct sk_buff **to_free)
+{
+       __qdisc_drop_all(skb, to_free);
+       qdisc_qstats_drop(sch);
+
+       return NET_XMIT_DROP;
+}
+
 /* Length to Time (L2T) lookup in a qdisc_rate_table, to determine how
    long it will take to send a packet given its size.
  */
index e5c57d0a082d264e3f93cd95f98eb5cdfdea6a58..687e7f80037d08ed3178660ba707477af2b10626 100644 (file)
@@ -62,8 +62,10 @@ struct sctp_auth_bytes {
 /* Definition for a shared key, weather endpoint or association */
 struct sctp_shared_key {
        struct list_head key_list;
-       __u16 key_id;
        struct sctp_auth_bytes *key;
+       refcount_t refcnt;
+       __u16 key_id;
+       __u8 deactivated;
 };
 
 #define key_for_each(__key, __list_head) \
@@ -103,21 +105,22 @@ int sctp_auth_send_cid(enum sctp_cid chunk,
 int sctp_auth_recv_cid(enum sctp_cid chunk,
                       const struct sctp_association *asoc);
 void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
-                           struct sk_buff *skb,
-                           struct sctp_auth_chunk *auth, gfp_t gfp);
+                             struct sk_buff *skb, struct sctp_auth_chunk *auth,
+                             struct sctp_shared_key *ep_key, gfp_t gfp);
+void sctp_auth_shkey_release(struct sctp_shared_key *sh_key);
+void sctp_auth_shkey_hold(struct sctp_shared_key *sh_key);
 
 /* API Helpers */
 int sctp_auth_ep_add_chunkid(struct sctp_endpoint *ep, __u8 chunk_id);
 int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
                            struct sctp_hmacalgo *hmacs);
-int sctp_auth_set_key(struct sctp_endpoint *ep,
-                     struct sctp_association *asoc,
+int sctp_auth_set_key(struct sctp_endpoint *ep, struct sctp_association *asoc,
                      struct sctp_authkey *auth_key);
 int sctp_auth_set_active_key(struct sctp_endpoint *ep,
-                     struct sctp_association *asoc,
-                     __u16 key_id);
+                            struct sctp_association *asoc, __u16 key_id);
 int sctp_auth_del_key_id(struct sctp_endpoint *ep,
-                     struct sctp_association *asoc,
-                     __u16 key_id);
+                        struct sctp_association *asoc, __u16 key_id);
+int sctp_auth_deact_key_id(struct sctp_endpoint *ep,
+                          struct sctp_association *asoc, __u16 key_id);
 
 #endif
index b55c6a48a20696328e918155d310132160f87f64..6640f84fe5368f868e49bfb9ea2c3f94639b4e80 100644 (file)
@@ -100,6 +100,7 @@ enum sctp_verb {
        SCTP_CMD_SET_SK_ERR,     /* Set sk_err */
        SCTP_CMD_ASSOC_CHANGE,   /* generate and send assoc_change event */
        SCTP_CMD_ADAPTATION_IND, /* generate and send adaptation event */
+       SCTP_CMD_PEER_NO_AUTH,   /* generate and send authentication event */
        SCTP_CMD_ASSOC_SHKEY,    /* generate the association shared keys */
        SCTP_CMD_T1_RETRAN,      /* Mark for retransmission after T1 timeout  */
        SCTP_CMD_UPDATE_INITTAG, /* Update peer inittag */
index f7ae6b0a21d0663bf61e1370202604e0e66896a3..72c5b8fc3232ec65f1d39b3062fbecb3f3ce53e7 100644 (file)
@@ -180,14 +180,7 @@ struct sctp_transport *sctp_epaddr_lookup_transport(
 /*
  * sctp/proc.c
  */
-int sctp_snmp_proc_init(struct net *net);
-void sctp_snmp_proc_exit(struct net *net);
-int sctp_eps_proc_init(struct net *net);
-void sctp_eps_proc_exit(struct net *net);
-int sctp_assocs_proc_init(struct net *net);
-void sctp_assocs_proc_exit(struct net *net);
-int sctp_remaddr_proc_init(struct net *net);
-void sctp_remaddr_proc_exit(struct net *net);
+int __net_init sctp_proc_init(struct net *net);
 
 /*
  * sctp/offload.c
@@ -318,7 +311,6 @@ atomic_t sctp_dbg_objcnt_## name = ATOMIC_INIT(0)
 {.label= #name, .counter= &sctp_dbg_objcnt_## name}
 
 void sctp_dbg_objcnt_init(struct net *);
-void sctp_dbg_objcnt_exit(struct net *);
 
 #else
 
@@ -326,7 +318,6 @@ void sctp_dbg_objcnt_exit(struct net *);
 #define SCTP_DBG_OBJCNT_DEC(name)
 
 static inline void sctp_dbg_objcnt_init(struct net *net) { return; }
-static inline void sctp_dbg_objcnt_exit(struct net *net) { return; }
 
 #endif /* CONFIG_SCTP_DBG_OBJCOUNT */
 
index 2883c43c52587f1d6d1cd2e8ba91de82b730cbcc..2d0e782c90551377ad654bcef1224bbdb75ba394 100644 (file)
@@ -263,7 +263,8 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
 struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
                                    __u32 new_cum_tsn, size_t nstreams,
                                    struct sctp_fwdtsn_skip *skiplist);
-struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc);
+struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc,
+                                 __u16 key_id);
 struct sctp_chunk *sctp_make_strreset_req(const struct sctp_association *asoc,
                                          __u16 stream_num, __be16 *stream_list,
                                          bool out, bool in);
index 03e92dda1813bc262d58c5a0866399d45676b2c9..012fb3e2f4cf60514c1f9544012ad54c565e0ad7 100644 (file)
@@ -577,8 +577,12 @@ struct sctp_chunk {
        /* This points to the sk_buff containing the actual data.  */
        struct sk_buff *skb;
 
-       /* In case of GSO packets, this will store the head one */
-       struct sk_buff *head_skb;
+       union {
+               /* In case of GSO packets, this will store the head one */
+               struct sk_buff *head_skb;
+               /* In case of auth enabled, this will point to the shkey */
+               struct sctp_shared_key *shkey;
+       };
 
        /* These are the SCTP headers by reverse order in a packet.
         * Note that some of these may happen more than once.  In that
@@ -1995,6 +1999,7 @@ struct sctp_association {
         * The current generated assocaition shared key (secret)
         */
        struct sctp_auth_bytes *asoc_shared_key;
+       struct sctp_shared_key *shkey;
 
        /* SCTP AUTH: hmac id of the first peer requested algorithm
         * that we support.
@@ -2112,6 +2117,9 @@ struct sctp_cmsgs {
        struct sctp_initmsg *init;
        struct sctp_sndrcvinfo *srinfo;
        struct sctp_sndinfo *sinfo;
+       struct sctp_prinfo *prinfo;
+       struct sctp_authinfo *authinfo;
+       struct msghdr *addrs_msg;
 };
 
 /* Structure for tracking memory objects */
index b9624581d639ff7f1f9466d7e7cd50c6b87cabf5..709311132d4c1d575abfe82542429ce016fdaef7 100644 (file)
@@ -1138,6 +1138,7 @@ struct proto {
 
 int proto_register(struct proto *prot, int alloc_slab);
 void proto_unregister(struct proto *prot);
+int sock_load_diag_module(int family, int protocol);
 
 #ifdef SOCK_REFCNT_DEBUG
 static inline void sk_refcnt_debug_inc(struct sock *sk)
@@ -2141,6 +2142,10 @@ static inline struct page_frag *sk_page_frag(struct sock *sk)
 
 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
 
+int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
+               int sg_start, int *sg_curr, unsigned int *sg_size,
+               int first_coalesce);
+
 /*
  *     Default write policy as shown to user space via poll/select/SIGIO
  */
index 92b06c6e7732ad7c61b580427fc085fa0dff1063..9c9b3768b350abfd51776563d220d5e97ca9da69 100644 (file)
@@ -511,8 +511,6 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
 #endif
 /* tcp_output.c */
 
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
-                    int min_tso_segs);
 void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
                               int nonagle);
 int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
@@ -981,8 +979,8 @@ struct tcp_congestion_ops {
        u32  (*undo_cwnd)(struct sock *sk);
        /* hook for packet ack accounting (optional) */
        void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
-       /* suggest number of segments for each skb to transmit (optional) */
-       u32 (*tso_segs_goal)(struct sock *sk);
+       /* override sysctl_tcp_min_tso_segs */
+       u32 (*min_tso_segs)(struct sock *sk);
        /* returns the multiplier used in tcp_sndbuf_expand (optional) */
        u32 (*sndbuf_expand)(struct sock *sk);
        /* call when packets are delivered to update cwnd and pacing rate,
index 4913430ab8078537e92806c2b96faa4581c95f73..437a746300bf005be916a9aeff61f1f68991b1b0 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/socket.h>
 #include <linux/tcp.h>
 #include <net/tcp.h>
+#include <net/strparser.h>
 
 #include <uapi/linux/tls.h>
 
 
 struct tls_sw_context {
        struct crypto_aead *aead_send;
+       struct crypto_aead *aead_recv;
        struct crypto_wait async_wait;
 
+       /* Receive context */
+       struct strparser strp;
+       void (*saved_data_ready)(struct sock *sk);
+       unsigned int (*sk_poll)(struct file *file, struct socket *sock,
+                               struct poll_table_struct *wait);
+       struct sk_buff *recv_pkt;
+       u8 control;
+       bool decrypted;
+
        /* Sending context */
        char aad_space[TLS_AAD_SPACE_SIZE];
 
@@ -81,23 +92,32 @@ enum {
        TLS_PENDING_CLOSED_RECORD
 };
 
+struct cipher_context {
+       u16 prepend_size;
+       u16 tag_size;
+       u16 overhead_size;
+       u16 iv_size;
+       char *iv;
+       u16 rec_seq_size;
+       char *rec_seq;
+};
+
 struct tls_context {
        union {
                struct tls_crypto_info crypto_send;
                struct tls12_crypto_info_aes_gcm_128 crypto_send_aes_gcm_128;
        };
+       union {
+               struct tls_crypto_info crypto_recv;
+               struct tls12_crypto_info_aes_gcm_128 crypto_recv_aes_gcm_128;
+       };
 
        void *priv_ctx;
 
-       u8 tx_conf:2;
+       u8 conf:2;
 
-       u16 prepend_size;
-       u16 tag_size;
-       u16 overhead_size;
-       u16 iv_size;
-       char *iv;
-       u16 rec_seq_size;
-       char *rec_seq;
+       struct cipher_context tx;
+       struct cipher_context rx;
 
        struct scatterlist *partially_sent_record;
        u16 partially_sent_offset;
@@ -124,12 +144,19 @@ int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
                  unsigned int optlen);
 
 
-int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx);
+int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
 int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
                    int offset, size_t size, int flags);
 void tls_sw_close(struct sock *sk, long timeout);
-void tls_sw_free_tx_resources(struct sock *sk);
+void tls_sw_free_resources(struct sock *sk);
+int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+                  int nonblock, int flags, int *addr_len);
+unsigned int tls_sw_poll(struct file *file, struct socket *sock,
+                        struct poll_table_struct *wait);
+ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
+                          struct pipe_inode_info *pipe,
+                          size_t len, unsigned int flags);
 
 void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
 void tls_icsk_clean_acked(struct sock *sk);
@@ -170,9 +197,9 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
        return tls_ctx->pending_open_record_frags;
 }
 
-static inline void tls_err_abort(struct sock *sk)
+static inline void tls_err_abort(struct sock *sk, int err)
 {
-       sk->sk_err = EBADMSG;
+       sk->sk_err = err;
        sk->sk_error_report(sk);
 }
 
@@ -190,10 +217,10 @@ static inline bool tls_bigint_increment(unsigned char *seq, int len)
 }
 
 static inline void tls_advance_record_sn(struct sock *sk,
-                                        struct tls_context *ctx)
+                                        struct cipher_context *ctx)
 {
        if (tls_bigint_increment(ctx->rec_seq, ctx->rec_seq_size))
-               tls_err_abort(sk);
+               tls_err_abort(sk, EBADMSG);
        tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
                             ctx->iv_size);
 }
@@ -203,9 +230,9 @@ static inline void tls_fill_prepend(struct tls_context *ctx,
                             size_t plaintext_len,
                             unsigned char record_type)
 {
-       size_t pkt_len, iv_size = ctx->iv_size;
+       size_t pkt_len, iv_size = ctx->tx.iv_size;
 
-       pkt_len = plaintext_len + iv_size + ctx->tag_size;
+       pkt_len = plaintext_len + iv_size + ctx->tx.tag_size;
 
        /* we cover nonce explicit here as well, so buf should be of
         * size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE
@@ -217,7 +244,7 @@ static inline void tls_fill_prepend(struct tls_context *ctx,
        buf[3] = pkt_len >> 8;
        buf[4] = pkt_len & 0xFF;
        memcpy(buf + TLS_NONCE_OFFSET,
-              ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size);
+              ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size);
 }
 
 static inline void tls_make_aad(char *buf,
index 7d2077665c0b05c1716e808d2bfd6c56af65e3c8..aa027ba1d032ffa2c7483eb6f00dd94590582d85 100644 (file)
@@ -1267,12 +1267,12 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
 
 static inline void xfrm_sk_free_policy(struct sock *sk) {}
 static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; }
-static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }  
-static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; } 
+static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
+static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
 static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
-{ 
-       return 1; 
-} 
+{
+       return 1;
+}
 static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
 {
        return 1;
@@ -1356,7 +1356,7 @@ __xfrm6_state_addr_check(const struct xfrm_state *x,
 {
        if (ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr) &&
            (ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)&x->props.saddr) ||
-            ipv6_addr_any((struct in6_addr *)saddr) || 
+            ipv6_addr_any((struct in6_addr *)saddr) ||
             ipv6_addr_any((struct in6_addr *)&x->props.saddr)))
                return 1;
        return 0;
@@ -1666,7 +1666,7 @@ int xfrm_user_policy(struct sock *sk, int optname,
 static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
 {
        return -ENOPROTOOPT;
-} 
+}
 
 static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 {
index 73b2387e3f742490528696e97290990c9462d4f6..ff3ed435701f3e05b197ec1e32828f330da2646b 100644 (file)
@@ -1537,10 +1537,6 @@ struct ib_xrcd {
 
        struct mutex            tgt_qp_mutex;
        struct list_head        tgt_qp_list;
-       /*
-        * Implementation details of the RDMA core, don't use in drivers:
-        */
-       struct rdma_restrack_entry res;
 };
 
 struct ib_ah {
index d8d4a902a88dedbc93ac8da1ca99bb5f3d394d65..2280b2351739572c5db73579f8ffc0e16d511ebe 100644 (file)
@@ -68,6 +68,9 @@ struct scsi_cmnd {
        struct list_head list;  /* scsi_cmnd participates in queue lists */
        struct list_head eh_entry; /* entry for the host eh_cmd_q */
        struct delayed_work abort_work;
+
+       struct rcu_head rcu;
+
        int eh_eflags;          /* Used by error handlr */
 
        /*
index 1a1df0d21ee3f9648cc02a6bc067783ae5e5ef03..a8b7bf879cede4240d921a42f915a230163e5e4e 100644 (file)
@@ -571,8 +571,6 @@ struct Scsi_Host {
                struct blk_mq_tag_set   tag_set;
        };
 
-       struct rcu_head rcu;
-
        atomic_t host_busy;                /* commands actually active on low-level */
        atomic_t host_blocked;
 
index c2d1b15da136e9aa9273a90471ac8be7a1b6b413..a91f25151a5b96ce43d61e2f58841aefa4190baa 100644 (file)
@@ -15,6 +15,7 @@
 
 #define ARC_REG_MCIP_BCR       0x0d0
 #define ARC_REG_MCIP_IDU_BCR   0x0D5
+#define ARC_REG_GFRC_BUILD     0x0D6
 #define ARC_REG_MCIP_CMD       0x600
 #define ARC_REG_MCIP_WDATA     0x601
 #define ARC_REG_MCIP_READBACK  0x602
@@ -36,10 +37,14 @@ struct mcip_cmd {
 #define CMD_SEMA_RELEASE               0x12
 
 #define CMD_DEBUG_SET_MASK             0x34
+#define CMD_DEBUG_READ_MASK            0x35
 #define CMD_DEBUG_SET_SELECT           0x36
+#define CMD_DEBUG_READ_SELECT          0x37
 
 #define CMD_GFRC_READ_LO               0x42
 #define CMD_GFRC_READ_HI               0x43
+#define CMD_GFRC_SET_CORE              0x47
+#define CMD_GFRC_READ_CORE             0x48
 
 #define CMD_IDU_ENABLE                 0x71
 #define CMD_IDU_DISABLE                        0x72
index 200f731be557048c779691e3360018acbf4927c2..7b706ff213359eb31271c8869160c984171bce71 100644 (file)
@@ -86,8 +86,8 @@ TRACE_EVENT(mmc_request_start,
                  __entry->stop_flags, __entry->stop_retries,
                  __entry->sbc_opcode, __entry->sbc_arg,
                  __entry->sbc_flags, __entry->sbc_retries,
-                 __entry->blocks, __entry->blk_addr,
-                 __entry->blksz, __entry->data_flags, __entry->tag,
+                 __entry->blocks, __entry->blksz,
+                 __entry->blk_addr, __entry->data_flags, __entry->tag,
                  __entry->can_retune, __entry->doing_retune,
                  __entry->retune_now, __entry->need_retune,
                  __entry->hold_retune, __entry->retune_period)
index 85dc965afd892ccd34a4d9f41673c60412fd97c0..99c902e460c2534609c6c385543e527adc36fa57 100644 (file)
@@ -102,13 +102,13 @@ typedef struct siginfo {
                                short _addr_lsb; /* LSB of the reported address */
                                /* used when si_code=SEGV_BNDERR */
                                struct {
-                                       short _dummy_bnd;
+                                       void *_dummy_bnd;
                                        void __user *_lower;
                                        void __user *_upper;
                                } _addr_bnd;
                                /* used when si_code=SEGV_PKUERR */
                                struct {
-                                       short _dummy_pkey;
+                                       void *_dummy_pkey;
                                        __u32 _pkey;
                                } _addr_pkey;
                        };
index 91a31ffed828ddfbad55967022d3a1df32db5340..9a781f0611df0280be7d952258f486f805f27528 100644 (file)
@@ -63,6 +63,7 @@ struct drm_virtgpu_execbuffer {
 };
 
 #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */
+#define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */
 
 struct drm_virtgpu_getparam {
        __u64 param;
index 5cb360be2a1163bde9e4e6bf731ce51ce8c9aed5..894d8d2f713d96502872c6c1db839e1801793583 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -196,8 +196,6 @@ struct batadv_bla_claim_dst {
        __be16 group;           /* group id */
 };
 
-#pragma pack()
-
 /**
  * struct batadv_ogm_packet - ogm (routing protocol) packet
  * @packet_type: batman-adv packet type, part of the general header
@@ -222,9 +220,6 @@ struct batadv_ogm_packet {
        __u8   reserved;
        __u8   tq;
        __be16 tvlv_len;
-       /* __packed is not needed as the struct size is divisible by 4,
-        * and the largest data type in this struct has a size of 4.
-        */
 };
 
 #define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet)
@@ -249,9 +244,6 @@ struct batadv_ogm2_packet {
        __u8   orig[ETH_ALEN];
        __be16 tvlv_len;
        __be32 throughput;
-       /* __packed is not needed as the struct size is divisible by 4,
-        * and the largest data type in this struct has a size of 4.
-        */
 };
 
 #define BATADV_OGM2_HLEN sizeof(struct batadv_ogm2_packet)
@@ -405,7 +397,6 @@ struct batadv_icmp_packet_rr {
  * misalignment of the payload after the ethernet header. It may also lead to
  * leakage of information when the padding it not initialized before sending.
  */
-#pragma pack(2)
 
 /**
  * struct batadv_unicast_packet - unicast packet for network payload
@@ -533,8 +524,6 @@ struct batadv_coded_packet {
        __be16 coded_len;
 };
 
-#pragma pack()
-
 /**
  * struct batadv_unicast_tvlv_packet - generic unicast packet with tvlv payload
  * @packet_type: batman-adv packet type, part of the general header
@@ -641,4 +630,6 @@ struct batadv_tvlv_mcast_data {
        __u8 reserved[3];
 };
 
+#pragma pack()
+
 #endif /* _UAPI_LINUX_BATADV_PACKET_H_ */
index ae00c99cbed0998d0b5eb1bcae87cf56abd00429..324a0e1143e7b8a303f6520ccff1a6654ec0d2d1 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: MIT */
-/* Copyright (C) 2016-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018  B.A.T.M.A.N. contributors:
  *
  * Matthias Schiffer
  *
@@ -91,6 +91,53 @@ enum batadv_tt_client_flags {
        BATADV_TT_CLIENT_TEMP    = (1 << 11),
 };
 
+/**
+ * enum batadv_mcast_flags_priv - Private, own multicast flags
+ *
+ * These are internal, multicast related flags. Currently they describe certain
+ * multicast related attributes of the segment this originator bridges into the
+ * mesh.
+ *
+ * Those attributes are used to determine the public multicast flags this
+ * originator is going to announce via TT.
+ *
+ * For netlink, if BATADV_MCAST_FLAGS_BRIDGED is unset then all querier
+ * related flags are undefined.
+ */
+enum batadv_mcast_flags_priv {
+       /**
+        * @BATADV_MCAST_FLAGS_BRIDGED: There is a bridge on top of the mesh
+        * interface.
+        */
+       BATADV_MCAST_FLAGS_BRIDGED                      = (1 << 0),
+
+       /**
+        * @BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS: Whether an IGMP querier
+        * exists in the mesh
+        */
+       BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS          = (1 << 1),
+
+       /**
+        * @BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS: Whether an MLD querier
+        * exists in the mesh
+        */
+       BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS          = (1 << 2),
+
+       /**
+        * @BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING: If an IGMP querier
+        * exists, whether it is potentially shadowing multicast listeners
+        * (i.e. querier is behind our own bridge segment)
+        */
+       BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING       = (1 << 3),
+
+       /**
+        * @BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING: If an MLD querier
+        * exists, whether it is potentially shadowing multicast listeners
+        * (i.e. querier is behind our own bridge segment)
+        */
+       BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING       = (1 << 4),
+};
+
 /**
  * enum batadv_nl_attrs - batman-adv netlink attributes
  */
@@ -272,6 +319,31 @@ enum batadv_nl_attrs {
         */
        BATADV_ATTR_BLA_CRC,
 
+       /**
+        * @BATADV_ATTR_DAT_CACHE_IP4ADDRESS: Client IPv4 address
+        */
+       BATADV_ATTR_DAT_CACHE_IP4ADDRESS,
+
+       /**
+        * @BATADV_ATTR_DAT_CACHE_HWADDRESS: Client MAC address
+        */
+       BATADV_ATTR_DAT_CACHE_HWADDRESS,
+
+       /**
+        * @BATADV_ATTR_DAT_CACHE_VID: VLAN ID
+        */
+       BATADV_ATTR_DAT_CACHE_VID,
+
+       /**
+        * @BATADV_ATTR_MCAST_FLAGS: Per originator multicast flags
+        */
+       BATADV_ATTR_MCAST_FLAGS,
+
+       /**
+        * @BATADV_ATTR_MCAST_FLAGS_PRIV: Private, own multicast flags
+        */
+       BATADV_ATTR_MCAST_FLAGS_PRIV,
+
        /* add attributes above here, update the policy in netlink.c */
 
        /**
@@ -361,6 +433,16 @@ enum batadv_nl_commands {
         */
        BATADV_CMD_GET_BLA_BACKBONE,
 
+       /**
+        * @BATADV_CMD_GET_DAT_CACHE: Query list of DAT cache entries
+        */
+       BATADV_CMD_GET_DAT_CACHE,
+
+       /**
+        * @BATADV_CMD_GET_MCAST_FLAGS: Query list of multicast flags
+        */
+       BATADV_CMD_GET_MCAST_FLAGS,
+
        /* add new commands above here */
 
        /**
index 20d1490d63773288d1d91130e96478ee7708bf26..3c50e07ee833116a726c19402f26cf63e91491e9 100644 (file)
@@ -131,7 +131,7 @@ enum {
 #define BLKTRACE_BDEV_SIZE     32
 
 /*
- * User setup structure passed with BLKTRACESTART
+ * User setup structure passed with BLKTRACESETUP
  */
 struct blk_user_trace_setup {
        char name[BLKTRACE_BDEV_SIZE];  /* output */
index db6bdc3751268351da3126f57566639fce355b12..18b7c510c511df9247a82e0bf40c199c14b229b4 100644 (file)
@@ -133,6 +133,7 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_SOCK_OPS,
        BPF_PROG_TYPE_SK_SKB,
        BPF_PROG_TYPE_CGROUP_DEVICE,
+       BPF_PROG_TYPE_SK_MSG,
 };
 
 enum bpf_attach_type {
@@ -143,6 +144,7 @@ enum bpf_attach_type {
        BPF_SK_SKB_STREAM_PARSER,
        BPF_SK_SKB_STREAM_VERDICT,
        BPF_CGROUP_DEVICE,
+       BPF_SK_MSG_VERDICT,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -231,6 +233,28 @@ enum bpf_attach_type {
 #define BPF_F_RDONLY           (1U << 3)
 #define BPF_F_WRONLY           (1U << 4)
 
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID   (1U << 5)
+
+enum bpf_stack_build_id_status {
+       /* user space needs an empty entry to identify end of a trace */
+       BPF_STACK_BUILD_ID_EMPTY = 0,
+       /* with valid build_id and offset */
+       BPF_STACK_BUILD_ID_VALID = 1,
+       /* couldn't get build_id, fallback to ip */
+       BPF_STACK_BUILD_ID_IP = 2,
+};
+
+#define BPF_BUILD_ID_SIZE 20
+struct bpf_stack_build_id {
+       __s32           status;
+       unsigned char   build_id[BPF_BUILD_ID_SIZE];
+       union {
+               __u64   offset;
+               __u64   ip;
+       };
+};
+
 union bpf_attr {
        struct { /* anonymous struct used by BPF_MAP_CREATE command */
                __u32   map_type;       /* one of enum bpf_map_type */
@@ -696,6 +720,15 @@ union bpf_attr {
  * int bpf_override_return(pt_regs, rc)
  *     @pt_regs: pointer to struct pt_regs
  *     @rc: the return value to set
+ *
+ * int bpf_msg_redirect_map(map, key, flags)
+ *     Redirect msg to a sock in map using key as a lookup key for the
+ *     sock in map.
+ *     @map: pointer to sockmap
+ *     @key: key to lookup sock in map
+ *     @flags: reserved for future use
+ *     Return: SK_PASS
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -757,7 +790,11 @@ union bpf_attr {
        FN(perf_prog_read_value),       \
        FN(getsockopt),                 \
        FN(override_return),            \
-       FN(sock_ops_cb_flags_set),
+       FN(sock_ops_cb_flags_set),      \
+       FN(msg_redirect_map),           \
+       FN(msg_apply_bytes),            \
+       FN(msg_cork_bytes),             \
+       FN(msg_pull_data),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -800,6 +837,7 @@ enum bpf_func_id {
 /* BPF_FUNC_skb_set_tunnel_key flags. */
 #define BPF_F_ZERO_CSUM_TX             (1ULL << 1)
 #define BPF_F_DONT_FRAGMENT            (1ULL << 2)
+#define BPF_F_SEQ_NUMBER               (1ULL << 3)
 
 /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
  * BPF_FUNC_perf_event_read_value flags.
@@ -919,6 +957,14 @@ enum sk_action {
        SK_PASS,
 };
 
+/* user accessible metadata for SK_MSG packet hook, new fields must
+ * be added to the end of this structure
+ */
+struct sk_msg_md {
+       void *data;
+       void *data_end;
+};
+
 #define BPF_TAG_SIZE   8
 
 struct bpf_prog_info {
index 8f95303f9d807d10d4fd6850d91a2486b0a490ec..eb1b9d21250c6e83233721a777e5760c26d75481 100644 (file)
@@ -13,6 +13,7 @@
 struct bpf_perf_event_data {
        bpf_user_pt_regs_t regs;
        __u64 sample_period;
+       __u64 addr;
 };
 
 #endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
index 5f3c5a918f00d5ed3cb2c82b8803edeae456cad9..b4112f0b6dd36c33299930f4ffc16fc5230e465f 100644 (file)
@@ -211,6 +211,32 @@ struct dmx_stc {
        __u64 stc;
 };
 
+/**
+ * enum dmx_buffer_flags - DMX memory-mapped buffer flags
+ *
+ * @DMX_BUFFER_FLAG_HAD_CRC32_DISCARD:
+ *     Indicates that the Kernel discarded one or more frames due to wrong
+ *     CRC32 checksum.
+ * @DMX_BUFFER_FLAG_TEI:
+ *     Indicates that the Kernel has detected a Transport Error indicator
+ *     (TEI) on a filtered pid.
+ * @DMX_BUFFER_PKT_COUNTER_MISMATCH:
+ *     Indicates that the Kernel has detected a packet counter mismatch
+ *     on a filtered pid.
+ * @DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED:
+ *     Indicates that the Kernel has detected one or more frame discontinuities.
+ * @DMX_BUFFER_FLAG_DISCONTINUITY_INDICATOR:
+ *     Received at least one packet with a frame discontinuity indicator.
+ */
+
+enum dmx_buffer_flags {
+       DMX_BUFFER_FLAG_HAD_CRC32_DISCARD               = 1 << 0,
+       DMX_BUFFER_FLAG_TEI                             = 1 << 1,
+       DMX_BUFFER_PKT_COUNTER_MISMATCH                 = 1 << 2,
+       DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED          = 1 << 3,
+       DMX_BUFFER_FLAG_DISCONTINUITY_INDICATOR         = 1 << 4,
+};
+
 /**
  * struct dmx_buffer - dmx buffer info
  *
@@ -220,15 +246,24 @@ struct dmx_stc {
  *             offset from the start of the device memory for this plane,
  *             (or a "cookie" that should be passed to mmap() as offset)
  * @length:    size in bytes of the buffer
+ * @flags:     bit array of buffer flags as defined by &enum dmx_buffer_flags.
+ *             Filled only at &DMX_DQBUF.
+ * @count:     monotonic counter for filled buffers. Helps to identify
+ *             data stream losses. Filled only at &DMX_DQBUF.
  *
  * Contains data exchanged by application and driver using one of the streaming
  * I/O methods.
+ *
+ * Please notice that, for &DMX_QBUF, only @index should be filled.
+ * On &DMX_DQBUF calls, all fields will be filled by the Kernel.
  */
 struct dmx_buffer {
        __u32                   index;
        __u32                   bytesused;
        __u32                   offset;
        __u32                   length;
+       __u32                   flags;
+       __u32                   count;
 };
 
 /**
index 28812eda420908ebf73008525d90c1abb3907536..dc64cfaf13da08564a8271e50a4edb89d221b148 100644 (file)
@@ -20,13 +20,11 @@ struct sock_extended_err {
 #define SO_EE_ORIGIN_ICMP6     3
 #define SO_EE_ORIGIN_TXSTATUS  4
 #define SO_EE_ORIGIN_ZEROCOPY  5
-#define SO_EE_ORIGIN_ZCOOKIE   6
 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
 
 #define SO_EE_OFFENDER(ee)     ((struct sockaddr*)((ee)+1))
 
 #define SO_EE_CODE_ZEROCOPY_COPIED     1
-#define        SO_EE_ORIGIN_MAX_ZCOOKIES       8
 
 /**
  *     struct scm_timestamping - timestamps exposed through cmsg
index 44a0b675a6bcd1b242ccacc7b936d267ae915973..20da156aaf64e59101bc3ea92273db443fbb6046 100644 (file)
@@ -914,12 +914,15 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
  * @flow_type: Type of flow to be affected, e.g. %TCP_V4_FLOW
  * @data: Command-dependent value
  * @fs: Flow classification rule
+ * @rss_context: RSS context to be affected
  * @rule_cnt: Number of rules to be affected
  * @rule_locs: Array of used rule locations
  *
  * For %ETHTOOL_GRXFH and %ETHTOOL_SRXFH, @data is a bitmask indicating
  * the fields included in the flow hash, e.g. %RXH_IP_SRC.  The following
- * structure fields must not be used.
+ * structure fields must not be used, except that if @flow_type includes
+ * the %FLOW_RSS flag, then @rss_context determines which RSS context to
+ * act on.
  *
  * For %ETHTOOL_GRXRINGS, @data is set to the number of RX rings/queues
  * on return.
@@ -931,7 +934,9 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
  * set in @data then special location values should not be used.
  *
  * For %ETHTOOL_GRXCLSRULE, @fs.@location specifies the location of an
- * existing rule on entry and @fs contains the rule on return.
+ * existing rule on entry and @fs contains the rule on return; if
+ * @fs.@flow_type includes the %FLOW_RSS flag, then @rss_context is
+ * filled with the RSS context ID associated with the rule.
  *
  * For %ETHTOOL_GRXCLSRLALL, @rule_cnt specifies the array size of the
  * user buffer for @rule_locs on entry.  On return, @data is the size
@@ -942,7 +947,11 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
  * For %ETHTOOL_SRXCLSRLINS, @fs specifies the rule to add or update.
  * @fs.@location either specifies the location to use or is a special
  * location value with %RX_CLS_LOC_SPECIAL flag set.  On return,
- * @fs.@location is the actual rule location.
+ * @fs.@location is the actual rule location.  If @fs.@flow_type
+ * includes the %FLOW_RSS flag, @rss_context is the RSS context ID to
+ * use for flow spreading traffic which matches this rule.  The value
+ * from the rxfh indirection table will be added to @fs.@ring_cookie
+ * to choose which ring to deliver to.
  *
  * For %ETHTOOL_SRXCLSRLDEL, @fs.@location specifies the location of an
  * existing rule on entry.
@@ -963,7 +972,10 @@ struct ethtool_rxnfc {
        __u32                           flow_type;
        __u64                           data;
        struct ethtool_rx_flow_spec     fs;
-       __u32                           rule_cnt;
+       union {
+               __u32                   rule_cnt;
+               __u32                   rss_context;
+       };
        __u32                           rule_locs[0];
 };
 
@@ -990,7 +1002,11 @@ struct ethtool_rxfh_indir {
 /**
  * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key.
  * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH
- * @rss_context: RSS context identifier.
+ * @rss_context: RSS context identifier.  Context 0 is the default for normal
+ *     traffic; other contexts can be referenced as the destination for RX flow
+ *     classification rules.  %ETH_RXFH_CONTEXT_ALLOC is used with command
+ *     %ETHTOOL_SRSSH to allocate a new RSS context; on return this field will
+ *     contain the ID of the newly allocated context.
  * @indir_size: On entry, the array size of the user buffer for the
  *     indirection table, which may be zero, or (for %ETHTOOL_SRSSH),
  *     %ETH_RXFH_INDIR_NO_CHANGE.  On return from %ETHTOOL_GRSSH,
@@ -1009,7 +1025,8 @@ struct ethtool_rxfh_indir {
  * size should be returned.  For %ETHTOOL_SRSSH, an @indir_size of
  * %ETH_RXFH_INDIR_NO_CHANGE means that indir table setting is not requested
  * and a @indir_size of zero means the indir table should be reset to default
- * values. An hfunc of zero means that hash function setting is not requested.
+ * values (if @rss_context == 0) or that the RSS context should be deleted.
+ * An hfunc of zero means that hash function setting is not requested.
  */
 struct ethtool_rxfh {
        __u32   cmd;
@@ -1021,6 +1038,7 @@ struct ethtool_rxfh {
        __u32   rsvd32;
        __u32   rss_config[0];
 };
+#define ETH_RXFH_CONTEXT_ALLOC         0xffffffff
 #define ETH_RXFH_INDIR_NO_CHANGE       0xffffffff
 
 /**
@@ -1635,6 +1653,8 @@ static inline int ethtool_validate_duplex(__u8 duplex)
 /* Flag to enable additional fields in struct ethtool_rx_flow_spec */
 #define        FLOW_EXT        0x80000000
 #define        FLOW_MAC_EXT    0x40000000
+/* Flag to enable RSS spreading of traffic matching rule (nfc only) */
+#define        FLOW_RSS        0x20000000
 
 /* L3-L4 network traffic flow hash options */
 #define        RXH_L2DA        (1 << 1)
index 77d90ae381149de044bbd6034a380ec626c8e4e4..232df14e1287a3297716f0c16a0632194a189d50 100644 (file)
@@ -35,6 +35,11 @@ struct fib_rule_uid_range {
        __u32           end;
 };
 
+struct fib_rule_port_range {
+       __u16           start;
+       __u16           end;
+};
+
 enum {
        FRA_UNSPEC,
        FRA_DST,        /* destination address */
@@ -59,6 +64,9 @@ enum {
        FRA_L3MDEV,     /* iif or oif is l3mdev goto its table */
        FRA_UID_RANGE,  /* UID range */
        FRA_PROTOCOL,   /* Originator of the rule */
+       FRA_IP_PROTO,   /* ip proto */
+       FRA_SPORT_RANGE, /* sport */
+       FRA_DPORT_RANGE, /* dport */
        __FRA_MAX
 };
 
index 2e4a6c1accaac5510d33e66ff675295032285023..3a45b4ad71a3083a16085d69e2891e31dddcdaa8 100644 (file)
@@ -30,6 +30,7 @@
  */
 
 #define ETH_ALEN       6               /* Octets in one ethernet addr   */
+#define ETH_TLEN       2               /* Octets in ethernet type field */
 #define ETH_HLEN       14              /* Total octets in header.       */
 #define ETH_ZLEN       60              /* Min. octets in frame sans FCS */
 #define ETH_DATA_LEN   1500            /* Max. octets in payload        */
index 11d0c0ea2bfabac2b8548fdfeae6f3423bddfa96..68699f654118592527096dc26336f57da6a01cdc 100644 (file)
@@ -959,4 +959,25 @@ enum {
 
 #define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
 
+/* rmnet section */
+
+#define RMNET_FLAGS_INGRESS_DEAGGREGATION         (1U << 0)
+#define RMNET_FLAGS_INGRESS_MAP_COMMANDS          (1U << 1)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4           (1U << 2)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4            (1U << 3)
+
+enum {
+       IFLA_RMNET_UNSPEC,
+       IFLA_RMNET_MUX_ID,
+       IFLA_RMNET_FLAGS,
+       __IFLA_RMNET_MAX,
+};
+
+#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1)
+
+struct ifla_rmnet_flags {
+       __u32   flags;
+       __u32   mask;
+};
+
 #endif /* _UAPI_LINUX_IF_LINK_H */
index 0fb5ef939732517293f222f2c85d88f2b4c1e973..7b26d4b0b0529649816ec1523d225eec7fa8ee26 100644 (file)
@@ -761,6 +761,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_TRACE_PAUSE           __KVM_DEPRECATED_MAIN_0x07
 #define KVM_TRACE_DISABLE         __KVM_DEPRECATED_MAIN_0x08
 #define KVM_GET_EMULATED_CPUID   _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
+#define KVM_GET_MSR_FEATURE_INDEX_LIST    _IOWR(KVMIO, 0x0a, struct kvm_msr_list)
 
 /*
  * Extension capability list.
@@ -934,6 +935,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_AIS_MIGRATION 150
 #define KVM_CAP_PPC_GET_CPU_CHAR 151
 #define KVM_CAP_S390_BPB 152
+#define KVM_CAP_GET_MSR_FEATURES 153
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/include/uapi/linux/ncsi.h b/include/uapi/linux/ncsi.h
new file mode 100644 (file)
index 0000000..4c292ec
--- /dev/null
@@ -0,0 +1,115 @@
+/*
+ * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __UAPI_NCSI_NETLINK_H__
+#define __UAPI_NCSI_NETLINK_H__
+
+/**
+ * enum ncsi_nl_commands - supported NCSI commands
+ *
+ * @NCSI_CMD_UNSPEC: unspecified command to catch errors
+ * @NCSI_CMD_PKG_INFO: list package and channel attributes. Requires
+ *     NCSI_ATTR_IFINDEX. If NCSI_ATTR_PACKAGE_ID is specified returns the
+ *     specific package and its channels - otherwise a dump request returns
+ *     all packages and their associated channels.
+ * @NCSI_CMD_SET_INTERFACE: set preferred package and channel combination.
+ *     Requires NCSI_ATTR_IFINDEX and the preferred NCSI_ATTR_PACKAGE_ID and
+ *     optionally the preferred NCSI_ATTR_CHANNEL_ID.
+ * @NCSI_CMD_CLEAR_INTERFACE: clear any preferred package/channel combination.
+ *     Requires NCSI_ATTR_IFINDEX.
+ * @NCSI_CMD_MAX: highest command number
+ */
+enum ncsi_nl_commands {
+       NCSI_CMD_UNSPEC,
+       NCSI_CMD_PKG_INFO,
+       NCSI_CMD_SET_INTERFACE,
+       NCSI_CMD_CLEAR_INTERFACE,
+
+       __NCSI_CMD_AFTER_LAST,
+       NCSI_CMD_MAX = __NCSI_CMD_AFTER_LAST - 1
+};
+
+/**
+ * enum ncsi_nl_attrs - General NCSI netlink attributes
+ *
+ * @NCSI_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_ATTR_IFINDEX: ifindex of network device using NCSI
+ * @NCSI_ATTR_PACKAGE_LIST: nested array of NCSI_PKG_ATTR attributes
+ * @NCSI_ATTR_PACKAGE_ID: package ID
+ * @NCSI_ATTR_CHANNEL_ID: channel ID
+ * @NCSI_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_attrs {
+       NCSI_ATTR_UNSPEC,
+       NCSI_ATTR_IFINDEX,
+       NCSI_ATTR_PACKAGE_LIST,
+       NCSI_ATTR_PACKAGE_ID,
+       NCSI_ATTR_CHANNEL_ID,
+
+       __NCSI_ATTR_AFTER_LAST,
+       NCSI_ATTR_MAX = __NCSI_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum ncsi_nl_pkg_attrs - NCSI netlink package-specific attributes
+ *
+ * @NCSI_PKG_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_PKG_ATTR: nested array of package attributes
+ * @NCSI_PKG_ATTR_ID: package ID
+ * @NCSI_PKG_ATTR_FORCED: flag signifying a package has been set as preferred
+ * @NCSI_PKG_ATTR_CHANNEL_LIST: nested array of NCSI_CHANNEL_ATTR attributes
+ * @NCSI_PKG_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_pkg_attrs {
+       NCSI_PKG_ATTR_UNSPEC,
+       NCSI_PKG_ATTR,
+       NCSI_PKG_ATTR_ID,
+       NCSI_PKG_ATTR_FORCED,
+       NCSI_PKG_ATTR_CHANNEL_LIST,
+
+       __NCSI_PKG_ATTR_AFTER_LAST,
+       NCSI_PKG_ATTR_MAX = __NCSI_PKG_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum ncsi_nl_channel_attrs - NCSI netlink channel-specific attributes
+ *
+ * @NCSI_CHANNEL_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_CHANNEL_ATTR: nested array of channel attributes
+ * @NCSI_CHANNEL_ATTR_ID: channel ID
+ * @NCSI_CHANNEL_ATTR_VERSION_MAJOR: channel major version number
+ * @NCSI_CHANNEL_ATTR_VERSION_MINOR: channel minor version number
+ * @NCSI_CHANNEL_ATTR_VERSION_STR: channel version string
+ * @NCSI_CHANNEL_ATTR_LINK_STATE: channel link state flags
+ * @NCSI_CHANNEL_ATTR_ACTIVE: channels with this flag are in
+ *     NCSI_CHANNEL_ACTIVE state
+ * @NCSI_CHANNEL_ATTR_FORCED: flag signifying a channel has been set as
+ *     preferred
+ * @NCSI_CHANNEL_ATTR_VLAN_LIST: nested array of NCSI_CHANNEL_ATTR_VLAN_IDs
+ * @NCSI_CHANNEL_ATTR_VLAN_ID: VLAN ID being filtered on this channel
+ * @NCSI_CHANNEL_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_channel_attrs {
+       NCSI_CHANNEL_ATTR_UNSPEC,
+       NCSI_CHANNEL_ATTR,
+       NCSI_CHANNEL_ATTR_ID,
+       NCSI_CHANNEL_ATTR_VERSION_MAJOR,
+       NCSI_CHANNEL_ATTR_VERSION_MINOR,
+       NCSI_CHANNEL_ATTR_VERSION_STR,
+       NCSI_CHANNEL_ATTR_LINK_STATE,
+       NCSI_CHANNEL_ATTR_ACTIVE,
+       NCSI_CHANNEL_ATTR_FORCED,
+       NCSI_CHANNEL_ATTR_VLAN_LIST,
+       NCSI_CHANNEL_ATTR_VLAN_ID,
+
+       __NCSI_CHANNEL_ATTR_AFTER_LAST,
+       NCSI_CHANNEL_ATTR_MAX = __NCSI_CHANNEL_ATTR_AFTER_LAST - 1
+};
+
+#endif /* __UAPI_NCSI_NETLINK_H__ */
index 7cafb26df5557d947927aa4945c89954e66b6fb0..be05e66c167b12a70db409242519a9b1958b1000 100644 (file)
@@ -475,6 +475,7 @@ enum {
 
 enum {
        TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
+       TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
 };
 
 /* Match-all classifier */
index 3d77fe91239a802367634d3cc29fd3903c1cee19..9008f31c7eb65c90a487d99d3f371a00ba526f3f 100644 (file)
@@ -42,7 +42,7 @@ typedef enum {
        SEV_RET_INVALID_PLATFORM_STATE,
        SEV_RET_INVALID_GUEST_STATE,
        SEV_RET_INAVLID_CONFIG,
-       SEV_RET_INVALID_len,
+       SEV_RET_INVALID_LEN,
        SEV_RET_ALREADY_OWNED,
        SEV_RET_INVALID_CERTIFICATE,
        SEV_RET_POLICY_FAILURE,
index 12e3bca32cadb4cdf1e67bf7c53baff3a4165541..a66b213de3d7a40ee13cb40d900bfc1a18818692 100644 (file)
 #define RDS_CMSG_MASKED_ATOMIC_CSWP    9
 #define RDS_CMSG_RXPATH_LATENCY                11
 #define        RDS_CMSG_ZCOPY_COOKIE           12
+#define        RDS_CMSG_ZCOPY_COMPLETION       13
 
 #define RDS_INFO_FIRST                 10000
 #define RDS_INFO_COUNTERS              10000
@@ -317,6 +318,12 @@ struct rds_rdma_notify {
 #define RDS_RDMA_DROPPED       3
 #define RDS_RDMA_OTHER_ERROR   4
 
+#define        RDS_MAX_ZCOOKIES        8
+struct rds_zcopy_cookies {
+       __u32 num;
+       __u32 cookies[RDS_MAX_ZCOOKIES];
+};
+
 /*
  * Common set of flags for all RDMA related structs
  */
index 4c4db14786bd04360e1b44a7e23275d7bb9f882d..afd4346386e0ad61df6d22504fd3445290e12c36 100644 (file)
@@ -99,6 +99,7 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_RECVRCVINFO       32
 #define SCTP_RECVNXTINFO       33
 #define SCTP_DEFAULT_SNDINFO   34
+#define SCTP_AUTH_DEACTIVATE_KEY       35
 
 /* Internal Socket Options. Some of the sctp library functions are
  * implemented using these socket options.
@@ -260,6 +261,31 @@ struct sctp_nxtinfo {
        sctp_assoc_t nxt_assoc_id;
 };
 
+/* 5.3.7 SCTP PR-SCTP Information Structure (SCTP_PRINFO)
+ *
+ *   This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ *   cmsg_level    cmsg_type      cmsg_data[]
+ *   ------------  ------------   -------------------
+ *   IPPROTO_SCTP  SCTP_PRINFO    struct sctp_prinfo
+ */
+struct sctp_prinfo {
+       __u16 pr_policy;
+       __u32 pr_value;
+};
+
+/* 5.3.8 SCTP AUTH Information Structure (SCTP_AUTHINFO)
+ *
+ *   This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ *   cmsg_level    cmsg_type      cmsg_data[]
+ *   ------------  ------------   -------------------
+ *   IPPROTO_SCTP  SCTP_AUTHINFO  struct sctp_authinfo
+ */
+struct sctp_authinfo {
+       __u16 auth_keynumber;
+};
+
 /*
  *  sinfo_flags: 16 bits (unsigned integer)
  *
@@ -271,6 +297,8 @@ enum sctp_sinfo_flags {
        SCTP_ADDR_OVER          = (1 << 1), /* Override the primary destination. */
        SCTP_ABORT              = (1 << 2), /* Send an ABORT message to the peer. */
        SCTP_SACK_IMMEDIATELY   = (1 << 3), /* SACK should be sent without delay. */
+       /* 2 bits here have been used by SCTP_PR_SCTP_MASK */
+       SCTP_SENDALL            = (1 << 6),
        SCTP_NOTIFICATION       = MSG_NOTIFICATION, /* Next message is not user msg but notification. */
        SCTP_EOF                = MSG_FIN,  /* Initiate graceful shutdown process. */
 };
@@ -293,6 +321,14 @@ typedef enum sctp_cmsg_type {
 #define SCTP_RCVINFO   SCTP_RCVINFO
        SCTP_NXTINFO,           /* 5.3.6 SCTP Next Receive Information Structure */
 #define SCTP_NXTINFO   SCTP_NXTINFO
+       SCTP_PRINFO,            /* 5.3.7 SCTP PR-SCTP Information Structure */
+#define SCTP_PRINFO    SCTP_PRINFO
+       SCTP_AUTHINFO,          /* 5.3.8 SCTP AUTH Information Structure */
+#define SCTP_AUTHINFO  SCTP_AUTHINFO
+       SCTP_DSTADDRV4,         /* 5.3.9 SCTP Destination IPv4 Address Structure */
+#define SCTP_DSTADDRV4 SCTP_DSTADDRV4
+       SCTP_DSTADDRV6,         /* 5.3.10 SCTP Destination IPv6 Address Structure */
+#define SCTP_DSTADDRV6 SCTP_DSTADDRV6
 } sctp_cmsg_t;
 
 /*
@@ -482,7 +518,12 @@ struct sctp_authkey_event {
        sctp_assoc_t auth_assoc_id;
 };
 
-enum { SCTP_AUTH_NEWKEY = 0, };
+enum {
+       SCTP_AUTH_NEW_KEY,
+#define        SCTP_AUTH_NEWKEY        SCTP_AUTH_NEW_KEY /* compatible with before */
+       SCTP_AUTH_FREE_KEY,
+       SCTP_AUTH_NO_AUTH,
+};
 
 /*
  * 6.1.9. SCTP_SENDER_DRY_EVENT
index b4a4f64635faff48bb457c9caa89a270d6eb2f96..560374c978f90b6b0269a5c68a82355b2547463f 100644 (file)
@@ -241,6 +241,9 @@ enum {
        TCP_NLA_MIN_RTT,        /* minimum RTT */
        TCP_NLA_RECUR_RETRANS,  /* Recurring retransmits for the current pkt */
        TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */
+       TCP_NLA_SNDQ_SIZE,      /* Data (bytes) pending in send queue */
+       TCP_NLA_CA_STATE,       /* ca_state of socket */
+       TCP_NLA_SND_SSTHRESH,   /* Slow start size threshold */
 
 };
 
index 14bacc7e6cef208b3cf1840022358d4403959a90..4ac9f1f02b06db56551b3824c3587a9b0479445b 100644 (file)
@@ -61,50 +61,6 @@ struct tipc_name_seq {
        __u32 upper;
 };
 
-/* TIPC Address Size, Offset, Mask specification for Z.C.N
- */
-#define TIPC_NODE_BITS          12
-#define TIPC_CLUSTER_BITS       12
-#define TIPC_ZONE_BITS          8
-
-#define TIPC_NODE_OFFSET        0
-#define TIPC_CLUSTER_OFFSET     TIPC_NODE_BITS
-#define TIPC_ZONE_OFFSET        (TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS)
-
-#define TIPC_NODE_SIZE          ((1UL << TIPC_NODE_BITS) - 1)
-#define TIPC_CLUSTER_SIZE       ((1UL << TIPC_CLUSTER_BITS) - 1)
-#define TIPC_ZONE_SIZE          ((1UL << TIPC_ZONE_BITS) - 1)
-
-#define TIPC_NODE_MASK         (TIPC_NODE_SIZE << TIPC_NODE_OFFSET)
-#define TIPC_CLUSTER_MASK      (TIPC_CLUSTER_SIZE << TIPC_CLUSTER_OFFSET)
-#define TIPC_ZONE_MASK         (TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET)
-
-#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK)
-
-static inline __u32 tipc_addr(unsigned int zone,
-                             unsigned int cluster,
-                             unsigned int node)
-{
-       return (zone << TIPC_ZONE_OFFSET) |
-               (cluster << TIPC_CLUSTER_OFFSET) |
-               node;
-}
-
-static inline unsigned int tipc_zone(__u32 addr)
-{
-       return addr >> TIPC_ZONE_OFFSET;
-}
-
-static inline unsigned int tipc_cluster(__u32 addr)
-{
-       return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET;
-}
-
-static inline unsigned int tipc_node(__u32 addr)
-{
-       return addr & TIPC_NODE_MASK;
-}
-
 /*
  * Application-accessible port name types
  */
@@ -117,9 +73,10 @@ static inline unsigned int tipc_node(__u32 addr)
 /*
  * Publication scopes when binding port names and port name sequences
  */
-#define TIPC_ZONE_SCOPE         1
-#define TIPC_CLUSTER_SCOPE      2
-#define TIPC_NODE_SCOPE         3
+enum tipc_scope {
+       TIPC_CLUSTER_SCOPE = 2, /* 0 can also be used */
+       TIPC_NODE_SCOPE    = 3
+};
 
 /*
  * Limiting values for messages
@@ -243,7 +200,7 @@ struct sockaddr_tipc {
 struct tipc_group_req {
        __u32 type;      /* group id */
        __u32 instance;  /* member id */
-       __u32 scope;     /* zone/cluster/node */
+       __u32 scope;     /* cluster/node */
        __u32 flags;
 };
 
@@ -268,4 +225,53 @@ struct tipc_sioc_ln_req {
        __u32 bearer_id;
        char linkname[TIPC_MAX_LINK_NAME];
 };
+
+
+/* The macros and functions below are deprecated:
+ */
+
+#define TIPC_ZONE_SCOPE         1
+
+#define TIPC_NODE_BITS          12
+#define TIPC_CLUSTER_BITS       12
+#define TIPC_ZONE_BITS          8
+
+#define TIPC_NODE_OFFSET        0
+#define TIPC_CLUSTER_OFFSET     TIPC_NODE_BITS
+#define TIPC_ZONE_OFFSET        (TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS)
+
+#define TIPC_NODE_SIZE          ((1UL << TIPC_NODE_BITS) - 1)
+#define TIPC_CLUSTER_SIZE       ((1UL << TIPC_CLUSTER_BITS) - 1)
+#define TIPC_ZONE_SIZE          ((1UL << TIPC_ZONE_BITS) - 1)
+
+#define TIPC_NODE_MASK         (TIPC_NODE_SIZE << TIPC_NODE_OFFSET)
+#define TIPC_CLUSTER_MASK      (TIPC_CLUSTER_SIZE << TIPC_CLUSTER_OFFSET)
+#define TIPC_ZONE_MASK         (TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET)
+
+#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK)
+
+static inline __u32 tipc_addr(unsigned int zone,
+                             unsigned int cluster,
+                             unsigned int node)
+{
+       return (zone << TIPC_ZONE_OFFSET) |
+               (cluster << TIPC_CLUSTER_OFFSET) |
+               node;
+}
+
+static inline unsigned int tipc_zone(__u32 addr)
+{
+       return addr >> TIPC_ZONE_OFFSET;
+}
+
+static inline unsigned int tipc_cluster(__u32 addr)
+{
+       return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET;
+}
+
+static inline unsigned int tipc_node(__u32 addr)
+{
+       return addr & TIPC_NODE_MASK;
+}
+
 #endif
index 469aa67a5ecbdbae5671379bfb3180a17ec7cb30..0affb682e5e398c7d6fa3f9169c6c803b19f6257 100644 (file)
@@ -114,6 +114,13 @@ enum {
        TIPC_NLA_SOCK_REF,              /* u32 */
        TIPC_NLA_SOCK_CON,              /* nest */
        TIPC_NLA_SOCK_HAS_PUBL,         /* flag */
+       TIPC_NLA_SOCK_STAT,             /* nest */
+       TIPC_NLA_SOCK_TYPE,             /* u32 */
+       TIPC_NLA_SOCK_INO,              /* u32 */
+       TIPC_NLA_SOCK_UID,              /* u32 */
+       TIPC_NLA_SOCK_TIPC_STATE,       /* u32 */
+       TIPC_NLA_SOCK_COOKIE,           /* u64 */
+       TIPC_NLA_SOCK_PAD,              /* flag */
 
        __TIPC_NLA_SOCK_MAX,
        TIPC_NLA_SOCK_MAX = __TIPC_NLA_SOCK_MAX - 1
@@ -162,6 +169,8 @@ enum {
        TIPC_NLA_NET_UNSPEC,
        TIPC_NLA_NET_ID,                /* u32 */
        TIPC_NLA_NET_ADDR,              /* u32 */
+       TIPC_NLA_NET_NODEID,            /* u64 */
+       TIPC_NLA_NET_NODEID_W1,         /* u64 */
 
        __TIPC_NLA_NET_MAX,
        TIPC_NLA_NET_MAX = __TIPC_NLA_NET_MAX - 1
@@ -238,6 +247,18 @@ enum {
        TIPC_NLA_CON_MAX = __TIPC_NLA_CON_MAX - 1
 };
 
+/* Nest, socket statistics info */
+enum {
+       TIPC_NLA_SOCK_STAT_RCVQ,        /* u32 */
+       TIPC_NLA_SOCK_STAT_SENDQ,       /* u32 */
+       TIPC_NLA_SOCK_STAT_LINK_CONG,   /* flag */
+       TIPC_NLA_SOCK_STAT_CONN_CONG,   /* flag */
+       TIPC_NLA_SOCK_STAT_DROP,        /* u32 */
+
+       __TIPC_NLA_SOCK_STAT_MAX,
+       TIPC_NLA_SOCK_STAT_MAX = __TIPC_NLA_SOCK_STAT_MAX - 1
+};
+
 /* Nest, link propreties. Valid for link, media and bearer */
 enum {
        TIPC_NLA_PROP_UNSPEC,
diff --git a/include/uapi/linux/tipc_sockets_diag.h b/include/uapi/linux/tipc_sockets_diag.h
new file mode 100644 (file)
index 0000000..7678cf2
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* AF_TIPC sock_diag interface for querying open sockets */
+
+#ifndef _UAPI__TIPC_SOCKETS_DIAG_H__
+#define _UAPI__TIPC_SOCKETS_DIAG_H__
+
+#include <linux/types.h>
+#include <linux/sock_diag.h>
+
+/* Request */
+struct tipc_sock_diag_req {
+       __u8    sdiag_family;   /* must be AF_TIPC */
+       __u8    sdiag_protocol; /* must be 0 */
+       __u16   pad;            /* must be 0 */
+       __u32   tidiag_states;  /* query*/
+};
+#endif /* _UAPI__TIPC_SOCKETS_DIAG_H__ */
index 293b2cdad88d94b75bbad6b953788aa77d3d7c37..c6633e97eca40b33a15d822a1e16222ce19bf94e 100644 (file)
@@ -38,6 +38,7 @@
 
 /* TLS socket options */
 #define TLS_TX                 1       /* Set transmit parameters */
+#define TLS_RX                 2       /* Set receive parameters */
 
 /* Supported versions */
 #define TLS_VERSION_MINOR(ver) ((ver) & 0xFF)
@@ -59,6 +60,7 @@
 #define TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE            8
 
 #define TLS_SET_RECORD_TYPE    1
+#define TLS_GET_RECORD_TYPE    2
 
 struct tls_crypto_info {
        __u16 version;
index 4b0b0b756f3ee4cbf37a5d04e013fe84a5b6f604..0af83d80fb3ea42b7fe161e89073a6f7503bc6dd 100644 (file)
@@ -32,6 +32,22 @@ struct ocxl_ioctl_attach {
        __u64 reserved3;
 };
 
+struct ocxl_ioctl_metadata {
+       __u16 version; // struct version, always backwards compatible
+
+       // Version 0 fields
+       __u8  afu_version_major;
+       __u8  afu_version_minor;
+       __u32 pasid;            // PASID assigned to the current context
+
+       __u64 pp_mmio_size;     // Per PASID MMIO size
+       __u64 global_mmio_size;
+
+       // End version 0 fields
+
+       __u64 reserved[13]; // Total of 16*u64
+};
+
 struct ocxl_ioctl_irq_fd {
        __u64 irq_offset;
        __s32 eventfd;
@@ -45,5 +61,6 @@ struct ocxl_ioctl_irq_fd {
 #define OCXL_IOCTL_IRQ_ALLOC   _IOR(OCXL_MAGIC, 0x11, __u64)
 #define OCXL_IOCTL_IRQ_FREE    _IOW(OCXL_MAGIC, 0x12, __u64)
 #define OCXL_IOCTL_IRQ_SET_FD  _IOW(OCXL_MAGIC, 0x13, struct ocxl_ioctl_irq_fd)
+#define OCXL_IOCTL_GET_METADATA _IOR(OCXL_MAGIC, 0x14, struct ocxl_ioctl_metadata)
 
 #endif /* _UAPI_MISC_OCXL_H */
index a8100b9548398e8b102052f2c1418b21ea423825..969eaf140ef0a5d356e2c9a085b5d2a86c215eba 100644 (file)
@@ -89,6 +89,7 @@
 #include <linux/io.h>
 #include <linux/cache.h>
 #include <linux/rodata_test.h>
+#include <linux/jump_label.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -1000,6 +1001,7 @@ static int __ref kernel_init(void *unused)
        /* need to finish all async __init code before freeing the memory */
        async_synchronize_full();
        ftrace_free_init_mem();
+       jump_label_invalidate_init();
        free_initmem();
        mark_readonly();
        system_state = SYSTEM_RUNNING;
index 81e2f6995adb14ea7e99e2a5cf6c27203ed726ef..bf6da59ae0d012b0e1328036fcfb1ddfd08347c5 100644 (file)
@@ -178,6 +178,9 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
 static struct dentry *
 bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
 {
+       /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future
+        * extensions.
+        */
        if (strchr(dentry->d_name.name, '.'))
                return ERR_PTR(-EPERM);
 
index a927e89dad6e9591066c3a87afc497a196ebd887..69c5bccabd229f801537f6137e311d075b79db72 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/skbuff.h>
 #include <linux/workqueue.h>
 #include <linux/list.h>
+#include <linux/mm.h>
 #include <net/strparser.h>
 #include <net/tcp.h>
 
@@ -47,6 +48,7 @@
 struct bpf_stab {
        struct bpf_map map;
        struct sock **sock_map;
+       struct bpf_prog *bpf_tx_msg;
        struct bpf_prog *bpf_parse;
        struct bpf_prog *bpf_verdict;
 };
@@ -62,8 +64,7 @@ struct smap_psock_map_entry {
 
 struct smap_psock {
        struct rcu_head rcu;
-       /* refcnt is used inside sk_callback_lock */
-       u32 refcnt;
+       refcount_t refcnt;
 
        /* datapath variables */
        struct sk_buff_head rxqueue;
@@ -74,7 +75,16 @@ struct smap_psock {
        int save_off;
        struct sk_buff *save_skb;
 
+       /* datapath variables for tx_msg ULP */
+       struct sock *sk_redir;
+       int apply_bytes;
+       int cork_bytes;
+       int sg_size;
+       int eval;
+       struct sk_msg_buff *cork;
+
        struct strparser strp;
+       struct bpf_prog *bpf_tx_msg;
        struct bpf_prog *bpf_parse;
        struct bpf_prog *bpf_verdict;
        struct list_head maps;
@@ -92,6 +102,11 @@ struct smap_psock {
        void (*save_write_space)(struct sock *sk);
 };
 
+static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
+static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
+                           int offset, size_t size, int flags);
+
 static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
 {
        return rcu_dereference_sk_user_data(sk);
@@ -116,27 +131,41 @@ static int bpf_tcp_init(struct sock *sk)
 
        psock->save_close = sk->sk_prot->close;
        psock->sk_proto = sk->sk_prot;
+
+       if (psock->bpf_tx_msg) {
+               tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
+               tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
+       }
+
        sk->sk_prot = &tcp_bpf_proto;
        rcu_read_unlock();
        return 0;
 }
 
+static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md);
+
 static void bpf_tcp_release(struct sock *sk)
 {
        struct smap_psock *psock;
 
        rcu_read_lock();
        psock = smap_psock_sk(sk);
+       if (unlikely(!psock))
+               goto out;
 
-       if (likely(psock)) {
-               sk->sk_prot = psock->sk_proto;
-               psock->sk_proto = NULL;
+       if (psock->cork) {
+               free_start_sg(psock->sock, psock->cork);
+               kfree(psock->cork);
+               psock->cork = NULL;
        }
+
+       sk->sk_prot = psock->sk_proto;
+       psock->sk_proto = NULL;
+out:
        rcu_read_unlock();
 }
 
-static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
-
 static void bpf_tcp_close(struct sock *sk, long timeout)
 {
        void (*close_fun)(struct sock *sk, long timeout);
@@ -175,6 +204,7 @@ enum __sk_action {
        __SK_DROP = 0,
        __SK_PASS,
        __SK_REDIRECT,
+       __SK_NONE,
 };
 
 static struct tcp_ulp_ops bpf_tcp_ulp_ops __read_mostly = {
@@ -186,10 +216,621 @@ static struct tcp_ulp_ops bpf_tcp_ulp_ops __read_mostly = {
        .release        = bpf_tcp_release,
 };
 
+static int memcopy_from_iter(struct sock *sk,
+                            struct sk_msg_buff *md,
+                            struct iov_iter *from, int bytes)
+{
+       struct scatterlist *sg = md->sg_data;
+       int i = md->sg_curr, rc = -ENOSPC;
+
+       do {
+               int copy;
+               char *to;
+
+               if (md->sg_copybreak >= sg[i].length) {
+                       md->sg_copybreak = 0;
+
+                       if (++i == MAX_SKB_FRAGS)
+                               i = 0;
+
+                       if (i == md->sg_end)
+                               break;
+               }
+
+               copy = sg[i].length - md->sg_copybreak;
+               to = sg_virt(&sg[i]) + md->sg_copybreak;
+               md->sg_copybreak += copy;
+
+               if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY)
+                       rc = copy_from_iter_nocache(to, copy, from);
+               else
+                       rc = copy_from_iter(to, copy, from);
+
+               if (rc != copy) {
+                       rc = -EFAULT;
+                       goto out;
+               }
+
+               bytes -= copy;
+               if (!bytes)
+                       break;
+
+               md->sg_copybreak = 0;
+               if (++i == MAX_SKB_FRAGS)
+                       i = 0;
+       } while (i != md->sg_end);
+out:
+       md->sg_curr = i;
+       return rc;
+}
+
+static int bpf_tcp_push(struct sock *sk, int apply_bytes,
+                       struct sk_msg_buff *md,
+                       int flags, bool uncharge)
+{
+       bool apply = apply_bytes;
+       struct scatterlist *sg;
+       int offset, ret = 0;
+       struct page *p;
+       size_t size;
+
+       while (1) {
+               sg = md->sg_data + md->sg_start;
+               size = (apply && apply_bytes < sg->length) ?
+                       apply_bytes : sg->length;
+               offset = sg->offset;
+
+               tcp_rate_check_app_limited(sk);
+               p = sg_page(sg);
+retry:
+               ret = do_tcp_sendpages(sk, p, offset, size, flags);
+               if (ret != size) {
+                       if (ret > 0) {
+                               if (apply)
+                                       apply_bytes -= ret;
+                               size -= ret;
+                               offset += ret;
+                               if (uncharge)
+                                       sk_mem_uncharge(sk, ret);
+                               goto retry;
+                       }
+
+                       sg->length = size;
+                       sg->offset = offset;
+                       return ret;
+               }
+
+               if (apply)
+                       apply_bytes -= ret;
+               sg->offset += ret;
+               sg->length -= ret;
+               if (uncharge)
+                       sk_mem_uncharge(sk, ret);
+
+               if (!sg->length) {
+                       put_page(p);
+                       md->sg_start++;
+                       if (md->sg_start == MAX_SKB_FRAGS)
+                               md->sg_start = 0;
+                       memset(sg, 0, sizeof(*sg));
+
+                       if (md->sg_start == md->sg_end)
+                               break;
+               }
+
+               if (apply && !apply_bytes)
+                       break;
+       }
+       return 0;
+}
+
+static inline void bpf_compute_data_pointers_sg(struct sk_msg_buff *md)
+{
+       struct scatterlist *sg = md->sg_data + md->sg_start;
+
+       if (md->sg_copy[md->sg_start]) {
+               md->data = md->data_end = 0;
+       } else {
+               md->data = sg_virt(sg);
+               md->data_end = md->data + sg->length;
+       }
+}
+
+static void return_mem_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
+{
+       struct scatterlist *sg = md->sg_data;
+       int i = md->sg_start;
+
+       do {
+               int uncharge = (bytes < sg[i].length) ? bytes : sg[i].length;
+
+               sk_mem_uncharge(sk, uncharge);
+               bytes -= uncharge;
+               if (!bytes)
+                       break;
+               i++;
+               if (i == MAX_SKB_FRAGS)
+                       i = 0;
+       } while (i != md->sg_end);
+}
+
+static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
+{
+       struct scatterlist *sg = md->sg_data;
+       int i = md->sg_start, free;
+
+       while (bytes && sg[i].length) {
+               free = sg[i].length;
+               if (bytes < free) {
+                       sg[i].length -= bytes;
+                       sg[i].offset += bytes;
+                       sk_mem_uncharge(sk, bytes);
+                       break;
+               }
+
+               sk_mem_uncharge(sk, sg[i].length);
+               put_page(sg_page(&sg[i]));
+               bytes -= sg[i].length;
+               sg[i].length = 0;
+               sg[i].page_link = 0;
+               sg[i].offset = 0;
+               i++;
+
+               if (i == MAX_SKB_FRAGS)
+                       i = 0;
+       }
+}
+
+static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
+{
+       struct scatterlist *sg = md->sg_data;
+       int i = start, free = 0;
+
+       while (sg[i].length) {
+               free += sg[i].length;
+               sk_mem_uncharge(sk, sg[i].length);
+               put_page(sg_page(&sg[i]));
+               sg[i].length = 0;
+               sg[i].page_link = 0;
+               sg[i].offset = 0;
+               i++;
+
+               if (i == MAX_SKB_FRAGS)
+                       i = 0;
+       }
+
+       return free;
+}
+
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md)
+{
+       int free = free_sg(sk, md->sg_start, md);
+
+       md->sg_start = md->sg_end;
+       return free;
+}
+
+static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md)
+{
+       return free_sg(sk, md->sg_curr, md);
+}
+
+static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md)
+{
+       return ((_rc == SK_PASS) ?
+              (md->map ? __SK_REDIRECT : __SK_PASS) :
+              __SK_DROP);
+}
+
+static unsigned int smap_do_tx_msg(struct sock *sk,
+                                  struct smap_psock *psock,
+                                  struct sk_msg_buff *md)
+{
+       struct bpf_prog *prog;
+       unsigned int rc, _rc;
+
+       preempt_disable();
+       rcu_read_lock();
+
+       /* If the policy was removed mid-send then default to 'accept' */
+       prog = READ_ONCE(psock->bpf_tx_msg);
+       if (unlikely(!prog)) {
+               _rc = SK_PASS;
+               goto verdict;
+       }
+
+       bpf_compute_data_pointers_sg(md);
+       rc = (*prog->bpf_func)(md, prog->insnsi);
+       psock->apply_bytes = md->apply_bytes;
+
+       /* Moving return codes from UAPI namespace into internal namespace */
+       _rc = bpf_map_msg_verdict(rc, md);
+
+       /* The psock has a refcount on the sock but not on the map and because
+        * we need to drop rcu read lock here it's possible the map could be
+        * removed between here and when we need it to execute the sock
+        * redirect. So do the map lookup now for future use.
+        */
+       if (_rc == __SK_REDIRECT) {
+               if (psock->sk_redir)
+                       sock_put(psock->sk_redir);
+               psock->sk_redir = do_msg_redirect_map(md);
+               if (!psock->sk_redir) {
+                       _rc = __SK_DROP;
+                       goto verdict;
+               }
+               sock_hold(psock->sk_redir);
+       }
+verdict:
+       rcu_read_unlock();
+       preempt_enable();
+
+       return _rc;
+}
+
+static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
+                                      struct sk_msg_buff *md,
+                                      int flags)
+{
+       struct smap_psock *psock;
+       struct scatterlist *sg;
+       int i, err, free = 0;
+
+       sg = md->sg_data;
+
+       rcu_read_lock();
+       psock = smap_psock_sk(sk);
+       if (unlikely(!psock))
+               goto out_rcu;
+
+       if (!refcount_inc_not_zero(&psock->refcnt))
+               goto out_rcu;
+
+       rcu_read_unlock();
+       lock_sock(sk);
+       err = bpf_tcp_push(sk, send, md, flags, false);
+       release_sock(sk);
+       smap_release_sock(psock, sk);
+       if (unlikely(err))
+               goto out;
+       return 0;
+out_rcu:
+       rcu_read_unlock();
+out:
+       i = md->sg_start;
+       while (sg[i].length) {
+               free += sg[i].length;
+               put_page(sg_page(&sg[i]));
+               sg[i].length = 0;
+               i++;
+               if (i == MAX_SKB_FRAGS)
+                       i = 0;
+       }
+       return free;
+}
+
+static inline void bpf_md_init(struct smap_psock *psock)
+{
+       if (!psock->apply_bytes) {
+               psock->eval =  __SK_NONE;
+               if (psock->sk_redir) {
+                       sock_put(psock->sk_redir);
+                       psock->sk_redir = NULL;
+               }
+       }
+}
+
+static void apply_bytes_dec(struct smap_psock *psock, int i)
+{
+       if (psock->apply_bytes) {
+               if (psock->apply_bytes < i)
+                       psock->apply_bytes = 0;
+               else
+                       psock->apply_bytes -= i;
+       }
+}
+
+static int bpf_exec_tx_verdict(struct smap_psock *psock,
+                              struct sk_msg_buff *m,
+                              struct sock *sk,
+                              int *copied, int flags)
+{
+       bool cork = false, enospc = (m->sg_start == m->sg_end);
+       struct sock *redir;
+       int err = 0;
+       int send;
+
+more_data:
+       if (psock->eval == __SK_NONE)
+               psock->eval = smap_do_tx_msg(sk, psock, m);
+
+       if (m->cork_bytes &&
+           m->cork_bytes > psock->sg_size && !enospc) {
+               psock->cork_bytes = m->cork_bytes - psock->sg_size;
+               if (!psock->cork) {
+                       psock->cork = kcalloc(1,
+                                       sizeof(struct sk_msg_buff),
+                                       GFP_ATOMIC | __GFP_NOWARN);
+
+                       if (!psock->cork) {
+                               err = -ENOMEM;
+                               goto out_err;
+                       }
+               }
+               memcpy(psock->cork, m, sizeof(*m));
+               goto out_err;
+       }
+
+       send = psock->sg_size;
+       if (psock->apply_bytes && psock->apply_bytes < send)
+               send = psock->apply_bytes;
+
+       switch (psock->eval) {
+       case __SK_PASS:
+               err = bpf_tcp_push(sk, send, m, flags, true);
+               if (unlikely(err)) {
+                       *copied -= free_start_sg(sk, m);
+                       break;
+               }
+
+               apply_bytes_dec(psock, send);
+               psock->sg_size -= send;
+               break;
+       case __SK_REDIRECT:
+               redir = psock->sk_redir;
+               apply_bytes_dec(psock, send);
+
+               if (psock->cork) {
+                       cork = true;
+                       psock->cork = NULL;
+               }
+
+               return_mem_sg(sk, send, m);
+               release_sock(sk);
+
+               err = bpf_tcp_sendmsg_do_redirect(redir, send, m, flags);
+               lock_sock(sk);
+
+               if (cork) {
+                       free_start_sg(sk, m);
+                       kfree(m);
+                       m = NULL;
+               }
+               if (unlikely(err))
+                       *copied -= err;
+               else
+                       psock->sg_size -= send;
+               break;
+       case __SK_DROP:
+       default:
+               free_bytes_sg(sk, send, m);
+               apply_bytes_dec(psock, send);
+               *copied -= send;
+               psock->sg_size -= send;
+               err = -EACCES;
+               break;
+       }
+
+       if (likely(!err)) {
+               bpf_md_init(psock);
+               if (m &&
+                   m->sg_data[m->sg_start].page_link &&
+                   m->sg_data[m->sg_start].length)
+                       goto more_data;
+       }
+
+out_err:
+       return err;
+}
+
+static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+       int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
+       struct sk_msg_buff md = {0};
+       unsigned int sg_copy = 0;
+       struct smap_psock *psock;
+       int copied = 0, err = 0;
+       struct scatterlist *sg;
+       long timeo;
+
+       /* It's possible a sock event or user removed the psock _but_ the ops
+        * have not been reprogrammed yet so we get here. In this case fallback
+        * to tcp_sendmsg. Note this only works because we _only_ ever allow
+        * a single ULP there is no hierarchy here.
+        */
+       rcu_read_lock();
+       psock = smap_psock_sk(sk);
+       if (unlikely(!psock)) {
+               rcu_read_unlock();
+               return tcp_sendmsg(sk, msg, size);
+       }
+
+       /* Increment the psock refcnt to ensure it's not released while sending a
+        * message. Required because sk lookup and bpf programs are used in
+        * separate rcu critical sections. It's OK if we lose the map entry
+        * but we can't lose the sock reference.
+        */
+       if (!refcount_inc_not_zero(&psock->refcnt)) {
+               rcu_read_unlock();
+               return tcp_sendmsg(sk, msg, size);
+       }
+
+       sg = md.sg_data;
+       sg_init_table(sg, MAX_SKB_FRAGS);
+       rcu_read_unlock();
+
+       lock_sock(sk);
+       timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+       while (msg_data_left(msg)) {
+               struct sk_msg_buff *m;
+               bool enospc = false;
+               int copy;
+
+               if (sk->sk_err) {
+                       err = sk->sk_err;
+                       goto out_err;
+               }
+
+               copy = msg_data_left(msg);
+               if (!sk_stream_memory_free(sk))
+                       goto wait_for_sndbuf;
+
+               m = psock->cork_bytes ? psock->cork : &md;
+               m->sg_curr = m->sg_copybreak ? m->sg_curr : m->sg_end;
+               err = sk_alloc_sg(sk, copy, m->sg_data,
+                                 m->sg_start, &m->sg_end, &sg_copy,
+                                 m->sg_end - 1);
+               if (err) {
+                       if (err != -ENOSPC)
+                               goto wait_for_memory;
+                       enospc = true;
+                       copy = sg_copy;
+               }
+
+               err = memcopy_from_iter(sk, m, &msg->msg_iter, copy);
+               if (err < 0) {
+                       free_curr_sg(sk, m);
+                       goto out_err;
+               }
+
+               psock->sg_size += copy;
+               copied += copy;
+               sg_copy = 0;
+
+               /* When bytes are being corked skip running BPF program and
+                * applying verdict unless there is no more buffer space. In
+                * the ENOSPC case simply run BPF program with currently
+                * accumulated data. We don't have much choice at this point
+                * we could try extending the page frags or chaining complex
+                * frags but even in these cases _eventually_ we will hit an
+                * OOM scenario. More complex recovery schemes may be
+                * implemented in the future, but BPF programs must handle
+                * the case where apply_cork requests are not honored. The
+                * canonical method to verify this is to check data length.
+                */
+               if (psock->cork_bytes) {
+                       if (copy > psock->cork_bytes)
+                               psock->cork_bytes = 0;
+                       else
+                               psock->cork_bytes -= copy;
+
+                       if (psock->cork_bytes && !enospc)
+                               goto out_cork;
+
+                       /* All cork bytes accounted for re-run filter */
+                       psock->eval = __SK_NONE;
+                       psock->cork_bytes = 0;
+               }
+
+               err = bpf_exec_tx_verdict(psock, m, sk, &copied, flags);
+               if (unlikely(err < 0))
+                       goto out_err;
+               continue;
+wait_for_sndbuf:
+               set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+wait_for_memory:
+               err = sk_stream_wait_memory(sk, &timeo);
+               if (err)
+                       goto out_err;
+       }
+out_err:
+       if (err < 0)
+               err = sk_stream_error(sk, msg->msg_flags, err);
+out_cork:
+       release_sock(sk);
+       smap_release_sock(psock, sk);
+       return copied ? copied : err;
+}
+
+static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
+                           int offset, size_t size, int flags)
+{
+       struct sk_msg_buff md = {0}, *m = NULL;
+       int err = 0, copied = 0;
+       struct smap_psock *psock;
+       struct scatterlist *sg;
+       bool enospc = false;
+
+       rcu_read_lock();
+       psock = smap_psock_sk(sk);
+       if (unlikely(!psock))
+               goto accept;
+
+       if (!refcount_inc_not_zero(&psock->refcnt))
+               goto accept;
+       rcu_read_unlock();
+
+       lock_sock(sk);
+
+       if (psock->cork_bytes)
+               m = psock->cork;
+       else
+               m = &md;
+
+       /* Catch case where ring is full and sendpage is stalled. */
+       if (unlikely(m->sg_end == m->sg_start &&
+           m->sg_data[m->sg_end].length))
+               goto out_err;
+
+       psock->sg_size += size;
+       sg = &m->sg_data[m->sg_end];
+       sg_set_page(sg, page, size, offset);
+       get_page(page);
+       m->sg_copy[m->sg_end] = true;
+       sk_mem_charge(sk, size);
+       m->sg_end++;
+       copied = size;
+
+       if (m->sg_end == MAX_SKB_FRAGS)
+               m->sg_end = 0;
+
+       if (m->sg_end == m->sg_start)
+               enospc = true;
+
+       if (psock->cork_bytes) {
+               if (size > psock->cork_bytes)
+                       psock->cork_bytes = 0;
+               else
+                       psock->cork_bytes -= size;
+
+               if (psock->cork_bytes && !enospc)
+                       goto out_err;
+
+               /* All cork bytes accounted for re-run filter */
+               psock->eval = __SK_NONE;
+               psock->cork_bytes = 0;
+       }
+
+       err = bpf_exec_tx_verdict(psock, m, sk, &copied, flags);
+out_err:
+       release_sock(sk);
+       smap_release_sock(psock, sk);
+       return copied ? copied : err;
+accept:
+       rcu_read_unlock();
+       return tcp_sendpage(sk, page, offset, size, flags);
+}
+
+static void bpf_tcp_msg_add(struct smap_psock *psock,
+                           struct sock *sk,
+                           struct bpf_prog *tx_msg)
+{
+       struct bpf_prog *orig_tx_msg;
+
+       orig_tx_msg = xchg(&psock->bpf_tx_msg, tx_msg);
+       if (orig_tx_msg)
+               bpf_prog_put(orig_tx_msg);
+}
+
 static int bpf_tcp_ulp_register(void)
 {
        tcp_bpf_proto = tcp_prot;
        tcp_bpf_proto.close = bpf_tcp_close;
+       /* Once BPF TX ULP is registered it is never unregistered. It
+        * will be in the ULP list for the lifetime of the system. Doing
+        * duplicate registers is not a problem.
+        */
        return tcp_register_ulp(&bpf_tcp_ulp_ops);
 }
 
@@ -373,15 +1014,13 @@ static void smap_destroy_psock(struct rcu_head *rcu)
 
 static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
 {
-       psock->refcnt--;
-       if (psock->refcnt)
-               return;
-
-       tcp_cleanup_ulp(sock);
-       smap_stop_sock(psock, sock);
-       clear_bit(SMAP_TX_RUNNING, &psock->state);
-       rcu_assign_sk_user_data(sock, NULL);
-       call_rcu_sched(&psock->rcu, smap_destroy_psock);
+       if (refcount_dec_and_test(&psock->refcnt)) {
+               tcp_cleanup_ulp(sock);
+               smap_stop_sock(psock, sock);
+               clear_bit(SMAP_TX_RUNNING, &psock->state);
+               rcu_assign_sk_user_data(sock, NULL);
+               call_rcu_sched(&psock->rcu, smap_destroy_psock);
+       }
 }
 
 static int smap_parse_func_strparser(struct strparser *strp,
@@ -415,7 +1054,6 @@ static int smap_parse_func_strparser(struct strparser *strp,
        return rc;
 }
 
-
 static int smap_read_sock_done(struct strparser *strp, int err)
 {
        return err;
@@ -485,12 +1123,22 @@ static void smap_gc_work(struct work_struct *w)
                bpf_prog_put(psock->bpf_parse);
        if (psock->bpf_verdict)
                bpf_prog_put(psock->bpf_verdict);
+       if (psock->bpf_tx_msg)
+               bpf_prog_put(psock->bpf_tx_msg);
+
+       if (psock->cork) {
+               free_start_sg(psock->sock, psock->cork);
+               kfree(psock->cork);
+       }
 
        list_for_each_entry_safe(e, tmp, &psock->maps, list) {
                list_del(&e->list);
                kfree(e);
        }
 
+       if (psock->sk_redir)
+               sock_put(psock->sk_redir);
+
        sock_put(psock->sock);
        kfree(psock);
 }
@@ -506,12 +1154,13 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
        if (!psock)
                return ERR_PTR(-ENOMEM);
 
+       psock->eval =  __SK_NONE;
        psock->sock = sock;
        skb_queue_head_init(&psock->rxqueue);
        INIT_WORK(&psock->tx_work, smap_tx_work);
        INIT_WORK(&psock->gc_work, smap_gc_work);
        INIT_LIST_HEAD(&psock->maps);
-       psock->refcnt = 1;
+       refcount_set(&psock->refcnt, 1);
 
        rcu_assign_sk_user_data(sock, psock);
        sock_hold(sock);
@@ -714,10 +1363,11 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 {
        struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
        struct smap_psock_map_entry *e = NULL;
-       struct bpf_prog *verdict, *parse;
+       struct bpf_prog *verdict, *parse, *tx_msg;
        struct sock *osock, *sock;
        struct smap_psock *psock;
        u32 i = *(u32 *)key;
+       bool new = false;
        int err;
 
        if (unlikely(flags > BPF_EXIST))
@@ -740,6 +1390,7 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
         */
        verdict = READ_ONCE(stab->bpf_verdict);
        parse = READ_ONCE(stab->bpf_parse);
+       tx_msg = READ_ONCE(stab->bpf_tx_msg);
 
        if (parse && verdict) {
                /* bpf prog refcnt may be zero if a concurrent attach operation
@@ -758,6 +1409,17 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
                }
        }
 
+       if (tx_msg) {
+               tx_msg = bpf_prog_inc_not_zero(stab->bpf_tx_msg);
+               if (IS_ERR(tx_msg)) {
+                       if (verdict)
+                               bpf_prog_put(verdict);
+                       if (parse)
+                               bpf_prog_put(parse);
+                       return PTR_ERR(tx_msg);
+               }
+       }
+
        write_lock_bh(&sock->sk_callback_lock);
        psock = smap_psock_sk(sock);
 
@@ -772,7 +1434,14 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
                        err = -EBUSY;
                        goto out_progs;
                }
-               psock->refcnt++;
+               if (READ_ONCE(psock->bpf_tx_msg) && tx_msg) {
+                       err = -EBUSY;
+                       goto out_progs;
+               }
+               if (!refcount_inc_not_zero(&psock->refcnt)) {
+                       err = -EAGAIN;
+                       goto out_progs;
+               }
        } else {
                psock = smap_init_psock(sock, stab);
                if (IS_ERR(psock)) {
@@ -780,11 +1449,8 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
                        goto out_progs;
                }
 
-               err = tcp_set_ulp_id(sock, TCP_ULP_BPF);
-               if (err)
-                       goto out_progs;
-
                set_bit(SMAP_TX_RUNNING, &psock->state);
+               new = true;
        }
 
        e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
@@ -797,6 +1463,14 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
        /* 3. At this point we have a reference to a valid psock that is
         * running. Attach any BPF programs needed.
         */
+       if (tx_msg)
+               bpf_tcp_msg_add(psock, sock, tx_msg);
+       if (new) {
+               err = tcp_set_ulp_id(sock, TCP_ULP_BPF);
+               if (err)
+                       goto out_free;
+       }
+
        if (parse && verdict && !psock->strp_enabled) {
                err = smap_init_sock(psock, sock);
                if (err)
@@ -818,8 +1492,6 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
                struct smap_psock *opsock = smap_psock_sk(osock);
 
                write_lock_bh(&osock->sk_callback_lock);
-               if (osock != sock && parse)
-                       smap_stop_sock(opsock, osock);
                smap_list_remove(opsock, &stab->sock_map[i]);
                smap_release_sock(opsock, osock);
                write_unlock_bh(&osock->sk_callback_lock);
@@ -832,6 +1504,8 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
                bpf_prog_put(verdict);
        if (parse)
                bpf_prog_put(parse);
+       if (tx_msg)
+               bpf_prog_put(tx_msg);
        write_unlock_bh(&sock->sk_callback_lock);
        kfree(e);
        return err;
@@ -846,6 +1520,9 @@ int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
                return -EINVAL;
 
        switch (type) {
+       case BPF_SK_MSG_VERDICT:
+               orig = xchg(&stab->bpf_tx_msg, prog);
+               break;
        case BPF_SK_SKB_STREAM_PARSER:
                orig = xchg(&stab->bpf_parse, prog);
                break;
@@ -907,6 +1584,10 @@ static void sock_map_release(struct bpf_map *map, struct file *map_file)
        orig = xchg(&stab->bpf_verdict, NULL);
        if (orig)
                bpf_prog_put(orig);
+
+       orig = xchg(&stab->bpf_tx_msg, NULL);
+       if (orig)
+               bpf_prog_put(orig);
 }
 
 const struct bpf_map_ops sock_map_ops = {
index b0ecf43f5894d12de9a20f4399e79e0b6d2979b8..57eeb1234b67e7dabd555e9562b0a0b59cd57abb 100644 (file)
@@ -9,16 +9,19 @@
 #include <linux/filter.h>
 #include <linux/stacktrace.h>
 #include <linux/perf_event.h>
+#include <linux/elf.h>
+#include <linux/pagemap.h>
 #include "percpu_freelist.h"
 
-#define STACK_CREATE_FLAG_MASK \
-       (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+#define STACK_CREATE_FLAG_MASK                                 \
+       (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY |        \
+        BPF_F_STACK_BUILD_ID)
 
 struct stack_map_bucket {
        struct pcpu_freelist_node fnode;
        u32 hash;
        u32 nr;
-       u64 ip[];
+       u64 data[];
 };
 
 struct bpf_stack_map {
@@ -29,6 +32,17 @@ struct bpf_stack_map {
        struct stack_map_bucket *buckets[];
 };
 
+/* True when @map carries the BPF_F_STACK_BUILD_ID flag. */
+static inline bool stack_map_use_build_id(struct bpf_map *map)
+{
+       return !!(map->map_flags & BPF_F_STACK_BUILD_ID);
+}
+
+/* Size in bytes of one stack entry held in @map: a
+ * struct bpf_stack_build_id for build-id maps, a u64 otherwise.
+ */
+static inline int stack_map_data_size(struct bpf_map *map)
+{
+       if (stack_map_use_build_id(map))
+               return sizeof(struct bpf_stack_build_id);
+
+       return sizeof(u64);
+}
+
 static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
 {
        u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size;
@@ -68,8 +82,16 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 
        /* check sanity of attributes */
        if (attr->max_entries == 0 || attr->key_size != 4 ||
-           value_size < 8 || value_size % 8 ||
-           value_size / 8 > sysctl_perf_event_max_stack)
+           value_size < 8 || value_size % 8)
+               return ERR_PTR(-EINVAL);
+
+       BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64));
+       if (attr->map_flags & BPF_F_STACK_BUILD_ID) {
+               if (value_size % sizeof(struct bpf_stack_build_id) ||
+                   value_size / sizeof(struct bpf_stack_build_id)
+                   > sysctl_perf_event_max_stack)
+                       return ERR_PTR(-EINVAL);
+       } else if (value_size / 8 > sysctl_perf_event_max_stack)
                return ERR_PTR(-EINVAL);
 
        /* hash table size must be power of 2 */
@@ -114,13 +136,184 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
        return ERR_PTR(err);
 }
 
+#define BPF_BUILD_ID 3
+/*
+ * Parse build id from the note segment. This logic can be shared between
+ * 32-bit and 64-bit systems, because Elf32_Nhdr and Elf64_Nhdr are
+ * identical.
+ *
+ * Walks the notes in [note_start, note_start + note_size), which must lie
+ * within the page starting at page_addr. The first note of type
+ * BPF_BUILD_ID whose name is "GNU" and whose descriptor is exactly
+ * BPF_BUILD_ID_SIZE bytes is copied into build_id.
+ *
+ * Returns 0 when a build id was copied, -EINVAL otherwise.
+ */
+static inline int stack_map_parse_build_id(void *page_addr,
+                                          unsigned char *build_id,
+                                          void *note_start,
+                                          Elf32_Word note_size)
+{
+       /* offset of the descriptor inside a note whose name is "GNU" */
+       const Elf32_Word desc_offs = sizeof(Elf32_Nhdr) +
+                                    ALIGN(sizeof("GNU"), 4);
+       Elf32_Word note_offs = 0, new_offs;
+
+       /* check for overflow */
+       if (note_start < page_addr || note_start + note_size < note_start)
+               return -EINVAL;
+
+       /* only supports note that fits in the first page */
+       if (note_start + note_size > page_addr + PAGE_SIZE)
+               return -EINVAL;
+
+       while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
+               Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
+
+               /*
+                * Match on the note name itself, not only on its length,
+                * and make sure name and descriptor lie inside the segment
+                * before reading them.
+                */
+               if (nhdr->n_type == BPF_BUILD_ID &&
+                   nhdr->n_namesz == sizeof("GNU") &&
+                   nhdr->n_descsz == BPF_BUILD_ID_SIZE &&
+                   note_size - note_offs >= desc_offs + BPF_BUILD_ID_SIZE &&
+                   memcmp(nhdr + 1, "GNU", sizeof("GNU")) == 0) {
+                       memcpy(build_id, note_start + note_offs + desc_offs,
+                              BPF_BUILD_ID_SIZE);
+                       return 0;
+               }
+               new_offs = note_offs + sizeof(Elf32_Nhdr) +
+                       ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
+               /* stop on wrap-around or once the next note is out of range */
+               if (new_offs <= note_offs || new_offs >= note_size)
+                       break;
+               note_offs = new_offs;
+       }
+       return -EINVAL;
+}
+
+/*
+ * Parse build ID from 32-bit ELF
+ *
+ * page_addr points at the first page of the file. The program header
+ * table is read from directly behind the ELF header and must fit in that
+ * page. Returns 0 with build_id filled in, or -EINVAL when no PT_NOTE
+ * segment yields a build ID.
+ */
+static int stack_map_get_build_id_32(void *page_addr,
+                                    unsigned char *build_id)
+{
+       Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
+       Elf32_Phdr *phdr;
+       int i;
+
+       /* only supports phdr that fits in one page */
+       if (ehdr->e_phnum >
+           (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
+               return -EINVAL;
+
+       phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));
+
+       /*
+        * A file may carry several PT_NOTE segments and the build ID need
+        * not be in the first one, so keep scanning until one parses.
+        */
+       for (i = 0; i < ehdr->e_phnum; ++i)
+               if (phdr[i].p_type == PT_NOTE &&
+                   !stack_map_parse_build_id(page_addr, build_id,
+                                             page_addr + phdr[i].p_offset,
+                                             phdr[i].p_filesz))
+                       return 0;
+       return -EINVAL;
+}
+
+/*
+ * Parse build ID from 64-bit ELF
+ *
+ * page_addr points at the first page of the file. The program header
+ * table is read from directly behind the ELF header and must fit in that
+ * page. Returns 0 with build_id filled in, or -EINVAL when no PT_NOTE
+ * segment yields a build ID.
+ */
+static int stack_map_get_build_id_64(void *page_addr,
+                                    unsigned char *build_id)
+{
+       Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
+       Elf64_Phdr *phdr;
+       int i;
+
+       /* only supports phdr that fits in one page */
+       if (ehdr->e_phnum >
+           (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
+               return -EINVAL;
+
+       phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));
+
+       /*
+        * A file may carry several PT_NOTE segments and the build ID need
+        * not be in the first one, so keep scanning until one parses.
+        */
+       for (i = 0; i < ehdr->e_phnum; ++i)
+               if (phdr[i].p_type == PT_NOTE &&
+                   !stack_map_parse_build_id(page_addr, build_id,
+                                             page_addr + phdr[i].p_offset,
+                                             phdr[i].p_filesz))
+                       return 0;
+       return -EINVAL;
+}
+
+/*
+ * Parse build ID of ELF file mapped to vma
+ *
+ * Only page 0 of the backing file is examined, and only if it is already
+ * present in the page cache. Returns 0 with build_id filled in, -EFAULT
+ * when that page is not cached, and -EINVAL when the vma has no file or
+ * the page does not hold a usable ELF header / build ID note.
+ */
+static int stack_map_get_build_id(struct vm_area_struct *vma,
+                                 unsigned char *build_id)
+{
+       Elf32_Ehdr *ehdr;
+       struct page *page;
+       void *page_addr;
+       int ret;
+
+       /* only works for page backed storage */
+       if (!vma->vm_file)
+               return -EINVAL;
+
+       page = find_get_page(vma->vm_file->f_mapping, 0);
+       if (!page)
+               return -EFAULT; /* page not in page cache */
+
+       ret = -EINVAL;
+       /*
+        * page_address() yields NULL for a highmem page that has no
+        * kernel mapping, so map the page explicitly for the parse.
+        */
+       page_addr = kmap_atomic(page);
+       ehdr = (Elf32_Ehdr *)page_addr;
+
+       /* compare magic \x7f "ELF" */
+       if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
+               goto out;
+
+       /* only support executable file and shared object file */
+       if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN)
+               goto out;
+
+       if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
+               ret = stack_map_get_build_id_32(page_addr, build_id);
+       else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
+               ret = stack_map_get_build_id_64(page_addr, build_id);
+out:
+       kunmap_atomic(page_addr);
+       put_page(page);
+       return ret;
+}
+
+/*
+ * Fill bucket->data with one struct bpf_stack_build_id per entry of ips.
+ *
+ * For each ip the build ID of the file backing its vma and the offset
+ * into that file are recorded (status BPF_STACK_BUILD_ID_VALID). When the
+ * lookup is not possible the raw ip is stored instead (status
+ * BPF_STACK_BUILD_ID_IP) and build_id is zeroed, so that every entry is
+ * fully initialised whichever path produced it.
+ */
+static void stack_map_get_build_id_offset(struct bpf_map *map,
+                                         struct stack_map_bucket *bucket,
+                                         u64 *ips, u32 trace_nr, bool user)
+{
+       int i;
+       struct vm_area_struct *vma;
+       struct bpf_stack_build_id *id_offs;
+
+       bucket->nr = trace_nr;
+       id_offs = (struct bpf_stack_build_id *)bucket->data;
+
+       /*
+        * We cannot do up_read() in nmi context, so build_id lookup is
+        * only supported for non-nmi events. If at some point, it is
+        * possible to run find_vma() without taking the semaphore, we
+        * would like to allow build_id lookup in nmi context.
+        *
+        * Same fallback is used for kernel stack (!user) on a stackmap
+        * with build_id.
+        */
+       if (!user || !current || !current->mm || in_nmi() ||
+           down_read_trylock(&current->mm->mmap_sem) == 0) {
+               /* cannot access current->mm, fall back to ips */
+               for (i = 0; i < trace_nr; i++) {
+                       id_offs[i].status = BPF_STACK_BUILD_ID_IP;
+                       id_offs[i].ip = ips[i];
+                       /* bucket memory is recycled: clear stale bytes */
+                       memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
+               }
+               return;
+       }
+
+       for (i = 0; i < trace_nr; i++) {
+               vma = find_vma(current->mm, ips[i]);
+               if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) {
+                       /* per entry fall back to ips */
+                       id_offs[i].status = BPF_STACK_BUILD_ID_IP;
+                       id_offs[i].ip = ips[i];
+                       /* a failed lookup leaves build_id unwritten */
+                       memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
+                       continue;
+               }
+               id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
+                       - vma->vm_start;
+               id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
+       }
+       up_read(&current->mm->mmap_sem);
+}
+
 BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
           u64, flags)
 {
        struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
        struct perf_callchain_entry *trace;
        struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
-       u32 max_depth = map->value_size / 8;
+       u32 max_depth = map->value_size / stack_map_data_size(map);
        /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
        u32 init_nr = sysctl_perf_event_max_stack - max_depth;
        u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
@@ -128,6 +321,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
        bool user = flags & BPF_F_USER_STACK;
        bool kernel = !user;
        u64 *ips;
+       bool hash_matches;
 
        if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
                               BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
@@ -156,24 +350,43 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
        id = hash & (smap->n_buckets - 1);
        bucket = READ_ONCE(smap->buckets[id]);
 
-       if (bucket && bucket->hash == hash) {
-               if (flags & BPF_F_FAST_STACK_CMP)
+       hash_matches = bucket && bucket->hash == hash;
+       /* fast cmp */
+       if (hash_matches && flags & BPF_F_FAST_STACK_CMP)
+               return id;
+
+       if (stack_map_use_build_id(map)) {
+               /* for build_id+offset, pop a bucket before slow cmp */
+               new_bucket = (struct stack_map_bucket *)
+                       pcpu_freelist_pop(&smap->freelist);
+               if (unlikely(!new_bucket))
+                       return -ENOMEM;
+               stack_map_get_build_id_offset(map, new_bucket, ips,
+                                             trace_nr, user);
+               trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
+               if (hash_matches && bucket->nr == trace_nr &&
+                   memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
+                       pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
                        return id;
-               if (bucket->nr == trace_nr &&
-                   memcmp(bucket->ip, ips, trace_len) == 0)
+               }
+               if (bucket && !(flags & BPF_F_REUSE_STACKID)) {
+                       pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
+                       return -EEXIST;
+               }
+       } else {
+               if (hash_matches && bucket->nr == trace_nr &&
+                   memcmp(bucket->data, ips, trace_len) == 0)
                        return id;
+               if (bucket && !(flags & BPF_F_REUSE_STACKID))
+                       return -EEXIST;
+
+               new_bucket = (struct stack_map_bucket *)
+                       pcpu_freelist_pop(&smap->freelist);
+               if (unlikely(!new_bucket))
+                       return -ENOMEM;
+               memcpy(new_bucket->data, ips, trace_len);
        }
 
-       /* this call stack is not in the map, try to add it */
-       if (bucket && !(flags & BPF_F_REUSE_STACKID))
-               return -EEXIST;
-
-       new_bucket = (struct stack_map_bucket *)
-               pcpu_freelist_pop(&smap->freelist);
-       if (unlikely(!new_bucket))
-               return -ENOMEM;
-
-       memcpy(new_bucket->ip, ips, trace_len);
        new_bucket->hash = hash;
        new_bucket->nr = trace_nr;
 
@@ -212,8 +425,8 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
        if (!bucket)
                return -ENOENT;
 
-       trace_len = bucket->nr * sizeof(u64);
-       memcpy(value, bucket->ip, trace_len);
+       trace_len = bucket->nr * stack_map_data_size(map);
+       memcpy(value, bucket->data, trace_len);
        memset(value + trace_len, 0, map->value_size - trace_len);
 
        old_bucket = xchg(&smap->buckets[id], bucket);
index e24aa3241387de91483a89160e50ee41deede775..dd172ee16716a6ba0e05dbaa8e8e907273d53133 100644 (file)
@@ -1315,7 +1315,8 @@ static int bpf_obj_get(const union bpf_attr *attr)
 
 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
 
-static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
+static int sockmap_get_from_fd(const union bpf_attr *attr,
+                              int type, bool attach)
 {
        struct bpf_prog *prog = NULL;
        int ufd = attr->target_fd;
@@ -1329,8 +1330,7 @@ static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
                return PTR_ERR(map);
 
        if (attach) {
-               prog = bpf_prog_get_type(attr->attach_bpf_fd,
-                                        BPF_PROG_TYPE_SK_SKB);
+               prog = bpf_prog_get_type(attr->attach_bpf_fd, type);
                if (IS_ERR(prog)) {
                        fdput(f);
                        return PTR_ERR(prog);
@@ -1382,9 +1382,11 @@ static int bpf_prog_attach(const union bpf_attr *attr)
        case BPF_CGROUP_DEVICE:
                ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
                break;
+       case BPF_SK_MSG_VERDICT:
+               return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, true);
        case BPF_SK_SKB_STREAM_PARSER:
        case BPF_SK_SKB_STREAM_VERDICT:
-               return sockmap_get_from_fd(attr, true);
+               return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true);
        default:
                return -EINVAL;
        }
@@ -1437,9 +1439,11 @@ static int bpf_prog_detach(const union bpf_attr *attr)
        case BPF_CGROUP_DEVICE:
                ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
                break;
+       case BPF_SK_MSG_VERDICT:
+               return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, false);
        case BPF_SK_SKB_STREAM_PARSER:
        case BPF_SK_SKB_STREAM_VERDICT:
-               return sockmap_get_from_fd(attr, false);
+               return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false);
        default:
                return -EINVAL;
        }
@@ -1845,7 +1849,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
        union bpf_attr attr = {};
        int err;
 
-       if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
+       if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
                return -EPERM;
 
        err = check_uarg_tail_zero(uattr, sizeof(attr), size);
index 5fb69a85d9675dcf8a077d20f5314ee78949e884..e9f7c20691c1e685ee3feda6e684918118cd6c57 100644 (file)
@@ -508,10 +508,6 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
 static const int caller_saved[CALLER_SAVED_REGS] = {
        BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 };
-#define CALLEE_SAVED_REGS 5
-static const int callee_saved[CALLEE_SAVED_REGS] = {
-       BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9
-};
 
 static void __mark_reg_not_init(struct bpf_reg_state *reg);
 
@@ -1252,6 +1248,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
        case BPF_PROG_TYPE_XDP:
        case BPF_PROG_TYPE_LWT_XMIT:
        case BPF_PROG_TYPE_SK_SKB:
+       case BPF_PROG_TYPE_SK_MSG:
                if (meta)
                        return meta->pkt_access;
 
@@ -1356,6 +1353,13 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
        return reg->type == PTR_TO_CTX;
 }
 
+/* Report whether register @regno of the current state has a packet
+ * pointer type, as decided by type_is_pkt_pointer().
+ */
+static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
+{
+       const struct bpf_reg_state *regs = cur_regs(env);
+
+       return type_is_pkt_pointer(regs[regno].type);
+}
+
 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
                                   const struct bpf_reg_state *reg,
                                   int off, int size, bool strict)
@@ -1416,10 +1420,10 @@ static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
 }
 
 static int check_ptr_alignment(struct bpf_verifier_env *env,
-                              const struct bpf_reg_state *reg,
-                              int off, int size)
+                              const struct bpf_reg_state *reg, int off,
+                              int size, bool strict_alignment_once)
 {
-       bool strict = env->strict_alignment;
+       bool strict = env->strict_alignment || strict_alignment_once;
        const char *pointer_desc = "";
 
        switch (reg->type) {
@@ -1576,9 +1580,9 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
  * if t==write && value_regno==-1, some unknown value is stored into memory
  * if t==read && value_regno==-1, don't care what we read from memory
  */
-static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off,
-                           int bpf_size, enum bpf_access_type t,
-                           int value_regno)
+static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
+                           int off, int bpf_size, enum bpf_access_type t,
+                           int value_regno, bool strict_alignment_once)
 {
        struct bpf_reg_state *regs = cur_regs(env);
        struct bpf_reg_state *reg = regs + regno;
@@ -1590,7 +1594,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
                return size;
 
        /* alignment checks will add in reg->off themselves */
-       err = check_ptr_alignment(env, reg, off, size);
+       err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
        if (err)
                return err;
 
@@ -1735,21 +1739,23 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
                return -EACCES;
        }
 
-       if (is_ctx_reg(env, insn->dst_reg)) {
-               verbose(env, "BPF_XADD stores into R%d context is not allowed\n",
-                       insn->dst_reg);
+       if (is_ctx_reg(env, insn->dst_reg) ||
+           is_pkt_reg(env, insn->dst_reg)) {
+               verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
+                       insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ?
+                       "context" : "packet");
                return -EACCES;
        }
 
        /* check whether atomic_add can read the memory */
        err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
-                              BPF_SIZE(insn->code), BPF_READ, -1);
+                              BPF_SIZE(insn->code), BPF_READ, -1, true);
        if (err)
                return err;
 
        /* check whether atomic_add can write into the same memory */
        return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
-                               BPF_SIZE(insn->code), BPF_WRITE, -1);
+                               BPF_SIZE(insn->code), BPF_WRITE, -1, true);
 }
 
 /* when register 'regno' is passed into function that will read 'access_size'
@@ -2066,7 +2072,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
        case BPF_MAP_TYPE_SOCKMAP:
                if (func_id != BPF_FUNC_sk_redirect_map &&
                    func_id != BPF_FUNC_sock_map_update &&
-                   func_id != BPF_FUNC_map_delete_elem)
+                   func_id != BPF_FUNC_map_delete_elem &&
+                   func_id != BPF_FUNC_msg_redirect_map)
                        goto error;
                break;
        default:
@@ -2104,6 +2111,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
                        goto error;
                break;
        case BPF_FUNC_sk_redirect_map:
+       case BPF_FUNC_msg_redirect_map:
                if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
                        goto error;
                break;
@@ -2388,7 +2396,8 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
         * is inferred from register state.
         */
        for (i = 0; i < meta.access_size; i++) {
-               err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1);
+               err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
+                                      BPF_WRITE, -1, false);
                if (err)
                        return err;
        }
@@ -4632,7 +4641,7 @@ static int do_check(struct bpf_verifier_env *env)
                         */
                        err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
                                               BPF_SIZE(insn->code), BPF_READ,
-                                              insn->dst_reg);
+                                              insn->dst_reg, false);
                        if (err)
                                return err;
 
@@ -4684,7 +4693,7 @@ static int do_check(struct bpf_verifier_env *env)
                        /* check that memory (dst_reg + off) is writeable */
                        err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
                                               BPF_SIZE(insn->code), BPF_WRITE,
-                                              insn->src_reg);
+                                              insn->src_reg, false);
                        if (err)
                                return err;
 
@@ -4719,7 +4728,7 @@ static int do_check(struct bpf_verifier_env *env)
                        /* check that memory (dst_reg + off) is writeable */
                        err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
                                               BPF_SIZE(insn->code), BPF_WRITE,
-                                              -1);
+                                              -1, false);
                        if (err)
                                return err;
 
index 8cda3bc3ae22841f9c3a9478ae65cadba6b65f66..4bfb2908ec157204692424bc1b1a32a7ef185b29 100644 (file)
@@ -3183,6 +3183,16 @@ static int cgroup_enable_threaded(struct cgroup *cgrp)
        if (cgroup_is_threaded(cgrp))
                return 0;
 
+       /*
+        * If @cgroup is populated or has domain controllers enabled, it
+        * can't be switched.  While the below cgroup_can_be_thread_root()
+        * test can catch the same conditions, that's only when @parent is
+        * not mixable, so let's check it explicitly.
+        */
+       if (cgroup_is_populated(cgrp) ||
+           cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
+               return -EOPNOTSUPP;
+
        /* we're joining the parent's domain, ensure its validity */
        if (!cgroup_is_valid_domain(dom_cgrp) ||
            !cgroup_can_be_thread_root(dom_cgrp))
index 3247fe761f6018ef61ecb37124720f3fa092d441..3f5fa8902e7dc72096ab1c8da3f3638ac6dc32db 100644 (file)
@@ -488,25 +488,6 @@ get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat)
 }
 EXPORT_SYMBOL_GPL(get_compat_sigset);
 
-int
-put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set,
-                 unsigned int size)
-{
-       /* size <= sizeof(compat_sigset_t) <= sizeof(sigset_t) */
-#ifdef __BIG_ENDIAN
-       compat_sigset_t v;
-       switch (_NSIG_WORDS) {
-       case 4: v.sig[7] = (set->sig[3] >> 32); v.sig[6] = set->sig[3];
-       case 3: v.sig[5] = (set->sig[2] >> 32); v.sig[4] = set->sig[2];
-       case 2: v.sig[3] = (set->sig[1] >> 32); v.sig[2] = set->sig[1];
-       case 1: v.sig[1] = (set->sig[0] >> 32); v.sig[0] = set->sig[0];
-       }
-       return copy_to_user(compat, &v, size) ? -EFAULT : 0;
-#else
-       return copy_to_user(compat, set, size) ? -EFAULT : 0;
-#endif
-}
-
 #ifdef CONFIG_NUMA
 COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages,
                       compat_uptr_t __user *, pages32,
index 96db9ae5d5af751edd61189407aa064d591b54dd..4b838470fac42ca8b11d1535d785569b0a6d6715 100644 (file)
@@ -2246,7 +2246,7 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
                        struct perf_event_context *task_ctx,
                        enum event_type_t event_type)
 {
-       enum event_type_t ctx_event_type = event_type & EVENT_ALL;
+       enum event_type_t ctx_event_type;
        bool cpu_event = !!(event_type & EVENT_CPU);
 
        /*
@@ -2256,6 +2256,8 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
        if (event_type & EVENT_PINNED)
                event_type |= EVENT_FLEXIBLE;
 
+       ctx_event_type = event_type & EVENT_ALL;
+
        perf_pmu_disable(cpuctx->ctx.pmu);
        if (task_ctx)
                task_ctx_sched_out(cpuctx, task_ctx, event_type);
index a17fdb63dc3e470955dcfd5e5861920b9db2749f..6a5b61ebc66c956e6eeee6537786f4161d98f2ef 100644 (file)
@@ -64,7 +64,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
        return e;
 }
 
-static inline int init_kernel_text(unsigned long addr)
+int init_kernel_text(unsigned long addr)
 {
        if (addr >= (unsigned long)_sinittext &&
            addr < (unsigned long)_einittext)
index 21b0122cb39cb1c8f45976566e3b4f675d50450c..1d5632d8bbccfd941a6b0b1cf0ac380c4aeb0a5c 100644 (file)
 
 static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs);
 
+/*
+ * fei_post_handler - intentionally empty kprobe post handler.
+ *
+ * None of the arguments are used; the function exists only so that a
+ * post handler can be installed on the probe (see the comment below).
+ */
+static void fei_post_handler(struct kprobe *kp, struct pt_regs *regs,
+                            unsigned long flags)
+{
+       /*
+        * A dummy post handler is required to prohibit optimizing, because
+        * jump optimization does not support execution path overriding.
+        */
+}
+
 struct fei_attr {
        struct list_head list;
        struct kprobe kp;
@@ -56,6 +65,7 @@ static struct fei_attr *fei_attr_new(const char *sym, unsigned long addr)
                        return NULL;
                }
                attr->kp.pre_handler = fei_kprobe_handler;
+               attr->kp.post_handler = fei_post_handler;
                attr->retval = adjust_error_retval(addr, 0);
                INIT_LIST_HEAD(&attr->list);
        }
index 5187dfe809ac46eede7a8163bd86a88e8a186379..4c5770407031f083dae0a6fcbc3f20abb8a30146 100644 (file)
@@ -16,6 +16,7 @@ struct cpumap {
        unsigned int            available;
        unsigned int            allocated;
        unsigned int            managed;
+       bool                    initialized;
        bool                    online;
        unsigned long           alloc_map[IRQ_MATRIX_SIZE];
        unsigned long           managed_map[IRQ_MATRIX_SIZE];
@@ -81,9 +82,11 @@ void irq_matrix_online(struct irq_matrix *m)
 
        BUG_ON(cm->online);
 
-       bitmap_zero(cm->alloc_map, m->matrix_bits);
-       cm->available = m->alloc_size - (cm->managed + m->systembits_inalloc);
-       cm->allocated = 0;
+       if (!cm->initialized) {
+               cm->available = m->alloc_size;
+               cm->available -= cm->managed + m->systembits_inalloc;
+               cm->initialized = true;
+       }
        m->global_available += cm->available;
        cm->online = true;
        m->online_maps++;
@@ -370,14 +373,16 @@ void irq_matrix_free(struct irq_matrix *m, unsigned int cpu,
        if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end))
                return;
 
-       if (cm->online) {
-               clear_bit(bit, cm->alloc_map);
-               cm->allocated--;
+       clear_bit(bit, cm->alloc_map);
+       cm->allocated--;
+
+       if (cm->online)
                m->total_allocated--;
-               if (!managed) {
-                       cm->available++;
+
+       if (!managed) {
+               cm->available++;
+               if (cm->online)
                        m->global_available++;
-               }
        }
        trace_irq_matrix_free(bit, cpu, m, cm);
 }
index b4517095db6af2f5130e6c871a7a9e32884c2a66..e7214093dcd143e61325956064c5f84d772aa110 100644 (file)
@@ -366,12 +366,16 @@ static void __jump_label_update(struct static_key *key,
 {
        for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
                /*
-                * entry->code set to 0 invalidates module init text sections
-                * kernel_text_address() verifies we are not in core kernel
-                * init code, see jump_label_invalidate_module_init().
+                * An entry->code of 0 indicates an entry which has been
+                * disabled because it was in an init text area.
                 */
-               if (entry->code && kernel_text_address(entry->code))
-                       arch_jump_label_transform(entry, jump_label_type(entry));
+               if (entry->code) {
+                       if (kernel_text_address(entry->code))
+                               arch_jump_label_transform(entry, jump_label_type(entry));
+                       else
+                               WARN_ONCE(1, "can't patch jump_label at %pS",
+                                         (void *)(unsigned long)entry->code);
+               }
        }
 }
 
@@ -417,6 +421,19 @@ void __init jump_label_init(void)
        cpus_read_unlock();
 }
 
+/*
+ * Walk the built-in jump table and zero the code address of every entry
+ * that init_kernel_text() reports as living in __init code, which marks
+ * the entry as disabled.
+ */
+void __init jump_label_invalidate_init(void)
+{
+       struct jump_entry *entry;
+
+       for (entry = __start___jump_table;
+            entry < __stop___jump_table; entry++) {
+               if (!init_kernel_text(entry->code))
+                       continue;
+
+               entry->code = 0;
+       }
+}
+
 #ifdef CONFIG_MODULES
 
 static enum jump_label_type jump_label_init_type(struct jump_entry *entry)
@@ -633,6 +650,7 @@ static void jump_label_del_module(struct module *mod)
        }
 }
 
+/* Disable any jump label entries in module init code */
 static void jump_label_invalidate_module_init(struct module *mod)
 {
        struct jump_entry *iter_start = mod->jump_entries;
index 65cc0cb984e6aef64211da9ff693b4dc67968103..940633c632541d7aa31eb68ef7c271f17bb86369 100644 (file)
@@ -1616,11 +1616,12 @@ bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
 void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
 {
        DEFINE_WAKE_Q(wake_q);
+       unsigned long flags;
        bool postunlock;
 
-       raw_spin_lock_irq(&lock->wait_lock);
+       raw_spin_lock_irqsave(&lock->wait_lock, flags);
        postunlock = __rt_mutex_futex_unlock(lock, &wake_q);
-       raw_spin_unlock_irq(&lock->wait_lock);
+       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
        if (postunlock)
                rt_mutex_postunlock(&wake_q);
index 4849be5f9b3c30120f0f964dfd32f5bbf1546a54..895e6b76b25e0604b980e31fa8dba388b25e6581 100644 (file)
@@ -275,8 +275,15 @@ static unsigned long pfn_end(struct dev_pagemap *pgmap)
        return (res->start + resource_size(res)) >> PAGE_SHIFT;
 }
 
+/*
+ * Step to the following pfn, offering to reschedule whenever the current
+ * pfn is a multiple of 1024.
+ */
+static unsigned long pfn_next(unsigned long pfn)
+{
+       if (!(pfn % 1024))
+               cond_resched();
+
+       return pfn + 1;
+}
+
 #define for_each_device_pfn(pfn, map) \
-       for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++)
+       for (pfn = pfn_first(map); pfn < pfn_end(map); pfn = pfn_next(pfn))
 
 static void devm_memremap_pages_release(void *data)
 {
@@ -337,10 +344,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
        resource_size_t align_start, align_size, align_end;
        struct vmem_altmap *altmap = pgmap->altmap_valid ?
                        &pgmap->altmap : NULL;
+       struct resource *res = &pgmap->res;
        unsigned long pfn, pgoff, order;
        pgprot_t pgprot = PAGE_KERNEL;
-       int error, nid, is_ram, i = 0;
-       struct resource *res = &pgmap->res;
+       int error, nid, is_ram;
 
        align_start = res->start & ~(SECTION_SIZE - 1);
        align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
@@ -409,8 +416,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
                list_del(&page->lru);
                page->pgmap = pgmap;
                percpu_ref_get(pgmap->ref);
-               if (!(++i % 1024))
-                       cond_resched();
        }
 
        devm_add_action(dev, devm_memremap_pages_release, pgmap);
@@ -422,7 +427,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
  err_pfn_remap:
  err_radix:
        pgmap_radix_release(res, pgoff);
-       devres_free(pgmap);
        return ERR_PTR(error);
 }
 EXPORT_SYMBOL(devm_memremap_pages);
index ad2d420024f6d01086f07b47e60fd1fbde0ff1dc..e42764acedb4cd35ccb81d396b1391510b52db25 100644 (file)
@@ -4228,7 +4228,7 @@ static int modules_open(struct inode *inode, struct file *file)
                m->private = kallsyms_show_value() ? NULL : (void *)8ul;
        }
 
-       return 0;
+       return err;
 }
 
 static const struct file_operations proc_modules_operations = {
index 2cfef408fec931ac0ef3f3c95b31269ed1d3ea29..4b794f1d85613578bd10bc07d78a3d746a8f0db9 100644 (file)
@@ -640,7 +640,7 @@ device_initcall(register_warn_debugfs);
  */
 __visible void __stack_chk_fail(void)
 {
-       panic("stack-protector: Kernel stack is corrupted in: %p\n",
+       panic("stack-protector: Kernel stack is corrupted in: %pB\n",
                __builtin_return_address(0));
 }
 EXPORT_SYMBOL(__stack_chk_fail);
index fc1123583fa6edadb19214bb1922f7ce4ccdf1e1..f274fbef821d1bee0c457da42417f4b8153444cf 100644 (file)
@@ -2397,7 +2397,7 @@ void console_unlock(void)
 
                if (console_lock_spinning_disable_and_check()) {
                        printk_safe_exit_irqrestore(flags);
-                       return;
+                       goto out;
                }
 
                printk_safe_exit_irqrestore(flags);
@@ -2430,6 +2430,7 @@ void console_unlock(void)
        if (retry && console_trylock())
                goto again;
 
+out:
        if (wake_klogd)
                wake_up_klogd();
 }
index e7c535eee0a6d493a2a43eba210c08c6858b63d1..c94895bc5a2c14dc05117a7050fb9b3964db0357 100644 (file)
@@ -6683,13 +6683,18 @@ static int tg_cfs_schedulable_down(struct task_group *tg, void *data)
                parent_quota = parent_b->hierarchical_quota;
 
                /*
-                * Ensure max(child_quota) <= parent_quota, inherit when no
+                * Ensure max(child_quota) <= parent_quota.  On cgroup2,
+                * always take the min.  On cgroup1, only inherit when no
                 * limit is set:
                 */
-               if (quota == RUNTIME_INF)
-                       quota = parent_quota;
-               else if (parent_quota != RUNTIME_INF && quota > parent_quota)
-                       return -EINVAL;
+               if (cgroup_subsys_on_dfl(cpu_cgrp_subsys)) {
+                       quota = min(quota, parent_quota);
+               } else {
+                       if (quota == RUNTIME_INF)
+                               quota = parent_quota;
+                       else if (parent_quota != RUNTIME_INF && quota > parent_quota)
+                               return -EINVAL;
+               }
        }
        cfs_b->hierarchical_quota = quota;
 
index 48150ab42de96449051b5e441cb405ef1a09445c..4a4fd567fb26654bc96d6fddc31c7ba0da7cd409 100644 (file)
@@ -1894,6 +1894,12 @@ int timers_dead_cpu(unsigned int cpu)
                raw_spin_lock_irq(&new_base->lock);
                raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
+               /*
+                * The current CPUs base clock might be stale. Update it
+                * before moving the timers over.
+                */
+               forward_timer_base(new_base);
+
                BUG_ON(old_base->running_timer);
 
                for (i = 0; i < WHEEL_SIZE; i++)
index c0a9e310d71501948d60de5f499aa17cb087eaf6..7f9691c86b6e04c39b46fdf058e5f8d746006560 100644 (file)
@@ -661,7 +661,41 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
        .arg3_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_perf_prog_read_value_tp, struct bpf_perf_event_data_kern *, ctx,
+/*
+ * Resolve a helper id for tracepoint programs: the two tracepoint-specific
+ * prototypes are handled here, everything else is left to
+ * tracing_func_proto().
+ */
+static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
+{
+       if (func_id == BPF_FUNC_perf_event_output)
+               return &bpf_perf_event_output_proto_tp;
+
+       if (func_id == BPF_FUNC_get_stackid)
+               return &bpf_get_stackid_proto_tp;
+
+       return tracing_func_proto(func_id);
+}
+
+/*
+ * tp_prog_is_valid_access - decide whether a context access is permitted
+ * for tracepoint programs.
+ * @off:  byte offset of the access into the context
+ * @size: width of the access in bytes
+ * @type: kind of access; only BPF_READ is accepted
+ * @info: not used by this function
+ *
+ * Returns true only for reads whose offset is at least sizeof(void *),
+ * below PERF_MAX_TRACE_SIZE and a multiple of @size.
+ */
+static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+                                   struct bpf_insn_access_aux *info)
+{
+       /* the first pointer-sized slot of the context is off limits */
+       if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
+               return false;
+       if (type != BPF_READ)
+               return false;
+       /* reject accesses that are not aligned to their own width */
+       if (off % size != 0)
+               return false;
+
+       /* build-time check: the limit must be a multiple of sizeof(__u64) */
+       BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
+       return true;
+}
+
+const struct bpf_verifier_ops tracepoint_verifier_ops = {
+       .get_func_proto  = tp_prog_func_proto,
+       .is_valid_access = tp_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops tracepoint_prog_ops = {
+};
+
+BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx,
           struct bpf_perf_event_value *, buf, u32, size)
 {
        int err = -EINVAL;
@@ -678,8 +712,8 @@ BPF_CALL_3(bpf_perf_prog_read_value_tp, struct bpf_perf_event_data_kern *, ctx,
        return err;
 }
 
-static const struct bpf_func_proto bpf_perf_prog_read_value_proto_tp = {
-         .func           = bpf_perf_prog_read_value_tp,
+static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
+         .func           = bpf_perf_prog_read_value,
          .gpl_only       = true,
          .ret_type       = RET_INTEGER,
          .arg1_type      = ARG_PTR_TO_CTX,
@@ -687,7 +721,7 @@ static const struct bpf_func_proto bpf_perf_prog_read_value_proto_tp = {
          .arg3_type      = ARG_CONST_SIZE,
 };
 
-static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id)
 {
        switch (func_id) {
        case BPF_FUNC_perf_event_output:
@@ -695,39 +729,16 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
        case BPF_FUNC_get_stackid:
                return &bpf_get_stackid_proto_tp;
        case BPF_FUNC_perf_prog_read_value:
-               return &bpf_perf_prog_read_value_proto_tp;
+               return &bpf_perf_prog_read_value_proto;
        default:
                return tracing_func_proto(func_id);
        }
 }
 
-static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
-                                   struct bpf_insn_access_aux *info)
-{
-       if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
-               return false;
-       if (type != BPF_READ)
-               return false;
-       if (off % size != 0)
-               return false;
-
-       BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
-       return true;
-}
-
-const struct bpf_verifier_ops tracepoint_verifier_ops = {
-       .get_func_proto  = tp_prog_func_proto,
-       .is_valid_access = tp_prog_is_valid_access,
-};
-
-const struct bpf_prog_ops tracepoint_prog_ops = {
-};
-
 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
                                    struct bpf_insn_access_aux *info)
 {
-       const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
-                                        sample_period);
+       const int size_u64 = sizeof(u64);
 
        if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
                return false;
@@ -738,8 +749,13 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type
 
        switch (off) {
        case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
-               bpf_ctx_record_field_size(info, size_sp);
-               if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
+               bpf_ctx_record_field_size(info, size_u64);
+               if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
+                       return false;
+               break;
+       case bpf_ctx_range(struct bpf_perf_event_data, addr):
+               bpf_ctx_record_field_size(info, size_u64);
+               if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
                        return false;
                break;
        default:
@@ -766,6 +782,14 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
                                      bpf_target_off(struct perf_sample_data, period, 8,
                                                     target_size));
                break;
+       case offsetof(struct bpf_perf_event_data, addr):
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
+                                                      data), si->dst_reg, si->src_reg,
+                                     offsetof(struct bpf_perf_event_data_kern, data));
+               *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
+                                     bpf_target_off(struct perf_sample_data, addr, 8,
+                                                    target_size));
+               break;
        default:
                *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
                                                       regs), si->dst_reg, si->src_reg,
@@ -779,7 +803,7 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
 }
 
 const struct bpf_verifier_ops perf_event_verifier_ops = {
-       .get_func_proto         = tp_prog_func_proto,
+       .get_func_proto         = pe_prog_func_proto,
        .is_valid_access        = pe_prog_is_valid_access,
        .convert_ctx_access     = pe_prog_convert_ctx_access,
 };
index bb9a519cbf5093ece372daa532121f1511957999..6ec6ba65127b46e52bbfa611092a1140a8180728 100644 (file)
@@ -3018,14 +3018,6 @@ static bool __cancel_work(struct work_struct *work, bool is_dwork)
        return ret;
 }
 
-/*
- * See cancel_delayed_work()
- */
-bool cancel_work(struct work_struct *work)
-{
-       return __cancel_work(work, false);
-}
-
 /**
  * cancel_delayed_work - cancel a delayed work
  * @dwork: delayed_work to cancel
@@ -5337,7 +5329,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
 
        ret = device_register(&wq_dev->dev);
        if (ret) {
-               kfree(wq_dev);
+               put_device(&wq_dev->dev);
                wq->wq_dev = NULL;
                return ret;
        }
index f93a945274af12575f8fbbceb821552b2a13e61e..590facba2c5083b36b54b0809408ccedb97d43b5 100644 (file)
@@ -3,7 +3,7 @@
  *
  * As should be obvious for Linux kernel code, license is GPLv2
  *
- * Copyright (c) 2007-2008 Joern Engel <joern@logfs.org>
+ * Copyright (c) 2007-2008 Joern Engel <joern@purestorage.com>
  * Bits and pieces stolen from Peter Zijlstra's code, which is
  * Copyright 2007, Red Hat Inc. Peter Zijlstra
  * GPLv2
@@ -76,6 +76,8 @@ struct btree_geo btree_geo128 = {
 };
 EXPORT_SYMBOL_GPL(btree_geo128);
 
+#define MAX_KEYLEN     (2 * LONG_PER_U64)
+
 static struct kmem_cache *btree_cachep;
 
 void *btree_alloc(gfp_t gfp_mask, void *pool_data)
@@ -313,7 +315,7 @@ void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
 {
        int i, height;
        unsigned long *node, *oldnode;
-       unsigned long *retry_key = NULL, key[geo->keylen];
+       unsigned long *retry_key = NULL, key[MAX_KEYLEN];
 
        if (keyzero(geo, __key))
                return NULL;
@@ -639,8 +641,8 @@ EXPORT_SYMBOL_GPL(btree_remove);
 int btree_merge(struct btree_head *target, struct btree_head *victim,
                struct btree_geo *geo, gfp_t gfp)
 {
-       unsigned long key[geo->keylen];
-       unsigned long dup[geo->keylen];
+       unsigned long key[MAX_KEYLEN];
+       unsigned long dup[MAX_KEYLEN];
        void *val;
        int err;
 
index c1b0fad31b109157427d1ec1e30e7e93e303465d..1077366f496ba6c7ef6905685c2435b090b81b1b 100644 (file)
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -150,6 +150,8 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
                return BUG_TRAP_TYPE_NONE;
 
        bug = find_bug(bugaddr);
+       if (!bug)
+               return BUG_TRAP_TYPE_NONE;
 
        file = NULL;
        line = 0;
@@ -191,7 +193,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
        if (file)
                pr_crit("kernel BUG at %s:%u!\n", file, line);
        else
-               pr_crit("Kernel BUG at %p [verbose debug info unavailable]\n",
+               pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n",
                        (void *)bugaddr);
 
        return BUG_TRAP_TYPE_BUG;
index 1b34d210452c5aba703aea04e648dd6ed56fd576..7f5cdc1e6b298f2c2121ebc33195fbc7c7d71145 100644 (file)
@@ -1491,12 +1491,12 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size,
        if (unlikely(virt == NULL))
                return;
 
-       entry = dma_entry_alloc();
-       if (!entry)
+       /* handle vmalloc and linear addresses */
+       if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
                return;
 
-       /* handle vmalloc and linear addresses */
-       if (!is_vmalloc_addr(virt) && !virt_to_page(virt))
+       entry = dma_entry_alloc();
+       if (!entry)
                return;
 
        entry->type      = dma_debug_coherent;
@@ -1528,7 +1528,7 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
        };
 
        /* handle vmalloc and linear addresses */
-       if (!is_vmalloc_addr(virt) && !virt_to_page(virt))
+       if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
                return;
 
        if (is_vmalloc_addr(virt))
index 99ec5bc89d252fd303a0a277d9b1471a6583e282..823b813f08f862b4c80463ad69f91ba9acf62703 100644 (file)
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -36,8 +36,8 @@ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid,
 {
        struct radix_tree_iter iter;
        void __rcu **slot;
-       int base = idr->idr_base;
-       int id = *nextid;
+       unsigned int base = idr->idr_base;
+       unsigned int id = *nextid;
 
        if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
                return -EINVAL;
@@ -204,10 +204,11 @@ int idr_for_each(const struct idr *idr,
 
        radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) {
                int ret;
+               unsigned long id = iter.index + base;
 
-               if (WARN_ON_ONCE(iter.index > INT_MAX))
+               if (WARN_ON_ONCE(id > INT_MAX))
                        break;
-               ret = fn(iter.index + base, rcu_dereference_raw(*slot), data);
+               ret = fn(id, rcu_dereference_raw(*slot), data);
                if (ret)
                        return ret;
        }
@@ -230,8 +231,8 @@ void *idr_get_next(struct idr *idr, int *nextid)
 {
        struct radix_tree_iter iter;
        void __rcu **slot;
-       int base = idr->idr_base;
-       int id = *nextid;
+       unsigned long base = idr->idr_base;
+       unsigned long id = *nextid;
 
        id = (id < base) ? 0 : id - base;
        slot = radix_tree_iter_find(&idr->idr_rt, &iter, id);
index b808a390e4c3e32d2789b059ab7752c6d533d7a3..54e5bbaa3200317534926e65982dcd3cbab71492 100644 (file)
@@ -91,7 +91,8 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
 
                if (ioremap_pmd_enabled() &&
                    ((next - addr) == PMD_SIZE) &&
-                   IS_ALIGNED(phys_addr + addr, PMD_SIZE)) {
+                   IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
+                   pmd_free_pte_page(pmd)) {
                        if (pmd_set_huge(pmd, phys_addr + addr, prot))
                                continue;
                }
@@ -117,7 +118,8 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
 
                if (ioremap_pud_enabled() &&
                    ((next - addr) == PUD_SIZE) &&
-                   IS_ALIGNED(phys_addr + addr, PUD_SIZE)) {
+                   IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
+                   pud_free_pmd_page(pud)) {
                        if (pud_set_huge(pud, phys_addr + addr, prot))
                                continue;
                }
index 9539d7ab3ea85e86d3f5d4bc13f5191a37dc0c4e..fa10ad8e9b17494814b976eaf9689b16a8c561bd 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/uuid.h>
 #include <linux/ctype.h>
 #include <net/sock.h>
+#include <net/netlink.h>
 #include <net/net_namespace.h>
 
 
@@ -32,11 +33,13 @@ u64 uevent_seqnum;
 #ifdef CONFIG_UEVENT_HELPER
 char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
 #endif
-#ifdef CONFIG_NET
+
 struct uevent_sock {
        struct list_head list;
        struct sock *sk;
 };
+
+#ifdef CONFIG_NET
 static LIST_HEAD(uevent_sock_list);
 #endif
 
@@ -602,12 +605,88 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
 EXPORT_SYMBOL_GPL(add_uevent_var);
 
 #if defined(CONFIG_NET)
+/*
+ * uevent_net_broadcast - rebroadcast a received uevent message to group 1.
+ * @usk:    socket to broadcast on
+ * @skb:    received netlink message, netlink header still in front
+ * @extack: extended ack, filled in when the message is too big
+ *
+ * Copies @skb with enough tailroom for a "SEQNUM=<n>" string, appends that
+ * string (terminating NUL included) using a freshly incremented
+ * uevent_seqnum, strips the netlink header from the copy and broadcasts it.
+ * uevent_seqnum is bumped without taking a lock here; the caller visible in
+ * this file, uevent_net_rcv_skb(), holds uevent_sock_mutex around the call.
+ *
+ * Returns 0 on success, -ENOMEM if the sequence string or the skb copy
+ * cannot be produced, -EINVAL if the result would exceed
+ * UEVENT_BUFFER_SIZE, otherwise the netlink_broadcast() error, except that
+ * -ENOBUFS and -ESRCH are reported as 0.
+ */
+static int uevent_net_broadcast(struct sock *usk, struct sk_buff *skb,
+                               struct netlink_ext_ack *extack)
+{
+       /* u64 to chars: 2^64 - 1 = 21 chars */
+       char buf[sizeof("SEQNUM=") + 21];
+       struct sk_buff *skbc;
+       int ret;
+
+       /* bump and prepare sequence number */
+       ret = snprintf(buf, sizeof(buf), "SEQNUM=%llu", ++uevent_seqnum);
+       if (ret < 0 || (size_t)ret >= sizeof(buf))
+               return -ENOMEM;
+       /* count the terminating NUL so it is appended to the message too */
+       ret++;
+
+       /* verify message does not overflow */
+       if ((skb->len + ret) > UEVENT_BUFFER_SIZE) {
+               NL_SET_ERR_MSG(extack, "uevent message too big");
+               return -EINVAL;
+       }
+
+       /* copy skb and extend to accommodate sequence number */
+       skbc = skb_copy_expand(skb, 0, ret, GFP_KERNEL);
+       if (!skbc)
+               return -ENOMEM;
+
+       /* append sequence number */
+       skb_put_data(skbc, buf, ret);
+
+       /* remove msg header */
+       skb_pull(skbc, NLMSG_HDRLEN);
+
+       /* set portid 0 to inform userspace message comes from kernel */
+       NETLINK_CB(skbc).portid = 0;
+       NETLINK_CB(skbc).dst_group = 1;
+
+       ret = netlink_broadcast(usk, skbc, 0, 1, GFP_KERNEL);
+       /* ENOBUFS should be handled in userspace */
+       if (ret == -ENOBUFS || ret == -ESRCH)
+               ret = 0;
+
+       return ret;
+}
+
+/*
+ * uevent_net_rcv_skb - handle one netlink message sent to the uevent socket.
+ * @skb:    buffer carrying the message
+ * @nlh:    netlink header of the message
+ * @extack: extended ack used for error reporting
+ *
+ * After a capability check against the sender's network namespace, the
+ * message is handed to uevent_net_broadcast() on that namespace's uevent
+ * socket while uevent_sock_mutex is held.
+ *
+ * Returns -EINVAL if the message has no data pointer, -EPERM if the
+ * capability check fails, otherwise the uevent_net_broadcast() result.
+ */
+static int uevent_net_rcv_skb(struct sk_buff *skb, struct nlmsghdr *nlh,
+                             struct netlink_ext_ack *extack)
+{
+       struct net *net;
+       int ret;
+
+       if (!nlmsg_data(nlh))
+               return -EINVAL;
+
+       /*
+        * Verify that we are allowed to send messages to the target
+        * network namespace. The caller must have CAP_SYS_ADMIN in the
+        * owning user namespace of the target network namespace.
+        */
+       net = sock_net(NETLINK_CB(skb).sk);
+       if (!netlink_ns_capable(skb, net->user_ns, CAP_SYS_ADMIN)) {
+               NL_SET_ERR_MSG(extack, "missing CAP_SYS_ADMIN capability");
+               return -EPERM;
+       }
+
+       /* the mutex covers the sequence number bump done by the broadcast */
+       mutex_lock(&uevent_sock_mutex);
+       ret = uevent_net_broadcast(net->uevent_sock->sk, skb, extack);
+       mutex_unlock(&uevent_sock_mutex);
+
+       return ret;
+}
+
+/*
+ * Input callback of the uevent netlink socket: lets netlink_rcv_skb()
+ * process @skb with uevent_net_rcv_skb() as the message handler.
+ */
+static void uevent_net_rcv(struct sk_buff *skb)
+{
+       netlink_rcv_skb(skb, &uevent_net_rcv_skb);
+}
+
 static int uevent_net_init(struct net *net)
 {
        struct uevent_sock *ue_sk;
        struct netlink_kernel_cfg cfg = {
                .groups = 1,
-               .flags  = NL_CFG_F_NONROOT_RECV,
+               .input = uevent_net_rcv,
+               .flags  = NL_CFG_F_NONROOT_RECV
        };
 
        ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL);
@@ -621,6 +700,9 @@ static int uevent_net_init(struct net *net)
                kfree(ue_sk);
                return -ENODEV;
        }
+
+       net->uevent_sock = ue_sk;
+
        mutex_lock(&uevent_sock_mutex);
        list_add_tail(&ue_sk->list, &uevent_sock_list);
        mutex_unlock(&uevent_sock_mutex);
@@ -629,17 +711,9 @@ static int uevent_net_init(struct net *net)
 
 static void uevent_net_exit(struct net *net)
 {
-       struct uevent_sock *ue_sk;
+       struct uevent_sock *ue_sk = net->uevent_sock;
 
        mutex_lock(&uevent_sock_mutex);
-       list_for_each_entry(ue_sk, &uevent_sock_list, list) {
-               if (sock_net(ue_sk->sk) == net)
-                       goto found;
-       }
-       mutex_unlock(&uevent_sock_mutex);
-       return;
-
-found:
        list_del(&ue_sk->list);
        mutex_unlock(&uevent_sock_mutex);
 
index 30e7dd88148b0282c1f445047b360156e79278a0..9f96fa7bc0006e6eb38d4c88aba96d8ea04049fb 100644 (file)
@@ -322,6 +322,8 @@ EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu);
  * This function normally doesn't block and can be called from any context
  * but it may block if @confirm_kill is specified and @ref is in the
  * process of switching to atomic mode by percpu_ref_switch_to_atomic().
+ *
+ * There are no implied RCU grace periods between kill and release.
  */
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
                                 percpu_ref_func_t *confirm_kill)
index 3825c30aaa36985e2e73c5d657bae3455b674cd9..47de025b624520f75e521bef46dc9b28baa6a1a0 100644 (file)
@@ -506,8 +506,10 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
                if (!key ||
                    (ht->p.obj_cmpfn ?
                     ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
-                    rhashtable_compare(&arg, rht_obj(ht, head))))
+                    rhashtable_compare(&arg, rht_obj(ht, head)))) {
+                       pprev = &head->next;
                        continue;
+               }
 
                if (!ht->rhlist)
                        return rht_obj(ht, head);
index b4e22345963f339ffe05c974bc111ae7da9dc58f..b2badf6b23cdb10e27b6575806304dd0524755c9 100644 (file)
 #include <linux/if_vlan.h>
 #include <linux/random.h>
 #include <linux/highmem.h>
+#include <linux/sched.h>
 
 /* General test specific settings */
 #define MAX_SUBTESTS   3
-#define MAX_TESTRUNS   10000
+#define MAX_TESTRUNS   1000
 #define MAX_DATA       128
 #define MAX_INSNS      512
 #define MAX_K          0xffffFFFF
@@ -5466,7 +5467,7 @@ static struct bpf_test tests[] = {
        {
                "BPF_MAXINSNS: Jump, gap, jump, ...",
                { },
-#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_X86)
                CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
 #else
                CLASSIC | FLAG_NO_DATA,
@@ -6573,6 +6574,93 @@ static bool exclude_test(int test_id)
        return test_id < test_range[0] || test_id > test_range[1];
 }
 
+/*
+ * build_test_skb - construct the GSO packet used by test_skb_segment().
+ *
+ * Two skbs are built the same way: 8 bytes of linear data behind the
+ * reserved headroom, plus one 64-byte page fragment. skb[1] is then hung
+ * on skb[0]'s frag_list and skb[0] is marked as a TCPv4 GSO packet
+ * (SKB_GSO_DODGY, gso_size 1448) with its length fields adjusted to
+ * include skb[1].
+ *
+ * Returns skb[0], which owns skb[1] and both pages, or NULL if any
+ * allocation fails.
+ */
+static __init struct sk_buff *build_test_skb(void)
+{
+       u32 headroom = NET_SKB_PAD + NET_IP_ALIGN + ETH_HLEN;
+       struct sk_buff *skb[2];
+       struct page *page[2];
+       int i, data_size = 8;
+
+       for (i = 0; i < 2; i++) {
+               page[i] = alloc_page(GFP_KERNEL);
+               if (!page[i]) {
+                       if (i == 0)
+                               goto err_page0;
+                       else
+                               goto err_page1;
+               }
+
+               /* this will set skb[i]->head_frag */
+               skb[i] = dev_alloc_skb(headroom + data_size);
+               if (!skb[i]) {
+                       if (i == 0)
+                               goto err_skb0;
+                       else
+                               goto err_skb1;
+               }
+
+               skb_reserve(skb[i], headroom);
+               skb_put(skb[i], data_size);
+               skb[i]->protocol = htons(ETH_P_IP);
+               skb_reset_network_header(skb[i]);
+               skb_set_mac_header(skb[i], -ETH_HLEN);
+
+               /* from here on the page belongs to skb[i] */
+               skb_add_rx_frag(skb[i], 0, page[i], 0, 64, 64);
+               /* skb_headlen(skb[i]): 8, skb[i]->head_frag = 1 */
+       }
+
+       /* setup shinfo */
+       skb_shinfo(skb[0])->gso_size = 1448;
+       skb_shinfo(skb[0])->gso_type = SKB_GSO_TCPV4;
+       skb_shinfo(skb[0])->gso_type |= SKB_GSO_DODGY;
+       skb_shinfo(skb[0])->gso_segs = 0;
+       skb_shinfo(skb[0])->frag_list = skb[1];
+
+       /* adjust skb[0]'s len */
+       skb[0]->len += skb[1]->len;
+       skb[0]->data_len += skb[1]->data_len;
+       skb[0]->truesize += skb[1]->truesize;
+
+       return skb[0];
+
+err_skb1:
+       __free_page(page[1]);
+err_page1:
+       /*
+        * The second iteration failed. skb[0] already carries page[0] as a
+        * fragment, so freeing the skb releases that page as well; falling
+        * through to err_skb0 would free page[0] a second time.
+        */
+       kfree_skb(skb[0]);
+       return NULL;
+err_skb0:
+       /* first iteration: page[0] was never attached to an skb */
+       __free_page(page[0]);
+err_page0:
+       return NULL;
+}
+
+/*
+ * test_skb_segment - run skb_segment() on the packet from build_test_skb().
+ *
+ * Returns 0 if segmentation produced a segment list (which is freed again
+ * here), -1 if the test skb could not be built or segmentation failed.
+ */
+static __init int test_skb_segment(void)
+{
+       netdev_features_t features;
+       struct sk_buff *skb, *segs;
+       int ret = -1;
+
+       features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
+                  NETIF_F_IPV6_CSUM;
+       features |= NETIF_F_RXCSUM;
+       skb = build_test_skb();
+       if (!skb) {
+               pr_info("%s: failed to build_test_skb", __func__);
+               goto done;
+       }
+
+       segs = skb_segment(skb, features);
+       /*
+        * skb_segment() signals failure with an ERR_PTR(), which a plain
+        * NULL test would mistake for a valid list and then try to free.
+        */
+       if (!IS_ERR_OR_NULL(segs)) {
+               kfree_skb_list(segs);
+               ret = 0;
+               pr_info("%s: success in skb_segment!", __func__);
+       } else {
+               pr_info("%s: failed in skb_segment!", __func__);
+       }
+       kfree_skb(skb);
+done:
+       return ret;
+}
+
 static __init int test_bpf(void)
 {
        int i, err_cnt = 0, pass_cnt = 0;
@@ -6582,6 +6670,7 @@ static __init int test_bpf(void)
                struct bpf_prog *fp;
                int err;
 
+               cond_resched();
                if (exclude_test(i))
                        continue;
 
@@ -6630,9 +6719,11 @@ static int __init test_bpf_init(void)
                return ret;
 
        ret = test_bpf();
-
        destroy_bpf_tests();
-       return ret;
+       if (ret)
+               return ret;
+
+       return test_skb_segment();
 }
 
 static void __exit test_bpf_exit(void)
index e372b97eee1301c1bd2e6c49b49e57c8ef4bff10..0e5b7a61460bb092226a3785abeaa2168d95d790 100644 (file)
@@ -1141,7 +1141,7 @@ static struct kmod_test_device *register_test_dev_kmod(void)
        mutex_lock(&reg_dev_mutex);
 
        /* int should suffice for number of devices, test for wrap */
-       if (unlikely(num_test_devs + 1) < 0) {
+       if (num_test_devs + 1 == INT_MAX) {
                pr_err("reached limit of number of test devices\n");
                goto out;
        }
index 76d3667fdea21c2c1842ce19818ddda05b7ceada..f4000c137dbed6da5754713f3d280a9e1caed528 100644 (file)
@@ -79,6 +79,21 @@ struct thread_data {
        struct test_obj *objs;
 };
 
+static u32 my_hashfn(const void *data, u32 len, u32 seed)
+{
+       const struct test_obj_rhl *obj = data;
+
+       return (obj->value.id % 10) << RHT_HASH_RESERVED_SPACE;
+}
+
+static int my_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+       const struct test_obj_rhl *test_obj = obj;
+       const struct test_obj_val *val = arg->key;
+
+       return test_obj->value.id - val->id;
+}
+
 static struct rhashtable_params test_rht_params = {
        .head_offset = offsetof(struct test_obj, node),
        .key_offset = offsetof(struct test_obj, value),
@@ -87,6 +102,17 @@ static struct rhashtable_params test_rht_params = {
        .nulls_base = (3U << RHT_BASE_SHIFT),
 };
 
+static struct rhashtable_params test_rht_params_dup = {
+       .head_offset = offsetof(struct test_obj_rhl, list_node),
+       .key_offset = offsetof(struct test_obj_rhl, value),
+       .key_len = sizeof(struct test_obj_val),
+       .hashfn = jhash,
+       .obj_hashfn = my_hashfn,
+       .obj_cmpfn = my_cmpfn,
+       .nelem_hint = 128,
+       .automatic_shrinking = false,
+};
+
 static struct semaphore prestart_sem;
 static struct semaphore startup_sem = __SEMAPHORE_INITIALIZER(startup_sem, 0);
 
@@ -465,6 +491,112 @@ static int __init test_rhashtable_max(struct test_obj *array,
        return err;
 }
 
+/* Dump @rhlt's bucket chains to the log; returns the number of objects seen. */
+static unsigned int __init print_ht(struct rhltable *rhlt)
+{
+       struct rhashtable *ht = &rhlt->ht;
+       const struct bucket_table *tbl;
+       char buff[512] = "";
+       unsigned int i, cnt = 0;
+       int len = 0;
+
+       tbl = rht_dereference(ht->tbl, ht);
+       for (i = 0; i < tbl->size; i++) {
+               struct rhash_head *pos, *next;
+               struct test_obj_rhl *p;
+
+               pos = rht_dereference(tbl->buckets[i], ht);
+               next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL;
+
+               /* Append at buff + len: sprintf(buff, "%s", buff) overlaps and is unbounded */
+               if (!rht_is_a_nulls(pos))
+                       len += scnprintf(buff + len, sizeof(buff) - len, "\nbucket[%u] -> ", i);
+
+               while (!rht_is_a_nulls(pos)) {
+                       struct rhlist_head *list = container_of(pos, struct rhlist_head, rhead);
+                       len += scnprintf(buff + len, sizeof(buff) - len, "[[");
+                       do {
+                               pos = &list->rhead;
+                               list = rht_dereference(list->next, ht);
+                               p = rht_obj(ht, pos);
+                               len += scnprintf(buff + len, sizeof(buff) - len, " val %d (tid=%d)%s",
+                                                p->value.id, p->value.tid, list ? ", " : " ");
+                               cnt++;
+                       } while (list);
+
+                       pos = next;
+                       next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL;
+
+                       len += scnprintf(buff + len, sizeof(buff) - len, "]]%s",
+                                        !rht_is_a_nulls(pos) ? " -> " : "");
+               }
+       }
+       printk(KERN_ERR "\n---- ht: ----%s\n-------------\n", buff);
+
+       return cnt;
+}
+
+static int __init test_insert_dup(struct test_obj_rhl *rhl_test_objects,
+                                 int cnt, bool slow)
+{
+       struct rhltable rhlt;
+       unsigned int i, ret;
+       const char *key;
+       int err = 0;
+
+       err = rhltable_init(&rhlt, &test_rht_params_dup);
+       if (WARN_ON(err))
+               return err;
+
+       for (i = 0; i < cnt; i++) {
+               rhl_test_objects[i].value.tid = i;
+               key = rht_obj(&rhlt.ht, &rhl_test_objects[i].list_node.rhead);
+               key += test_rht_params_dup.key_offset;
+
+               if (slow) {
+                       err = PTR_ERR(rhashtable_insert_slow(&rhlt.ht, key,
+                                                            &rhl_test_objects[i].list_node.rhead));
+                       if (err == -EAGAIN)
+                               err = 0;
+               } else
+                       err = rhltable_insert(&rhlt,
+                                             &rhl_test_objects[i].list_node,
+                                             test_rht_params_dup);
+               if (WARN(err, "error %d on element %d/%d (%s)\n", err, i, cnt, slow? "slow" : "fast"))
+                       goto skip_print;
+       }
+
+       ret = print_ht(&rhlt);
+       WARN(ret != cnt, "missing rhltable elements (%d != %d, %s)\n", ret, cnt, slow? "slow" : "fast");
+
+skip_print:
+       rhltable_destroy(&rhlt);
+
+       return 0;
+}
+
+static int __init test_insert_duplicates_run(void)
+{
+       struct test_obj_rhl rhl_test_objects[3] = {};
+
+       pr_info("test inserting duplicates\n");
+
+       /* two different values that map to same bucket */
+       rhl_test_objects[0].value.id = 1;
+       rhl_test_objects[1].value.id = 21;
+
+       /* and another duplicate with same as [0] value
+        * which will be second on the bucket list */
+       rhl_test_objects[2].value.id = rhl_test_objects[0].value.id;
+
+       test_insert_dup(rhl_test_objects, 2, false);
+       test_insert_dup(rhl_test_objects, 3, false);
+       test_insert_dup(rhl_test_objects, 2, true);
+       test_insert_dup(rhl_test_objects, 3, true);
+
+       return 0;
+}
+
 static int thread_lookup_test(struct thread_data *tdata)
 {
        unsigned int entries = tdata->entries;
@@ -613,6 +745,8 @@ static int __init test_rht_init(void)
        do_div(total_time, runs);
        pr_info("Average test time: %llu\n", total_time);
 
+       test_insert_duplicates_run();
+
        if (!tcount)
                return 0;
 
index 1b46e6e74881d3ce634511d98e4f177625b5501c..6afae32571cae669044319e8aa4438679e8a3b95 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -516,7 +516,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
        }
 
        if (ret & VM_FAULT_RETRY) {
-               if (nonblocking)
+               if (nonblocking && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
                        *nonblocking = 0;
                return -EBUSY;
        }
@@ -890,7 +890,10 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
                                break;
                }
                if (*locked) {
-                       /* VM_FAULT_RETRY didn't trigger */
+                       /*
+                        * VM_FAULT_RETRY didn't trigger or it was a
+                        * FOLL_NOWAIT.
+                        */
                        if (!pages_done)
                                pages_done = ret;
                        break;
index 87ab9b8f56b53dae6875e19533b5e1e171d2533c..5a68730eebd62656e3c300581da9a9e71fd7ffc8 100644 (file)
@@ -555,7 +555,8 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
        VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-       if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
+       if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg,
+                                 true)) {
                put_page(page);
                count_vm_event(THP_FAULT_FALLBACK);
                return VM_FAULT_FALLBACK;
@@ -1316,7 +1317,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
        }
 
        if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
-                                       huge_gfp, &memcg, true))) {
+                               huge_gfp | __GFP_NORETRY, &memcg, true))) {
                put_page(new_page);
                split_huge_pmd(vma, vmf->pmd, vmf->address);
                if (page)
@@ -2783,11 +2784,13 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
 
        list_for_each_safe(pos, next, &list) {
                page = list_entry((void *)pos, struct page, mapping);
-               lock_page(page);
+               if (!trylock_page(page))
+                       goto next;
                /* split_huge_page() removes page from list on success */
                if (!split_huge_page(page))
                        split++;
                unlock_page(page);
+next:
                put_page(page);
        }
 
index 7c204e3d132b808364fc1e87e3dace180f9ba173..976bbc5646fe8c6e386ddb0d42b8ba0ccb3e9777 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/bootmem.h>
 #include <linux/sysfs.h>
 #include <linux/slab.h>
+#include <linux/mmdebug.h>
 #include <linux/sched/signal.h>
 #include <linux/rmap.h>
 #include <linux/string_helpers.h>
@@ -1583,7 +1584,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
                page = NULL;
        } else {
                h->surplus_huge_pages++;
-               h->nr_huge_pages_node[page_to_nid(page)]++;
+               h->surplus_huge_pages_node[page_to_nid(page)]++;
        }
 
 out_unlock:
@@ -4374,6 +4375,12 @@ int hugetlb_reserve_pages(struct inode *inode,
        struct resv_map *resv_map;
        long gbl_reserve;
 
+       /* This should never happen */
+       if (from > to) {
+               VM_WARN(1, "%s called with a negative range\n", __func__);
+               return -EINVAL;
+       }
+
        /*
         * Only apply hugepage reservation if asked. At fault time, an
         * attempt will be made for VM_NORESERVE to allocate a page
index b7e2268dfc9a15c64eced7bfb857e9a32fc128c1..e42568284e06038ab70ec1344f63a2e5182ea90d 100644 (file)
@@ -530,7 +530,12 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
                        goto out;
                }
 
-               VM_BUG_ON_PAGE(PageCompound(page), page);
+               /* TODO: teach khugepaged to collapse THP mapped with pte */
+               if (PageCompound(page)) {
+                       result = SCAN_PAGE_COMPOUND;
+                       goto out;
+               }
+
                VM_BUG_ON_PAGE(!PageAnon(page), page);
 
                /*
@@ -960,7 +965,9 @@ static void collapse_huge_page(struct mm_struct *mm,
                goto out_nolock;
        }
 
-       if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+       /* Do not oom kill for khugepaged charges */
+       if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+                                          &memcg, true))) {
                result = SCAN_CGROUP_CHARGE_FAIL;
                goto out_nolock;
        }
@@ -1319,7 +1326,9 @@ static void collapse_shmem(struct mm_struct *mm,
                goto out;
        }
 
-       if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+       /* Do not oom kill for khugepaged charges */
+       if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+                                          &memcg, true))) {
                result = SCAN_CGROUP_CHARGE_FAIL;
                goto out;
        }
index 5a9ca2a1751bfe1c999dfe6dfcf4716966d0e888..48376bd3327423fcccc308a0a6655db399611510 100644 (file)
@@ -1101,34 +1101,6 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
                *out_nid = r->nid;
 }
 
-unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
-                                                     unsigned long max_pfn)
-{
-       struct memblock_type *type = &memblock.memory;
-       unsigned int right = type->cnt;
-       unsigned int mid, left = 0;
-       phys_addr_t addr = PFN_PHYS(pfn + 1);
-
-       do {
-               mid = (right + left) / 2;
-
-               if (addr < type->regions[mid].base)
-                       right = mid;
-               else if (addr >= (type->regions[mid].base +
-                                 type->regions[mid].size))
-                       left = mid + 1;
-               else {
-                       /* addr is within the region, so pfn + 1 is valid */
-                       return min(pfn + 1, max_pfn);
-               }
-       } while (left < right);
-
-       if (right == type->cnt)
-               return max_pfn;
-       else
-               return min(PHYS_PFN(type->regions[right].base), max_pfn);
-}
-
 /**
  * memblock_set_node - set node ID on memblock regions
  * @base: base of area to set node ID for
index d879f1d8a44ade626a5ed91aa3973068f16d8bc2..32cba0332787f48dea47b5b09f9dac21c567ea84 100644 (file)
@@ -2124,6 +2124,9 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
        case MPOL_INTERLEAVE:
                return !!nodes_equal(a->v.nodes, b->v.nodes);
        case MPOL_PREFERRED:
+               /* a's ->flags is the same as b's */
+               if (a->flags & MPOL_F_LOCAL)
+                       return true;
                return a->v.preferred_node == b->v.preferred_node;
        default:
                BUG();
index cb416723538fe49810db0cb9b9e0b165cb963a44..1741dd23e7c1f7d4878cf38ff9ba021928f99267 100644 (file)
@@ -1910,7 +1910,9 @@ static int move_freepages(struct zone *zone,
         * Remove at a later date when no bug reports exist related to
         * grouping pages by mobility
         */
-       VM_BUG_ON(page_zone(start_page) != page_zone(end_page));
+       VM_BUG_ON(pfn_valid(page_to_pfn(start_page)) &&
+                 pfn_valid(page_to_pfn(end_page)) &&
+                 page_zone(start_page) != page_zone(end_page));
 #endif
 
        if (num_movable)
@@ -3594,7 +3596,7 @@ static bool __need_fs_reclaim(gfp_t gfp_mask)
                return false;
 
        /* this guy won't enter reclaim */
-       if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
+       if (current->flags & PF_MEMALLOC)
                return false;
 
        /* We're only interested __GFP_FS allocations for now */
@@ -5354,17 +5356,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                if (context != MEMMAP_EARLY)
                        goto not_early;
 
-               if (!early_pfn_valid(pfn)) {
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-                       /*
-                        * Skip to the pfn preceding the next valid one (or
-                        * end_pfn), such that we hit a valid pfn (or end_pfn)
-                        * on our next iteration of the loop.
-                        */
-                       pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1;
-#endif
+               if (!early_pfn_valid(pfn))
                        continue;
-               }
                if (!early_pfn_in_nid(pfn, nid))
                        continue;
                if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
index d2a76642c4ae89ecc5489dc430a7cddbce351b5e..38de70ab1a0d625c9363f3c519f2ab2ec04a1fc5 100644 (file)
@@ -34,7 +34,7 @@
 #include <linux/log2.h>
 
 static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
-                              int page_start, int page_end)
+                              int page_start, int page_end, gfp_t gfp)
 {
        return 0;
 }
@@ -45,18 +45,18 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
        /* nada */
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 {
        const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
        struct pcpu_chunk *chunk;
        struct page *pages;
        int i;
 
-       chunk = pcpu_alloc_chunk();
+       chunk = pcpu_alloc_chunk(gfp);
        if (!chunk)
                return NULL;
 
-       pages = alloc_pages(GFP_KERNEL, order_base_2(nr_pages));
+       pages = alloc_pages(gfp, order_base_2(nr_pages));
        if (!pages) {
                pcpu_free_chunk(chunk);
                return NULL;
index 9158e5a81391ced4e268e3d5dd9879c2bc7280ce..d8078de912de38a15626771fea1b25c5b3baf0a6 100644 (file)
@@ -37,7 +37,7 @@ static struct page **pcpu_get_pages(void)
        lockdep_assert_held(&pcpu_alloc_mutex);
 
        if (!pages)
-               pages = pcpu_mem_zalloc(pages_size);
+               pages = pcpu_mem_zalloc(pages_size, GFP_KERNEL);
        return pages;
 }
 
@@ -73,18 +73,21 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk,
  * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
  * @page_start: page index of the first page to be allocated
  * @page_end: page index of the last page to be allocated + 1
+ * @gfp: allocation flags passed to the underlying allocator
  *
  * Allocate pages [@page_start,@page_end) into @pages for all units.
  * The allocation is for @chunk.  Percpu core doesn't care about the
  * content of @pages and will pass it verbatim to pcpu_map_pages().
  */
 static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
-                           struct page **pages, int page_start, int page_end)
+                           struct page **pages, int page_start, int page_end,
+                           gfp_t gfp)
 {
-       const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM;
        unsigned int cpu, tcpu;
        int i;
 
+       gfp |= __GFP_HIGHMEM;
+
        for_each_possible_cpu(cpu) {
                for (i = page_start; i < page_end; i++) {
                        struct page **pagep = &pages[pcpu_page_idx(cpu, i)];
@@ -262,6 +265,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
  * @chunk: chunk of interest
  * @page_start: the start page
  * @page_end: the end page
+ * @gfp: allocation flags passed to the underlying memory allocator
  *
  * For each cpu, populate and map pages [@page_start,@page_end) into
  * @chunk.
@@ -270,7 +274,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
  * pcpu_alloc_mutex, does GFP_KERNEL allocation.
  */
 static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
-                              int page_start, int page_end)
+                              int page_start, int page_end, gfp_t gfp)
 {
        struct page **pages;
 
@@ -278,7 +282,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
        if (!pages)
                return -ENOMEM;
 
-       if (pcpu_alloc_pages(chunk, pages, page_start, page_end))
+       if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp))
                return -ENOMEM;
 
        if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
@@ -325,12 +329,12 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
        pcpu_free_pages(chunk, pages, page_start, page_end);
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 {
        struct pcpu_chunk *chunk;
        struct vm_struct **vms;
 
-       chunk = pcpu_alloc_chunk();
+       chunk = pcpu_alloc_chunk(gfp);
        if (!chunk)
                return NULL;
 
index 50e7fdf84055151d8c7e8bb220f7a73e96b7f3e4..9297098519a6fa793112c3653595c6b509c5fda1 100644 (file)
@@ -80,6 +80,7 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/kmemleak.h>
+#include <linux/sched.h>
 
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
@@ -447,26 +448,25 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,
 /**
  * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
+ * @gfp: allocation flags
  *
  * Allocate @size bytes.  If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vzalloc() is used.  The returned
- * memory is always zeroed.
- *
- * CONTEXT:
- * Does GFP_KERNEL allocation.
+ * kzalloc() is used; otherwise, the equivalent of vzalloc() is used.
+ * This is to facilitate passing through whitelisted flags.  The
+ * returned memory is always zeroed.
  *
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void *pcpu_mem_zalloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
 {
        if (WARN_ON_ONCE(!slab_is_available()))
                return NULL;
 
        if (size <= PAGE_SIZE)
-               return kzalloc(size, GFP_KERNEL);
+               return kzalloc(size, gfp);
        else
-               return vzalloc(size);
+               return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
 }
 
 /**
@@ -1154,12 +1154,12 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
        return chunk;
 }
 
-static struct pcpu_chunk *pcpu_alloc_chunk(void)
+static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
 {
        struct pcpu_chunk *chunk;
        int region_bits;
 
-       chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
+       chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp);
        if (!chunk)
                return NULL;
 
@@ -1168,17 +1168,17 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
        region_bits = pcpu_chunk_map_bits(chunk);
 
        chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) *
-                                          sizeof(chunk->alloc_map[0]));
+                                          sizeof(chunk->alloc_map[0]), gfp);
        if (!chunk->alloc_map)
                goto alloc_map_fail;
 
        chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) *
-                                          sizeof(chunk->bound_map[0]));
+                                          sizeof(chunk->bound_map[0]), gfp);
        if (!chunk->bound_map)
                goto bound_map_fail;
 
        chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
-                                          sizeof(chunk->md_blocks[0]));
+                                          sizeof(chunk->md_blocks[0]), gfp);
        if (!chunk->md_blocks)
                goto md_blocks_fail;
 
@@ -1277,9 +1277,11 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
  * pcpu_addr_to_page           - translate address to physical address
  * pcpu_verify_alloc_info      - check alloc_info is acceptable during init
  */
-static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static struct pcpu_chunk *pcpu_create_chunk(void);
+static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
+                              int page_start, int page_end, gfp_t gfp);
+static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
+                                 int page_start, int page_end);
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
 static struct page *pcpu_addr_to_page(void *addr);
 static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
@@ -1339,6 +1341,8 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
                                 gfp_t gfp)
 {
+       /* whitelisted flags that can be passed to the backing allocators */
+       gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
        bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
        bool do_warn = !(gfp & __GFP_NOWARN);
        static int warn_limit = 10;
@@ -1369,8 +1373,17 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
                return NULL;
        }
 
-       if (!is_atomic)
-               mutex_lock(&pcpu_alloc_mutex);
+       if (!is_atomic) {
+               /*
+                * pcpu_balance_workfn() allocates memory under this mutex,
+                * and it may wait for memory reclaim. Allow current task
+                * to become OOM victim, in case of memory pressure.
+                */
+               if (gfp & __GFP_NOFAIL)
+                       mutex_lock(&pcpu_alloc_mutex);
+               else if (mutex_lock_killable(&pcpu_alloc_mutex))
+                       return NULL;
+       }
 
        spin_lock_irqsave(&pcpu_lock, flags);
 
@@ -1421,7 +1434,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
        }
 
        if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
-               chunk = pcpu_create_chunk();
+               chunk = pcpu_create_chunk(pcpu_gfp);
                if (!chunk) {
                        err = "failed to allocate new chunk";
                        goto fail;
@@ -1450,7 +1463,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
                                           page_start, page_end) {
                        WARN_ON(chunk->immutable);
 
-                       ret = pcpu_populate_chunk(chunk, rs, re);
+                       ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
 
                        spin_lock_irqsave(&pcpu_lock, flags);
                        if (ret) {
@@ -1561,10 +1574,17 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
  * pcpu_balance_workfn - manage the amount of free chunks and populated pages
  * @work: unused
  *
- * Reclaim all fully free chunks except for the first one.
+ * Reclaim all fully free chunks except for the first one.  This is also
+ * responsible for maintaining the pool of empty populated pages.  However,
+ * it is possible that this is called when physical memory is scarce causing
+ * OOM killer to be triggered.  We should avoid doing so until an actual
+ * allocation causes the failure as it is possible that requests can be
+ * serviced from already backed regions.
  */
 static void pcpu_balance_workfn(struct work_struct *work)
 {
+       /* gfp flags passed to underlying allocators */
+       const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
        LIST_HEAD(to_free);
        struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
        struct pcpu_chunk *chunk, *next;
@@ -1600,6 +1620,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
                        spin_unlock_irq(&pcpu_lock);
                }
                pcpu_destroy_chunk(chunk);
+               cond_resched();
        }
 
        /*
@@ -1645,7 +1666,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
                                           chunk->nr_pages) {
                        int nr = min(re - rs, nr_to_pop);
 
-                       ret = pcpu_populate_chunk(chunk, rs, rs + nr);
+                       ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
                        if (!ret) {
                                nr_to_pop -= nr;
                                spin_lock_irq(&pcpu_lock);
@@ -1662,7 +1683,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
 
        if (nr_to_pop) {
                /* ran out of chunks to populate, create a new one and retry */
-               chunk = pcpu_create_chunk();
+               chunk = pcpu_create_chunk(gfp);
                if (chunk) {
                        spin_lock_irq(&pcpu_lock);
                        pcpu_chunk_relocate(chunk, -1);
index 1907688b75ee0110253f3d96e1e3d7ea02433a2a..b859192433998a14096629b53ac5297a6a0dd2bb 100644 (file)
@@ -493,36 +493,45 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
                info = list_entry(pos, struct shmem_inode_info, shrinklist);
                inode = &info->vfs_inode;
 
-               if (nr_to_split && split >= nr_to_split) {
-                       iput(inode);
-                       continue;
-               }
+               if (nr_to_split && split >= nr_to_split)
+                       goto leave;
 
-               page = find_lock_page(inode->i_mapping,
+               page = find_get_page(inode->i_mapping,
                                (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
                if (!page)
                        goto drop;
 
+               /* No huge page at the end of the file: nothing to split */
                if (!PageTransHuge(page)) {
-                       unlock_page(page);
                        put_page(page);
                        goto drop;
                }
 
+               /*
+                * Leave the inode on the list if we failed to lock
+                * the page at this time.
+                *
+                * Waiting for the lock may lead to deadlock in the
+                * reclaim path.
+                */
+               if (!trylock_page(page)) {
+                       put_page(page);
+                       goto leave;
+               }
+
                ret = split_huge_page(page);
                unlock_page(page);
                put_page(page);
 
-               if (ret) {
-                       /* split failed: leave it on the list */
-                       iput(inode);
-                       continue;
-               }
+               /* If split failed leave the inode on the list */
+               if (ret)
+                       goto leave;
 
                split++;
 drop:
                list_del_init(&info->shrinklist);
                removed++;
+leave:
                iput(inode);
        }
 
index bee53495a829299f766d11ffd316acdb54cfffdf..cd5dc3faaa57d667f14be1074e2e00bb5db8f330 100644 (file)
@@ -1779,6 +1779,20 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
        if (stat.nr_writeback && stat.nr_writeback == nr_taken)
                set_bit(PGDAT_WRITEBACK, &pgdat->flags);
 
+       /*
+        * If dirty pages are scanned that are not queued for IO, it
+        * implies that flushers are not doing their job. This can
+        * happen when memory pressure pushes dirty pages to the end of
+        * the LRU before the dirty limits are breached and the dirty
+        * data has expired. It can also happen when the proportion of
+        * dirty pages grows not through writes but through memory
+        * pressure reclaiming all the clean cache. And in some cases,
+        * the flushers simply cannot keep up with the allocation
+        * rate. Nudge the flusher threads in case they are asleep.
+        */
+       if (stat.nr_unqueued_dirty == nr_taken)
+               wakeup_flusher_threads(WB_REASON_VMSCAN);
+
        /*
         * Legacy memcg will stall in page writeback so avoid forcibly
         * stalling here.
@@ -1791,22 +1805,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
                if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested)
                        set_bit(PGDAT_CONGESTED, &pgdat->flags);
 
-               /*
-                * If dirty pages are scanned that are not queued for IO, it
-                * implies that flushers are not doing their job. This can
-                * happen when memory pressure pushes dirty pages to the end of
-                * the LRU before the dirty limits are breached and the dirty
-                * data has expired. It can also happen when the proportion of
-                * dirty pages grows not through writes but through memory
-                * pressure reclaiming all the clean cache. And in some cases,
-                * the flushers simply cannot keep up with the allocation
-                * rate. Nudge the flusher threads in case they are asleep, but
-                * also allow kswapd to start writing pages during reclaim.
-                */
-               if (stat.nr_unqueued_dirty == nr_taken) {
-                       wakeup_flusher_threads(WB_REASON_VMSCAN);
+               /* Allow kswapd to start writing pages during reclaim. */
+               if (stat.nr_unqueued_dirty == nr_taken)
                        set_bit(PGDAT_DIRTY, &pgdat->flags);
-               }
 
                /*
                 * If kswapd scans pages marked marked for immediate
index bad01b14a4ad6b5d4e61ac5e0ed93022755223ab..bd0ed39f65fbb20621ef73513872e1b21c3dade1 100644 (file)
@@ -729,6 +729,7 @@ static struct pernet_operations vlan_net_ops = {
        .exit = vlan_exit_net,
        .id   = &vlan_net_id,
        .size = sizeof(struct vlan_net),
+       .async = true,
 };
 
 static int __init vlan_proto_init(void)
index 64aa9f755e1d251e19f1b713acfc163318a9b57d..45c9bf5ff3a0c1f33d5e9443f9237b1277df6502 100644 (file)
@@ -48,8 +48,8 @@ bool vlan_do_receive(struct sk_buff **skbp)
                 * original position later
                 */
                skb_push(skb, offset);
-               skb = *skbp = vlan_insert_tag(skb, skb->vlan_proto,
-                                             skb->vlan_tci);
+               skb = *skbp = vlan_insert_inner_tag(skb, skb->vlan_proto,
+                                                   skb->vlan_tci, skb->mac_len);
                if (!skb)
                        return false;
                skb_pull(skb, offset + VLAN_HLEN);
index c44f6515be5ecb20c763d13a652e0f8c006e7ef2..e4e2e02b738002832a3865eaf352b79d83fb7755 100644 (file)
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
 #
 # Marek Lindner, Simon Wunderlich
 #
index 022f6e77307b86fa00377dd37af187757b1a0ba9..b97ba6fb835392835dd2a3978decccfc3161a6db 100644 (file)
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
 #
 # Marek Lindner, Simon Wunderlich
 #
index 80c72c7d3cad332769056169cd8d1c2aba9bf1bd..ea309ad0617502a436b4b1874bbb871d051b25b2 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
index 029221615ba3d59a6b5d47b78c16c7c8555ba7a6..534b790c3753d40cc1e18fa36465a318553a944e 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Linus Lüssing
  *
index 79e32638372663314e1f47590b74ec7482d1fb8d..be09a98838252f4f0c23cec0625930cf896cd0ff 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -157,7 +157,7 @@ static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node)
  * Return: 0 on success, a negative error code otherwise.
  */
 static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
-                                    int max_if_num)
+                                    unsigned int max_if_num)
 {
        void *data_ptr;
        size_t old_size;
@@ -201,7 +201,8 @@ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
  */
 static void
 batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node,
-                                  int max_if_num, int del_if_num)
+                                  unsigned int max_if_num,
+                                  unsigned int del_if_num)
 {
        size_t chunk_size;
        size_t if_offset;
@@ -239,7 +240,8 @@ batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node,
  */
 static void
 batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node,
-                                      int max_if_num, int del_if_num)
+                                      unsigned int max_if_num,
+                                      unsigned int del_if_num)
 {
        size_t if_offset;
        void *data_ptr;
@@ -276,7 +278,8 @@ batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node,
  * Return: 0 on success, a negative error code otherwise.
  */
 static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
-                                    int max_if_num, int del_if_num)
+                                    unsigned int max_if_num,
+                                    unsigned int del_if_num)
 {
        spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
 
@@ -311,7 +314,8 @@ static struct batadv_orig_node *
 batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr)
 {
        struct batadv_orig_node *orig_node;
-       int size, hash_added;
+       int hash_added;
+       size_t size;
 
        orig_node = batadv_orig_hash_find(bat_priv, addr);
        if (orig_node)
@@ -893,7 +897,7 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
        u32 i;
        size_t word_index;
        u8 *w;
-       int if_num;
+       unsigned int if_num;
 
        for (i = 0; i < hash->size; i++) {
                head = &hash->table[i];
@@ -1023,7 +1027,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
        struct batadv_neigh_node *tmp_neigh_node = NULL;
        struct batadv_neigh_node *router = NULL;
        struct batadv_orig_node *orig_node_tmp;
-       int if_num;
+       unsigned int if_num;
        u8 sum_orig, sum_neigh;
        u8 *neigh_addr;
        u8 tq_avg;
@@ -1182,7 +1186,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
        u8 total_count;
        u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own;
        unsigned int neigh_rq_inv_cube, neigh_rq_max_cube;
-       int if_num;
+       unsigned int if_num;
        unsigned int tq_asym_penalty, inv_asym_penalty;
        unsigned int combined_tq;
        unsigned int tq_iface_penalty;
@@ -1702,9 +1706,9 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
 
        if (is_my_orig) {
                unsigned long *word;
-               int offset;
+               size_t offset;
                s32 bit_pos;
-               s16 if_num;
+               unsigned int if_num;
                u8 *weight;
 
                orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv,
@@ -2729,7 +2733,7 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
        struct batadv_neigh_ifinfo *router_ifinfo = NULL;
        struct batadv_neigh_node *router;
        struct batadv_gw_node *curr_gw;
-       int ret = -EINVAL;
+       int ret = 0;
        void *hdr;
 
        router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
index 9dc0dd5c83df40af56cabad17de86002b8701198..317cafd302cfdd62c5ed6e7b6171a3e2ee4b6214 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
index 27e165ac9302bc365e1f81034a1c1353e991b724..ec93337ee2597738e46b87dd72724d5becf3f48e 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing, Marek Lindner
  *
@@ -928,7 +928,7 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
        struct batadv_neigh_ifinfo *router_ifinfo = NULL;
        struct batadv_neigh_node *router;
        struct batadv_gw_node *curr_gw;
-       int ret = -EINVAL;
+       int ret = 0;
        void *hdr;
 
        router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
index a17ab68bbce8ec1bda67b68102b0c313fdb1e9b7..ec4a2a569750c48d7c302a2153f3bb49fb074dea 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Linus Lüssing
  *
index a83478c4659701630bce792575f2cb0bc39bba86..28687493599f5ba10b8813c18d803582210bc292 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing, Marek Lindner
  *
index 5e39d0588a48a4c9c7fd8d2bdf0036f4869fa0ed..e8c7b7fd290d7495e0daf0edca654bd4b06e05a3 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing, Marek Lindner
  *
index ba59b77c605d6abd4d2a37fbd2792bd70ee288c4..2948b41b06d47c0ee32649fa410b323f39c36151 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
  *
index 6a4c14ccc3c61bbce96726db94baa1ae57505d5d..ed36c5e79fde8f72db1a123176ed46262f5c0b71 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
  *
index bdc1ef06e05b56c86c0bc9621ec22a42261719a8..a296a4d851f5130dc24fa8e5fc324e4c6411fc8c 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
index ca9d0753dd6b92bdfc332094b2d1776989270b39..48f6832895318ee2ec43745314cb7ef3aeda4fbe 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
index fad47853ad3c5e0bae2ca2b404badc6797d7f1e9..a2de5a44bd41bf5c3d521d29b72e0b225a3ace05 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
  *
@@ -2161,22 +2161,25 @@ batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
 {
        struct batadv_bla_claim *claim;
        int idx = 0;
+       int ret = 0;
 
        rcu_read_lock();
        hlist_for_each_entry_rcu(claim, head, hash_entry) {
                if (idx++ < *idx_skip)
                        continue;
-               if (batadv_bla_claim_dump_entry(msg, portid, seq,
-                                               primary_if, claim)) {
+
+               ret = batadv_bla_claim_dump_entry(msg, portid, seq,
+                                                 primary_if, claim);
+               if (ret) {
                        *idx_skip = idx - 1;
                        goto unlock;
                }
        }
 
-       *idx_skip = idx;
+       *idx_skip = 0;
 unlock:
        rcu_read_unlock();
-       return 0;
+       return ret;
 }
 
 /**
@@ -2391,22 +2394,25 @@ batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
 {
        struct batadv_bla_backbone_gw *backbone_gw;
        int idx = 0;
+       int ret = 0;
 
        rcu_read_lock();
        hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
                if (idx++ < *idx_skip)
                        continue;
-               if (batadv_bla_backbone_dump_entry(msg, portid, seq,
-                                                  primary_if, backbone_gw)) {
+
+               ret = batadv_bla_backbone_dump_entry(msg, portid, seq,
+                                                    primary_if, backbone_gw);
+               if (ret) {
                        *idx_skip = idx - 1;
                        goto unlock;
                }
        }
 
-       *idx_skip = idx;
+       *idx_skip = 0;
 unlock:
        rcu_read_unlock();
-       return 0;
+       return ret;
 }
 
 /**
index b27571abcd2ff2a29ae0a28c7aa52a4d8fb22483..71f95a3e4d3f335890408685432f18e5d7411a76 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
  *
index 21d1189957a7f4a91347f0bffcb528c7a0b1fc1b..4229b01ac7b54008e023df0ed6546a6d541498ba 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
index 90a08d35c501862f8cdfb039a13fc50f470a551f..37b069698b04b369e68e4e8a31c3ac01575b0178 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
index 9703c791ffc5ac2fd375531073f134e6beb86141..a60bacf7120be88ba7626cf0a87dd34eef0a2eec 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
  *
@@ -33,6 +33,7 @@
 #include <linux/kernel.h>
 #include <linux/kref.h>
 #include <linux/list.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
 #include <linux/string.h>
 #include <linux/workqueue.h>
 #include <net/arp.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bridge_loop_avoidance.h"
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
+#include "netlink.h"
 #include "originator.h"
 #include "send.h"
+#include "soft-interface.h"
 #include "translation-table.h"
 #include "tvlv.h"
 
@@ -393,7 +400,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
                   batadv_arp_hw_src(skb, hdr_size), &ip_src,
                   batadv_arp_hw_dst(skb, hdr_size), &ip_dst);
 
-       if (hdr_size == 0)
+       if (hdr_size < sizeof(struct batadv_unicast_packet))
                return;
 
        unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
@@ -495,7 +502,7 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
         * the one with the lowest address
         */
        if (tmp_max == max && max_orig_node &&
-           batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)
+           batadv_compare_eth(candidate->orig, max_orig_node->orig))
                goto out;
 
        ret = true;
@@ -851,6 +858,151 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
 }
 #endif
 
+/**
+ * batadv_dat_cache_dump_entry() - dump one entry of the DAT cache table to a
+ *  netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @dat_entry: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+                           struct batadv_dat_entry *dat_entry)
+{
+       int msecs;
+       void *hdr;
+
+       hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+                         NLM_F_MULTI, BATADV_CMD_GET_DAT_CACHE);
+       if (!hdr)
+               return -ENOBUFS;
+
+       msecs = jiffies_to_msecs(jiffies - dat_entry->last_update);
+
+       if (nla_put_in_addr(msg, BATADV_ATTR_DAT_CACHE_IP4ADDRESS,
+                           dat_entry->ip) ||
+           nla_put(msg, BATADV_ATTR_DAT_CACHE_HWADDRESS, ETH_ALEN,
+                   dat_entry->mac_addr) ||
+           nla_put_u16(msg, BATADV_ATTR_DAT_CACHE_VID, dat_entry->vid) ||
+           nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) {
+               genlmsg_cancel(msg, hdr);
+               return -EMSGSIZE;
+       }
+
+       genlmsg_end(msg, hdr);
+       return 0;
+}
+
+/**
+ * batadv_dat_cache_dump_bucket() - dump one bucket of the DAT cache table to
+ *  a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+                            struct hlist_head *head, int *idx_skip)
+{
+       struct batadv_dat_entry *dat_entry;
+       int idx = 0;
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(dat_entry, head, hash_entry) {
+               if (idx < *idx_skip)
+                       goto skip;
+
+               if (batadv_dat_cache_dump_entry(msg, portid, seq,
+                                               dat_entry)) {
+                       rcu_read_unlock();
+                       *idx_skip = idx;
+
+                       return -EMSGSIZE;
+               }
+
+skip:
+               idx++;
+       }
+       rcu_read_unlock();
+
+       return 0;
+}
+
+/**
+ * batadv_dat_cache_dump() - dump DAT cache table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+       struct batadv_hard_iface *primary_if = NULL;
+       int portid = NETLINK_CB(cb->skb).portid;
+       struct net *net = sock_net(cb->skb->sk);
+       struct net_device *soft_iface;
+       struct batadv_hashtable *hash;
+       struct batadv_priv *bat_priv;
+       int bucket = cb->args[0];
+       struct hlist_head *head;
+       int idx = cb->args[1];
+       int ifindex;
+       int ret = 0;
+
+       ifindex = batadv_netlink_get_ifindex(cb->nlh,
+                                            BATADV_ATTR_MESH_IFINDEX);
+       if (!ifindex)
+               return -EINVAL;
+
+       soft_iface = dev_get_by_index(net, ifindex);
+       if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+               ret = -ENODEV;
+               goto out;
+       }
+
+       bat_priv = netdev_priv(soft_iface);
+       hash = bat_priv->dat.hash;
+
+       primary_if = batadv_primary_if_get_selected(bat_priv);
+       if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+               ret = -ENOENT;
+               goto out;
+       }
+
+       while (bucket < hash->size) {
+               head = &hash->table[bucket];
+
+               if (batadv_dat_cache_dump_bucket(msg, portid,
+                                                cb->nlh->nlmsg_seq, head,
+                                                &idx))
+                       break;
+
+               bucket++;
+               idx = 0;
+       }
+
+       cb->args[0] = bucket;
+       cb->args[1] = idx;
+
+       ret = msg->len;
+
+out:
+       if (primary_if)
+               batadv_hardif_put(primary_if);
+
+       if (soft_iface)
+               dev_put(soft_iface);
+
+       return ret;
+}
+
 /**
  * batadv_arp_get_type() - parse an ARP packet and gets the type
  * @bat_priv: the bat priv with all the soft interface information
index 12897eb46268c6b1b92def9f39666280d52cfdec..a045960283375406b9a093b8c18a6f6c3882e814 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
  *
@@ -28,6 +28,7 @@
 
 #include "originator.h"
 
+struct netlink_callback;
 struct seq_file;
 struct sk_buff;
 
@@ -81,6 +82,7 @@ batadv_dat_init_own_addr(struct batadv_priv *bat_priv,
 int batadv_dat_init(struct batadv_priv *bat_priv);
 void batadv_dat_free(struct batadv_priv *bat_priv);
 int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb);
 
 /**
  * batadv_dat_inc_counter() - increment the correct DAT packet counter
@@ -169,6 +171,12 @@ static inline void batadv_dat_free(struct batadv_priv *bat_priv)
 {
 }
 
+static inline int
+batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+       return -EOPNOTSUPP;
+}
+
 static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv,
                                          u8 subtype)
 {
index 22dde42fd80e63e79faebfdb0d807786f1b9a4b5..0fddc17106bd8a0e3f064fee9adba7c226f34682 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll <martin@hundeboll.net>
  *
@@ -288,7 +288,8 @@ batadv_frag_merge_packets(struct hlist_head *chain)
        /* Move the existing MAC header to just before the payload. (Override
         * the fragment header.)
         */
-       skb_pull_rcsum(skb_out, hdr_size);
+       skb_pull(skb_out, hdr_size);
+       skb_out->ip_summed = CHECKSUM_NONE;
        memmove(skb_out->data - ETH_HLEN, skb_mac_header(skb_out), ETH_HLEN);
        skb_set_mac_header(skb_out, -ETH_HLEN);
        skb_reset_network_header(skb_out);
index 138b22a1836aff700e8be0ae406ed69a3ae21990..944512e0778290c3539193b262c83e4bebc19754 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll <martin@hundeboll.net>
  *
index 37fe9a644f22f29d2fdd360f1f6bd63a22cd2680..c294f6fd43e03962bac21072ac096fd9a97f2c09 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
index 981f58421a323b9d9941450fdc6fb693199283f0..f0b86fcb249386b746d77f1abaefadc62d222b8c 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
index b3e156af2256cff85a1d362112ccf577ac7fa348..936c107f31991215d332bff6a7b9f7897f1089c1 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
index afebd9c7edf454ed50e3ddd16211c4a4d3ac5f33..80afb2793687317f7d6f0e8a8563b9808e6cbdb8 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
index 5f186bff284a37132eaa4862854308163269a22f..c405d15befd60bdabf9f50813c3bee446238d539 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -763,6 +763,11 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
        hard_iface->soft_iface = soft_iface;
        bat_priv = netdev_priv(hard_iface->soft_iface);
 
+       if (bat_priv->num_ifaces >= UINT_MAX) {
+               ret = -ENOSPC;
+               goto err_dev;
+       }
+
        ret = netdev_master_upper_dev_link(hard_iface->net_dev,
                                           soft_iface, NULL, NULL, NULL);
        if (ret)
@@ -876,7 +881,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
        batadv_hardif_recalc_extra_skbroom(hard_iface->soft_iface);
 
        /* nobody uses this interface anymore */
-       if (!bat_priv->num_ifaces) {
+       if (bat_priv->num_ifaces == 0) {
                batadv_gw_check_client_stop(bat_priv);
 
                if (autodel == BATADV_IF_CLEANUP_AUTO)
@@ -912,7 +917,7 @@ batadv_hardif_add_interface(struct net_device *net_dev)
        if (ret)
                goto free_if;
 
-       hard_iface->if_num = -1;
+       hard_iface->if_num = 0;
        hard_iface->net_dev = net_dev;
        hard_iface->soft_iface = NULL;
        hard_iface->if_status = BATADV_IF_NOT_IN_USE;
index de5e9a374ece5f05ae36399cef49f062000a532c..d1c0f61893016b9a12fa81ba1d2de68172cbca96 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
index 04d964358c98750763ab4c9c6952f82ae115de99..7b49e4001778f0a53d14eb76ee96595a9a02e775 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
index 4ce1b6d3ad5c7d570697adf5b4f84a24d92faef1..9490a7ca2ba698399b2247799dd85004da5d04c8 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
index e91f29c7c638a5f9fede10b08cdd11f37d2ee1e6..55c358ad3331f817b3911d21b6cbec7ddec2d031 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -24,6 +24,7 @@
 #include <linux/debugfs.h>
 #include <linux/errno.h>
 #include <linux/etherdevice.h>
+#include <linux/eventpoll.h>
 #include <linux/export.h>
 #include <linux/fcntl.h>
 #include <linux/fs.h>
index 84cddd01eeab8f77be5bdca6b72bd4032c16a03a..958be22beda9f460699a9a4633ef84c378edb621 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
index dc9fa37ddd14145b90053e59beb78211f3a22d23..853773e45f7921bccc142ed4208ff2d377134b49 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
@@ -22,6 +22,7 @@
 #include <linux/compiler.h>
 #include <linux/debugfs.h>
 #include <linux/errno.h>
+#include <linux/eventpoll.h>
 #include <linux/export.h>
 #include <linux/fcntl.h>
 #include <linux/fs.h>
index 35e02b2b9e72bd046cd342f099ae633f5245efdb..35f4f397ed57dd351ca2eb4135ddaffd84235d61 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
index d31c8266e244ec4e508c719975fbe698ea35047d..69c0d85bceb3e0a1915e37d278110ee2655c4571 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
index f7ba3f96d8f36be0f49541fa49454dda6f73bcee..057a28a9fe8803114bd5d04334dcc965f3884b18 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -25,7 +25,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2018.0"
+#define BATADV_SOURCE_VERSION "2018.1"
 #endif
 
 /* B.A.T.M.A.N. parameters */
@@ -331,11 +331,13 @@ static inline bool batadv_has_timed_out(unsigned long timestamp,
  *
  * Return: true when x is a predecessor of y, false otherwise
  */
-#define batadv_seq_before(x, y) ({typeof(x)_d1 = (x); \
-                                typeof(y)_d2 = (y); \
-                                typeof(x)_dummy = (_d1 - _d2); \
-                                (void)(&_d1 == &_d2); \
-                                _dummy > batadv_smallest_signed_int(_dummy); })
+#define batadv_seq_before(x, y) ({ \
+       typeof(x)_d1 = (x); \
+       typeof(y)_d2 = (y); \
+       typeof(x)_dummy = (_d1 - _d2); \
+       (void)(&_d1 == &_d2); \
+       _dummy > batadv_smallest_signed_int(_dummy); \
+})
 
 /**
  * batadv_seq_after() - Checks if a sequence number x is a successor of y
index cbdeb47ec3f606ddfe53a122392d82576914480b..de3a055f7dd8af8b8a4678bb2bc4768ed0c51acf 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2014-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2018  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing
  *
@@ -40,6 +40,7 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/printk.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
 #include <net/addrconf.h>
+#include <net/genetlink.h>
 #include <net/if_inet6.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/sock.h>
 #include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
+#include "netlink.h"
+#include "soft-interface.h"
 #include "translation-table.h"
 #include "tvlv.h"
 
@@ -101,8 +108,37 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
        return upper;
 }
 
+/**
+ * batadv_mcast_addr_is_ipv4() - check if multicast MAC is IPv4
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv4 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv4(const u8 *addr)
+{
+       static const u8 prefix[] = {0x01, 0x00, 0x5E};
+
+       return memcmp(prefix, addr, sizeof(prefix)) == 0;
+}
+
+/**
+ * batadv_mcast_addr_is_ipv6() - check if multicast MAC is IPv6
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv6 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv6(const u8 *addr)
+{
+       static const u8 prefix[] = {0x33, 0x33};
+
+       return memcmp(prefix, addr, sizeof(prefix)) == 0;
+}
+
 /**
  * batadv_mcast_mla_softif_get() - get softif multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
  * @dev: the device to collect multicast addresses from
  * @mcast_list: a list to put found addresses into
  *
@@ -119,9 +155,12 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
  * Return: -ENOMEM on memory allocation error or the number of
  * items added to the mcast_list otherwise.
  */
-static int batadv_mcast_mla_softif_get(struct net_device *dev,
+static int batadv_mcast_mla_softif_get(struct batadv_priv *bat_priv,
+                                      struct net_device *dev,
                                       struct hlist_head *mcast_list)
 {
+       bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+       bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
        struct net_device *bridge = batadv_mcast_get_bridge(dev);
        struct netdev_hw_addr *mc_list_entry;
        struct batadv_hw_addr *new;
@@ -129,6 +168,12 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev,
 
        netif_addr_lock_bh(bridge ? bridge : dev);
        netdev_for_each_mc_addr(mc_list_entry, bridge ? bridge : dev) {
+               if (all_ipv4 && batadv_mcast_addr_is_ipv4(mc_list_entry->addr))
+                       continue;
+
+               if (all_ipv6 && batadv_mcast_addr_is_ipv6(mc_list_entry->addr))
+                       continue;
+
                new = kmalloc(sizeof(*new), GFP_ATOMIC);
                if (!new) {
                        ret = -ENOMEM;
@@ -193,6 +238,7 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
 
 /**
  * batadv_mcast_mla_bridge_get() - get bridged-in multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
  * @dev: a bridge slave whose bridge to collect multicast addresses from
  * @mcast_list: a list to put found addresses into
  *
@@ -204,10 +250,13 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
  * Return: -ENOMEM on memory allocation error or the number of
  * items added to the mcast_list otherwise.
  */
-static int batadv_mcast_mla_bridge_get(struct net_device *dev,
+static int batadv_mcast_mla_bridge_get(struct batadv_priv *bat_priv,
+                                      struct net_device *dev,
                                       struct hlist_head *mcast_list)
 {
        struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
+       bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+       bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
        struct br_ip_list *br_ip_entry, *tmp;
        struct batadv_hw_addr *new;
        u8 mcast_addr[ETH_ALEN];
@@ -221,6 +270,12 @@ static int batadv_mcast_mla_bridge_get(struct net_device *dev,
                goto out;
 
        list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) {
+               if (all_ipv4 && br_ip_entry->addr.proto == htons(ETH_P_IP))
+                       continue;
+
+               if (all_ipv6 && br_ip_entry->addr.proto == htons(ETH_P_IPV6))
+                       continue;
+
                batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr);
                if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list))
                        continue;
@@ -543,8 +598,8 @@ static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv)
                bat_priv->mcast.enabled = true;
        }
 
-       return !(mcast_data.flags &
-                (BATADV_MCAST_WANT_ALL_IPV4 | BATADV_MCAST_WANT_ALL_IPV6));
+       return !(mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV4 &&
+                mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV6);
 }
 
 /**
@@ -568,11 +623,11 @@ static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv)
        if (!batadv_mcast_mla_tvlv_update(bat_priv))
                goto update;
 
-       ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list);
+       ret = batadv_mcast_mla_softif_get(bat_priv, soft_iface, &mcast_list);
        if (ret < 0)
                goto out;
 
-       ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list);
+       ret = batadv_mcast_mla_bridge_get(bat_priv, soft_iface, &mcast_list);
        if (ret < 0)
                goto out;
 
@@ -1285,6 +1340,236 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
 }
 #endif
 
+/**
+ * batadv_mcast_mesh_info_put() - put multicast info into a netlink message
+ * @msg: buffer for the message
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 on success, -EMSGSIZE if an attribute could not be added to @msg.
+ */
+int batadv_mcast_mesh_info_put(struct sk_buff *msg,
+                              struct batadv_priv *bat_priv)
+{
+       u32 flags = bat_priv->mcast.flags;
+       u32 flags_priv = BATADV_NO_FLAGS;
+
+       if (bat_priv->mcast.bridged) {
+               flags_priv |= BATADV_MCAST_FLAGS_BRIDGED;
+
+               if (bat_priv->mcast.querier_ipv4.exists)
+                       flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS;
+               if (bat_priv->mcast.querier_ipv6.exists)
+                       flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS;
+               if (bat_priv->mcast.querier_ipv4.shadowing)
+                       flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING;
+               if (bat_priv->mcast.querier_ipv6.shadowing)
+                       flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING;
+       }
+
+       if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS, flags) ||
+           nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS_PRIV, flags_priv))
+               return -EMSGSIZE;
+
+       return 0;
+}
+
+/**
+ * batadv_mcast_flags_dump_entry() - dump one entry of the multicast flags table
+ *  to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @orig_node: originator to dump the multicast flags of
+ *
+ * Return: 0 on success, -ENOBUFS or -EMSGSIZE if the entry could not be added.
+ */
+static int
+batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+                             struct batadv_orig_node *orig_node)
+{
+       void *hdr;
+
+       hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+                         NLM_F_MULTI, BATADV_CMD_GET_MCAST_FLAGS);
+       if (!hdr)
+               return -ENOBUFS;
+
+       if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+                   orig_node->orig)) {
+               genlmsg_cancel(msg, hdr);
+               return -EMSGSIZE;
+       }
+
+       if (test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+                    &orig_node->capabilities)) {
+               if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS,
+                               orig_node->mcast_flags)) {
+                       genlmsg_cancel(msg, hdr);
+                       return -EMSGSIZE;
+               }
+       }
+
+       genlmsg_end(msg, hdr);
+       return 0;
+}
+
+/**
+ * batadv_mcast_flags_dump_bucket() - dump one bucket of the multicast flags
+ *  table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: entries to skip; set to the failing entry's index on error
+ *
+ * Return: 0 on success, -EMSGSIZE if an entry could not be added to @msg.
+ */
+static int
+batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+                              struct hlist_head *head, long *idx_skip)
+{
+       struct batadv_orig_node *orig_node;
+       long idx = 0;
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+               if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+                             &orig_node->capa_initialized))
+                       continue;
+
+               if (idx < *idx_skip)
+                       goto skip;
+
+               if (batadv_mcast_flags_dump_entry(msg, portid, seq,
+                                                 orig_node)) {
+                       rcu_read_unlock();
+                       *idx_skip = idx;
+
+                       return -EMSGSIZE;
+               }
+
+skip:
+               idx++;
+       }
+       rcu_read_unlock();
+
+       return 0;
+}
+
+/**
+ * __batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @bucket: bucket to start dumping from; updated to where the dump stopped
+ * @idx: index of the next entry to dump in @bucket; updated like @bucket
+ *
+ * Return: the length of @msg (msg->len) after dumping.
+ */
+static int
+__batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid, u32 seq,
+                         struct batadv_priv *bat_priv, long *bucket, long *idx)
+{
+       struct batadv_hashtable *hash = bat_priv->orig_hash;
+       long bucket_tmp = *bucket;
+       struct hlist_head *head;
+       long idx_tmp = *idx;
+
+       while (bucket_tmp < hash->size) {
+               head = &hash->table[bucket_tmp];
+
+               if (batadv_mcast_flags_dump_bucket(msg, portid, seq, head,
+                                                  &idx_tmp))
+                       break;
+
+               bucket_tmp++;
+               idx_tmp = 0;
+       }
+
+       *bucket = bucket_tmp;
+       *idx = idx_tmp;
+
+       return msg->len;
+}
+
+/**
+ * batadv_mcast_netlink_get_primary() - get primary interface from netlink
+ *  callback
+ * @cb: netlink callback structure
+ * @primary_if: the primary interface pointer to return the result in
+ *
+ * Return: 0 and *@primary_if set (caller must put it), or negative error code.
+ */
+static int
+batadv_mcast_netlink_get_primary(struct netlink_callback *cb,
+                                struct batadv_hard_iface **primary_if)
+{
+       struct batadv_hard_iface *hard_iface = NULL;
+       struct net *net = sock_net(cb->skb->sk);
+       struct net_device *soft_iface;
+       struct batadv_priv *bat_priv;
+       int ifindex;
+       int ret = 0;
+
+       ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+       if (!ifindex)
+               return -EINVAL;
+
+       soft_iface = dev_get_by_index(net, ifindex);
+       if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+               ret = -ENODEV;
+               goto out;
+       }
+
+       bat_priv = netdev_priv(soft_iface);
+
+       hard_iface = batadv_primary_if_get_selected(bat_priv);
+       if (!hard_iface || hard_iface->if_status != BATADV_IF_ACTIVE) {
+               ret = -ENOENT;
+               goto out;
+       }
+
+out:
+       if (soft_iface)
+               dev_put(soft_iface);
+
+       if (!ret && primary_if)
+               *primary_if = hard_iface;
+       else if (hard_iface) /* still NULL when the lookup failed early */
+               batadv_hardif_put(hard_iface);
+
+       return ret;
+}
+
+/**
+ * batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length, or the error code of the primary interface lookup.
+ */
+int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+       struct batadv_hard_iface *primary_if = NULL;
+       int portid = NETLINK_CB(cb->skb).portid;
+       struct batadv_priv *bat_priv;
+       long *bucket = &cb->args[0];
+       long *idx = &cb->args[1];
+       int ret;
+
+       ret = batadv_mcast_netlink_get_primary(cb, &primary_if);
+       if (ret)
+               return ret;
+
+       bat_priv = netdev_priv(primary_if->soft_iface);
+       ret = __batadv_mcast_flags_dump(msg, portid, cb->nlh->nlmsg_seq,
+                                       bat_priv, bucket, idx);
+
+       batadv_hardif_put(primary_if);
+       return ret;
+}
+
 /**
  * batadv_mcast_free() - free the multicast optimizations structures
  * @bat_priv: the bat priv with all the soft interface information
index 3ac06337ab715147906ad91b4768af33003794b2..3b04ab13f0eb1044454315c04e75a22ce4351afd 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2014-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2018  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing
  *
@@ -21,6 +21,7 @@
 
 #include "main.h"
 
+struct netlink_callback;
 struct seq_file;
 struct sk_buff;
 
@@ -54,6 +55,11 @@ void batadv_mcast_init(struct batadv_priv *bat_priv);
 
 int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset);
 
+int batadv_mcast_mesh_info_put(struct sk_buff *msg,
+                              struct batadv_priv *bat_priv);
+
+int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb);
+
 void batadv_mcast_free(struct batadv_priv *bat_priv);
 
 void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
@@ -72,6 +78,18 @@ static inline int batadv_mcast_init(struct batadv_priv *bat_priv)
        return 0;
 }
 
+static inline int
+batadv_mcast_mesh_info_put(struct sk_buff *msg, struct batadv_priv *bat_priv)
+{
+       return 0;
+}
+
+static inline int batadv_mcast_flags_dump(struct sk_buff *msg,
+                                         struct netlink_callback *cb)
+{
+       return -EOPNOTSUPP;
+}
+
 static inline void batadv_mcast_free(struct batadv_priv *bat_priv)
 {
 }
index a823d3899bad37b08bd46c2f1902060257b289fb..0d9459b69bdb812b1b68e28e6b68fec8ec95df2d 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2016-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018  B.A.T.M.A.N. contributors:
  *
  * Matthias Schiffer
  *
 
 #include "bat_algo.h"
 #include "bridge_loop_avoidance.h"
+#include "distributed-arp-table.h"
 #include "gateway_client.h"
 #include "hard-interface.h"
+#include "multicast.h"
 #include "originator.h"
 #include "soft-interface.h"
 #include "tp_meter.h"
@@ -64,39 +66,44 @@ static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
 };
 
 static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
-       [BATADV_ATTR_VERSION]           = { .type = NLA_STRING },
-       [BATADV_ATTR_ALGO_NAME]         = { .type = NLA_STRING },
-       [BATADV_ATTR_MESH_IFINDEX]      = { .type = NLA_U32 },
-       [BATADV_ATTR_MESH_IFNAME]       = { .type = NLA_STRING },
-       [BATADV_ATTR_MESH_ADDRESS]      = { .len = ETH_ALEN },
-       [BATADV_ATTR_HARD_IFINDEX]      = { .type = NLA_U32 },
-       [BATADV_ATTR_HARD_IFNAME]       = { .type = NLA_STRING },
-       [BATADV_ATTR_HARD_ADDRESS]      = { .len = ETH_ALEN },
-       [BATADV_ATTR_ORIG_ADDRESS]      = { .len = ETH_ALEN },
-       [BATADV_ATTR_TPMETER_RESULT]    = { .type = NLA_U8 },
-       [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 },
-       [BATADV_ATTR_TPMETER_BYTES]     = { .type = NLA_U64 },
-       [BATADV_ATTR_TPMETER_COOKIE]    = { .type = NLA_U32 },
-       [BATADV_ATTR_ACTIVE]            = { .type = NLA_FLAG },
-       [BATADV_ATTR_TT_ADDRESS]        = { .len = ETH_ALEN },
-       [BATADV_ATTR_TT_TTVN]           = { .type = NLA_U8 },
-       [BATADV_ATTR_TT_LAST_TTVN]      = { .type = NLA_U8 },
-       [BATADV_ATTR_TT_CRC32]          = { .type = NLA_U32 },
-       [BATADV_ATTR_TT_VID]            = { .type = NLA_U16 },
-       [BATADV_ATTR_TT_FLAGS]          = { .type = NLA_U32 },
-       [BATADV_ATTR_FLAG_BEST]         = { .type = NLA_FLAG },
-       [BATADV_ATTR_LAST_SEEN_MSECS]   = { .type = NLA_U32 },
-       [BATADV_ATTR_NEIGH_ADDRESS]     = { .len = ETH_ALEN },
-       [BATADV_ATTR_TQ]                = { .type = NLA_U8 },
-       [BATADV_ATTR_THROUGHPUT]        = { .type = NLA_U32 },
-       [BATADV_ATTR_BANDWIDTH_UP]      = { .type = NLA_U32 },
-       [BATADV_ATTR_BANDWIDTH_DOWN]    = { .type = NLA_U32 },
-       [BATADV_ATTR_ROUTER]            = { .len = ETH_ALEN },
-       [BATADV_ATTR_BLA_OWN]           = { .type = NLA_FLAG },
-       [BATADV_ATTR_BLA_ADDRESS]       = { .len = ETH_ALEN },
-       [BATADV_ATTR_BLA_VID]           = { .type = NLA_U16 },
-       [BATADV_ATTR_BLA_BACKBONE]      = { .len = ETH_ALEN },
-       [BATADV_ATTR_BLA_CRC]           = { .type = NLA_U16 },
+       [BATADV_ATTR_VERSION]                   = { .type = NLA_STRING },
+       [BATADV_ATTR_ALGO_NAME]                 = { .type = NLA_STRING },
+       [BATADV_ATTR_MESH_IFINDEX]              = { .type = NLA_U32 },
+       [BATADV_ATTR_MESH_IFNAME]               = { .type = NLA_STRING },
+       [BATADV_ATTR_MESH_ADDRESS]              = { .len = ETH_ALEN },
+       [BATADV_ATTR_HARD_IFINDEX]              = { .type = NLA_U32 },
+       [BATADV_ATTR_HARD_IFNAME]               = { .type = NLA_STRING },
+       [BATADV_ATTR_HARD_ADDRESS]              = { .len = ETH_ALEN },
+       [BATADV_ATTR_ORIG_ADDRESS]              = { .len = ETH_ALEN },
+       [BATADV_ATTR_TPMETER_RESULT]            = { .type = NLA_U8 },
+       [BATADV_ATTR_TPMETER_TEST_TIME]         = { .type = NLA_U32 },
+       [BATADV_ATTR_TPMETER_BYTES]             = { .type = NLA_U64 },
+       [BATADV_ATTR_TPMETER_COOKIE]            = { .type = NLA_U32 },
+       [BATADV_ATTR_ACTIVE]                    = { .type = NLA_FLAG },
+       [BATADV_ATTR_TT_ADDRESS]                = { .len = ETH_ALEN },
+       [BATADV_ATTR_TT_TTVN]                   = { .type = NLA_U8 },
+       [BATADV_ATTR_TT_LAST_TTVN]              = { .type = NLA_U8 },
+       [BATADV_ATTR_TT_CRC32]                  = { .type = NLA_U32 },
+       [BATADV_ATTR_TT_VID]                    = { .type = NLA_U16 },
+       [BATADV_ATTR_TT_FLAGS]                  = { .type = NLA_U32 },
+       [BATADV_ATTR_FLAG_BEST]                 = { .type = NLA_FLAG },
+       [BATADV_ATTR_LAST_SEEN_MSECS]           = { .type = NLA_U32 },
+       [BATADV_ATTR_NEIGH_ADDRESS]             = { .len = ETH_ALEN },
+       [BATADV_ATTR_TQ]                        = { .type = NLA_U8 },
+       [BATADV_ATTR_THROUGHPUT]                = { .type = NLA_U32 },
+       [BATADV_ATTR_BANDWIDTH_UP]              = { .type = NLA_U32 },
+       [BATADV_ATTR_BANDWIDTH_DOWN]            = { .type = NLA_U32 },
+       [BATADV_ATTR_ROUTER]                    = { .len = ETH_ALEN },
+       [BATADV_ATTR_BLA_OWN]                   = { .type = NLA_FLAG },
+       [BATADV_ATTR_BLA_ADDRESS]               = { .len = ETH_ALEN },
+       [BATADV_ATTR_BLA_VID]                   = { .type = NLA_U16 },
+       [BATADV_ATTR_BLA_BACKBONE]              = { .len = ETH_ALEN },
+       [BATADV_ATTR_BLA_CRC]                   = { .type = NLA_U16 },
+       [BATADV_ATTR_DAT_CACHE_IP4ADDRESS]      = { .type = NLA_U32 },
+       [BATADV_ATTR_DAT_CACHE_HWADDRESS]       = { .len = ETH_ALEN },
+       [BATADV_ATTR_DAT_CACHE_VID]             = { .type = NLA_U16 },
+       [BATADV_ATTR_MCAST_FLAGS]               = { .type = NLA_U32 },
+       [BATADV_ATTR_MCAST_FLAGS_PRIV]          = { .type = NLA_U32 },
 };
 
 /**
@@ -147,6 +154,9 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
                goto out;
 #endif
 
+       if (batadv_mcast_mesh_info_put(msg, bat_priv))
+               goto out;
+
        primary_if = batadv_primary_if_get_selected(bat_priv);
        if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
                hard_iface = primary_if->net_dev;
@@ -604,6 +614,18 @@ static const struct genl_ops batadv_netlink_ops[] = {
                .policy = batadv_netlink_policy,
                .dumpit = batadv_bla_backbone_dump,
        },
+       {
+               .cmd = BATADV_CMD_GET_DAT_CACHE,
+               .flags = GENL_ADMIN_PERM,
+               .policy = batadv_netlink_policy,
+               .dumpit = batadv_dat_cache_dump,
+       },
+       {
+               .cmd = BATADV_CMD_GET_MCAST_FLAGS,
+               .flags = GENL_ADMIN_PERM,
+               .policy = batadv_netlink_policy,
+               .dumpit = batadv_mcast_flags_dump,
+       },
 
 };
 
index 0e7e57b69b54ee95adc6a1fb34246dfcd0ebb244..571d9a5ae7aaaf043629949fae141fd2f4511cee 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2016-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018  B.A.T.M.A.N. contributors:
  *
  * Matthias Schiffer
  *
index b48116bb24ef100c3f3f6c08f76c3d01c39ed9e1..c3578444f3cbe759a5385ac460ccb9d41ae1c4de 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018  B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll, Jeppe Ledet-Pedersen
  *
index adaeafa4f71ef4c02971fe7cb0051f0d856636d6..65c346812bc11e7b9c32698ab50b435c321743d0 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018  B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll, Jeppe Ledet-Pedersen
  *
index 58a7d9274435cffe99fdb661ce7005f5f6da35c1..716e5b43acfae598c99b8fc1a491276aad0a2d6f 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -1569,7 +1569,7 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
  * Return: 0 on success or negative error number in case of failure
  */
 int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
-                           int max_if_num)
+                           unsigned int max_if_num)
 {
        struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
        struct batadv_algo_ops *bao = bat_priv->algo_ops;
@@ -1611,7 +1611,7 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
  * Return: 0 on success or negative error number in case of failure
  */
 int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
-                           int max_if_num)
+                           unsigned int max_if_num)
 {
        struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
        struct batadv_hashtable *hash = bat_priv->orig_hash;
index 8e543a3cdc6c310297be13a5253ef33819b9df68..3b3f59b881e19878bd7c956ea0288a8452399afd 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -73,9 +73,9 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset);
 int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb);
 int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset);
 int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
-                           int max_if_num);
+                           unsigned int max_if_num);
 int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
-                           int max_if_num);
+                           unsigned int max_if_num);
 struct batadv_orig_node_vlan *
 batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
                          unsigned short vid);
index b6891e8b741c424496c58a21944f5bf30cff02ff..cc3ed93a6d513dffd4711cac50545d65ef7d640e 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -759,6 +759,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
 /**
  * batadv_reroute_unicast_packet() - update the unicast header for re-routing
  * @bat_priv: the bat priv with all the soft interface information
+ * @skb: unicast packet to process
  * @unicast_packet: the unicast header to be updated
  * @dst_addr: the payload destination
  * @vid: VLAN identifier
@@ -770,7 +771,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
  * Return: true if the packet header has been updated, false otherwise
  */
 static bool
-batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
+batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
                              struct batadv_unicast_packet *unicast_packet,
                              u8 *dst_addr, unsigned short vid)
 {
@@ -799,8 +800,10 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
        }
 
        /* update the packet header */
+       skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
        ether_addr_copy(unicast_packet->dest, orig_addr);
        unicast_packet->ttvn = orig_ttvn;
+       skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
 
        ret = true;
 out:
@@ -841,7 +844,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
         * the packet to
         */
        if (batadv_tt_local_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) {
-               if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+               if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet,
                                                  ethhdr->h_dest, vid))
                        batadv_dbg_ratelimited(BATADV_DBG_TT,
                                               bat_priv,
@@ -887,7 +890,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
         * destination can possibly be updated and forwarded towards the new
         * target host
         */
-       if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+       if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet,
                                          ethhdr->h_dest, vid)) {
                batadv_dbg_ratelimited(BATADV_DBG_TT, bat_priv,
                                       "Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n",
@@ -910,12 +913,14 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
        if (!primary_if)
                return false;
 
+       /* update the packet header */
+       skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
        ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr);
+       unicast_packet->ttvn = curr_ttvn;
+       skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
 
        batadv_hardif_put(primary_if);
 
-       unicast_packet->ttvn = curr_ttvn;
-
        return true;
 }
 
@@ -968,14 +973,10 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
        struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL;
        int check, hdr_size = sizeof(*unicast_packet);
        enum batadv_subtype subtype;
-       struct ethhdr *ethhdr;
        int ret = NET_RX_DROP;
        bool is4addr, is_gw;
 
        unicast_packet = (struct batadv_unicast_packet *)skb->data;
-       unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
-       ethhdr = eth_hdr(skb);
-
        is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR;
        /* the caller function should have already pulled 2 bytes */
        if (is4addr)
@@ -995,12 +996,14 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
        if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
                goto free_skb;
 
+       unicast_packet = (struct batadv_unicast_packet *)skb->data;
+
        /* packet for me */
        if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
                /* If this is a unicast packet from another backgone gw,
                 * drop it.
                 */
-               orig_addr_gw = ethhdr->h_source;
+               orig_addr_gw = eth_hdr(skb)->h_source;
                orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw);
                if (orig_node_gw) {
                        is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw,
@@ -1015,6 +1018,8 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
                }
 
                if (is4addr) {
+                       unicast_4addr_packet =
+                               (struct batadv_unicast_4addr_packet *)skb->data;
                        subtype = unicast_4addr_packet->subtype;
                        batadv_dat_inc_counter(bat_priv, subtype);
 
index a1289bc5f1159f4fd438352fc74b666e8f7c331e..db54c2d9b8bfb7bf45cdc7e4f5cff6429c6ff7d7 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
index 2a5ab6f1076d44a8ea094e60b5ad470f36b1d881..4a35f5c2f52ba60d57f564daba05de5c2829ee5b 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
index 1e8c79093623ea82b8cb35cd0a20ab6fac4f0fdd..64cce07b8fe62da8c36898a9259816d3353c504b 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
index 900c5ce21cd410dc77bc0e008ae714c95e6af55a..edeffcb9f3a24e1b53c2b4d705fb260717ac09c4 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -459,13 +459,7 @@ void batadv_interface_rx(struct net_device *soft_iface,
 
        /* skb->dev & skb->pkt_type are set here */
        skb->protocol = eth_type_trans(skb, soft_iface);
-
-       /* should not be necessary anymore as we use skb_pull_rcsum()
-        * TODO: please verify this and remove this TODO
-        * -- Dec 21st 2009, Simon Wunderlich
-        */
-
-       /* skb->ip_summed = CHECKSUM_UNNECESSARY; */
+       skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 
        batadv_inc_counter(bat_priv, BATADV_CNT_RX);
        batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,
index 075c5b5b2ce1cb08bca5c16a7fa90f478098cc87..daf87f07fadd8e8a4d82d35a0d268bc9c3f2d3c3 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
index c1578fa0b952ab2e997d62e4f95f2111df063882..f2eef43bd2ec5b798ba552ff14eedcfa734b39d6 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
index bbeee61221fae6b9e1ca69cb4da8fc23483705a9..c1e3fb69952df67dde8c13977544926a27e62767 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
index 8b576712d0c1f772bdeba0b95575e37fad007f09..11520de96ccb1a87183e9666066e21731538ccd9 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018  B.A.T.M.A.N. contributors:
  *
  * Edo Monticelli, Antonio Quartulli
  *
index c8b8f2cb2c2b507b3ef16426ffb34b3146e18c86..68e600974759a9b005062b1689f91e0dbffb1b30 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018  B.A.T.M.A.N. contributors:
  *
  * Edo Monticelli, Antonio Quartulli
  *
index 7550a9ccd69537d55019b124834503283313607b..0225616d5771d0986127322142fc591780fc25b0 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich, Antonio Quartulli
  *
index 8d9e3abec2c84e2d0f5453d06c1d59e41fc1d48e..01b6c8eafaf91322944265233946619fda673381 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich, Antonio Quartulli
  *
index 5ffcb45ac6ffab4cf51298a64c31470a51e23e08..a637458205d16bf838f796383d8cc15ac861801b 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
index a74df33f446d57473ba9bc633534ebafd8c3c3d3..ef5867f49824feaa0786843a46cac9f952444380 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
index bb1578410e0cfa390a496299e3c0ed73925e8410..476b052ad9824d4cbcd6218dce40b603e3400fd2 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
@@ -167,7 +167,7 @@ struct batadv_hard_iface {
        struct list_head list;
 
        /** @if_num: identificator of the interface */
-       s16 if_num;
+       unsigned int if_num;
 
        /** @if_status: status of the interface for batman-adv */
        char if_status;
@@ -1596,7 +1596,7 @@ struct batadv_priv {
        atomic_t batman_queue_left;
 
        /** @num_ifaces: number of interfaces assigned to this mesh interface */
-       char num_ifaces;
+       unsigned int num_ifaces;
 
        /** @mesh_obj: kobject for sysfs mesh subdirectory */
        struct kobject *mesh_obj;
@@ -2186,15 +2186,16 @@ struct batadv_algo_orig_ops {
         *  orig_node due to a new hard-interface being added into the mesh
         *  (optional)
         */
-       int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num);
+       int (*add_if)(struct batadv_orig_node *orig_node,
+                     unsigned int max_if_num);
 
        /**
         * @del_if: ask the routing algorithm to apply the needed changes to the
         *  orig_node due to an hard-interface being removed from the mesh
         *  (optional)
         */
-       int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num,
-                     int del_if_num);
+       int (*del_if)(struct batadv_orig_node *orig_node,
+                     unsigned int max_if_num, unsigned int del_if_num);
 
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
        /** @print: print the originator table (optional) */
index 01117ae84f1d3f1246626396fbcf66ee2a532792..a2ddae2f37d7a264ed3d75be73624e0eae51655c 100644 (file)
@@ -2296,8 +2296,14 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
        else
                sec_level = authreq_to_seclevel(auth);
 
-       if (smp_sufficient_security(hcon, sec_level, SMP_USE_LTK))
+       if (smp_sufficient_security(hcon, sec_level, SMP_USE_LTK)) {
+               /* If link is already encrypted with sufficient security we
+                * still need refresh encryption as per Core Spec 5.0 Vol 3,
+                * Part H 2.4.6
+                */
+               smp_ltk_encrypt(conn, hcon->sec_level);
                return 0;
+       }
 
        if (sec_level > hcon->pending_sec_level)
                hcon->pending_sec_level = sec_level;
index 6bf06e756df24b1e52e56816bc2078ba2b311144..a3f95ab9d6a38c67e6c0b599f9009ae9a727bc1b 100644 (file)
@@ -52,7 +52,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 
        switch (event) {
        case NETDEV_CHANGEMTU:
-               dev_set_mtu(br->dev, br_min_mtu(br));
+               dev_set_mtu(br->dev, br_mtu(br));
                break;
 
        case NETDEV_CHANGEADDR:
@@ -188,6 +188,7 @@ static void __net_exit br_net_exit(struct net *net)
 
 static struct pernet_operations br_net_ops = {
        .exit   = br_net_exit,
+       .async  = true,
 };
 
 static const struct stp_proto br_stp_proto = {
index 1285ca30ab0a0b3c6c7acc0fefca166a209d0a94..278fc999d3550f278acdd336233d9cf8b8d47062 100644 (file)
@@ -224,7 +224,7 @@ static void br_get_stats64(struct net_device *dev,
 static int br_change_mtu(struct net_device *dev, int new_mtu)
 {
        struct net_bridge *br = netdev_priv(dev);
-       if (new_mtu > br_min_mtu(br))
+       if (new_mtu > br_mtu(br))
                return -EINVAL;
 
        dev->mtu = new_mtu;
index 9ba4ed65c52ba83435b708db7d2643179336a8a5..87b2afd455c7f7fa2e720672741e0dd2d85fe244 100644 (file)
@@ -424,8 +424,18 @@ int br_del_bridge(struct net *net, const char *name)
        return ret;
 }
 
+static bool min_mtu(int a, int b)
+{
+       return a < b ? 1 : 0;
+}
+
+static bool max_mtu(int a, int b)
+{
+       return a > b ? 1 : 0;
+}
+
 /* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
-int br_min_mtu(const struct net_bridge *br)
+static int __br_mtu(const struct net_bridge *br, bool (compare_fn)(int, int))
 {
        const struct net_bridge_port *p;
        int mtu = 0;
@@ -436,13 +446,21 @@ int br_min_mtu(const struct net_bridge *br)
                mtu = ETH_DATA_LEN;
        else {
                list_for_each_entry(p, &br->port_list, list) {
-                       if (!mtu  || p->dev->mtu < mtu)
+                       if (!mtu || compare_fn(p->dev->mtu, mtu))
                                mtu = p->dev->mtu;
                }
        }
        return mtu;
 }
 
+int br_mtu(const struct net_bridge *br)
+{
+       if (br_vlan_enabled(br->dev))
+               return __br_mtu(br, max_mtu);
+       else
+               return __br_mtu(br, min_mtu);
+}
+
 static void br_set_gso_limits(struct net_bridge *br)
 {
        unsigned int gso_max_size = GSO_MAX_SIZE;
@@ -594,7 +612,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
        if (changed_addr)
                call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
 
-       dev_set_mtu(br->dev, br_min_mtu(br));
+       dev_set_mtu(br->dev, br_mtu(br));
        br_set_gso_limits(br);
 
        kobject_uevent(&p->kobj, KOBJ_ADD);
@@ -641,7 +659,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
         */
        del_nbp(p);
 
-       dev_set_mtu(br->dev, br_min_mtu(br));
+       dev_set_mtu(br->dev, br_mtu(br));
        br_set_gso_limits(br);
 
        spin_lock_bh(&br->lock);
index 27f1d4f2114ab9dadf908dfd10105c9daac30230..c2120eb889a914b336b910143250d990c9245104 100644 (file)
@@ -214,7 +214,7 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
 
        iph = ip_hdr(skb);
        if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
-               goto inhdr_error;
+               goto csum_error;
 
        len = ntohs(iph->tot_len);
        if (skb->len < len) {
@@ -236,6 +236,8 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
         */
        return 0;
 
+csum_error:
+       __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
 inhdr_error:
        __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
 drop:
@@ -967,6 +969,7 @@ static struct pernet_operations brnf_net_ops __read_mostly = {
        .exit = brnf_exit_net,
        .id   = &brnf_net_id,
        .size = sizeof(struct brnf_net),
+       .async = true,
 };
 
 static struct notifier_block brnf_notifier __read_mostly = {
index 8e13a64d8c99e382f094480dda40c8eb8d43df8d..048d5b51813bac1e295c65b43d598d997892e95c 100644 (file)
@@ -578,7 +578,7 @@ int br_del_bridge(struct net *net, const char *name);
 int br_add_if(struct net_bridge *br, struct net_device *dev,
              struct netlink_ext_ack *extack);
 int br_del_if(struct net_bridge *br, struct net_device *dev);
-int br_min_mtu(const struct net_bridge *br);
+int br_mtu(const struct net_bridge *br);
 netdev_features_t br_features_recompute(struct net_bridge *br,
                                        netdev_features_t features);
 void br_port_flags_change(struct net_bridge_port *port, unsigned long mask);
index 51935270c6512529b87bb252dcc64f6be17719f1..9896f4975353db00af2ebf7432633c5e84ff7f7d 100644 (file)
@@ -168,6 +168,8 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid
                masterv = br_vlan_find(vg, vid);
                if (WARN_ON(!masterv))
                        return NULL;
+               refcount_set(&masterv->refcnt, 1);
+               return masterv;
        }
        refcount_inc(&masterv->refcnt);
 
index ce7152a12bd8652f2941a5e643d2e4200400d756..620e54f082965daef2646abbff8302dc9c39ed7e 100644 (file)
@@ -172,18 +172,69 @@ ebt_among_mt(const struct sk_buff *skb, struct xt_action_param *par)
        return true;
 }
 
+static bool poolsize_invalid(const struct ebt_mac_wormhash *w)
+{
+       return w && w->poolsize >= (INT_MAX / sizeof(struct ebt_mac_wormhash_tuple));
+}
+
+static bool wormhash_offset_invalid(int off, unsigned int len)
+{
+       if (off == 0) /* not present */
+               return false;
+
+       if (off < (int)sizeof(struct ebt_among_info) ||
+           off % __alignof__(struct ebt_mac_wormhash))
+               return true;
+
+       off += sizeof(struct ebt_mac_wormhash);
+
+       return off > len;
+}
+
+static bool wormhash_sizes_valid(const struct ebt_mac_wormhash *wh, int a, int b)
+{
+       if (a == 0)
+               a = sizeof(struct ebt_among_info);
+
+       return ebt_mac_wormhash_size(wh) + a == b;
+}
+
 static int ebt_among_mt_check(const struct xt_mtchk_param *par)
 {
        const struct ebt_among_info *info = par->matchinfo;
        const struct ebt_entry_match *em =
                container_of(par->matchinfo, const struct ebt_entry_match, data);
-       int expected_length = sizeof(struct ebt_among_info);
+       unsigned int expected_length = sizeof(struct ebt_among_info);
        const struct ebt_mac_wormhash *wh_dst, *wh_src;
        int err;
 
+       if (expected_length > em->match_size)
+               return -EINVAL;
+
+       if (wormhash_offset_invalid(info->wh_dst_ofs, em->match_size) ||
+           wormhash_offset_invalid(info->wh_src_ofs, em->match_size))
+               return -EINVAL;
+
        wh_dst = ebt_among_wh_dst(info);
-       wh_src = ebt_among_wh_src(info);
+       if (poolsize_invalid(wh_dst))
+               return -EINVAL;
+
        expected_length += ebt_mac_wormhash_size(wh_dst);
+       if (expected_length > em->match_size)
+               return -EINVAL;
+
+       wh_src = ebt_among_wh_src(info);
+       if (poolsize_invalid(wh_src))
+               return -EINVAL;
+
+       if (info->wh_src_ofs < info->wh_dst_ofs) {
+               if (!wormhash_sizes_valid(wh_src, info->wh_src_ofs, info->wh_dst_ofs))
+                       return -EINVAL;
+       } else {
+               if (!wormhash_sizes_valid(wh_dst, info->wh_dst_ofs, info->wh_src_ofs))
+                       return -EINVAL;
+       }
+
        expected_length += ebt_mac_wormhash_size(wh_src);
 
        if (em->match_size != EBT_ALIGN(expected_length)) {
index 276b60262981c95a9fccd508e8d8123212d535de..f070b5e5b9dd37f78877457e9355ae5eaf3871bd 100644 (file)
@@ -77,6 +77,7 @@ static void __net_exit broute_net_exit(struct net *net)
 static struct pernet_operations broute_net_ops = {
        .init = broute_net_init,
        .exit = broute_net_exit,
+       .async = true,
 };
 
 static int __init ebtable_broute_init(void)
index c41da5fac84f49a9cf5e58eaab88b3beb2d81fba..4151afc8efcc1f1cc41d9ea947d46e88670a6f63 100644 (file)
@@ -105,6 +105,7 @@ static void __net_exit frame_filter_net_exit(struct net *net)
 static struct pernet_operations frame_filter_net_ops = {
        .init = frame_filter_net_init,
        .exit = frame_filter_net_exit,
+       .async = true,
 };
 
 static int __init ebtable_filter_init(void)
index 08df7406ecb3835a664a695a239d73f62eeaf457..b8da2dfe2ec50f61c2587eb74763291eb227c6d1 100644 (file)
@@ -105,6 +105,7 @@ static void __net_exit frame_nat_net_exit(struct net *net)
 static struct pernet_operations frame_nat_net_ops = {
        .init = frame_nat_net_init,
        .exit = frame_nat_net_exit,
+       .async = true,
 };
 
 static int __init ebtable_nat_init(void)
index 02c4b409d31733c6192110139570e48980aa1740..a94d23b0a9af30babff7469c09e9acbf51c1c260 100644 (file)
@@ -1641,7 +1641,8 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr,
        int off = ebt_compat_match_offset(match, m->match_size);
        compat_uint_t msize = m->match_size - off;
 
-       BUG_ON(off >= m->match_size);
+       if (WARN_ON(off >= m->match_size))
+               return -EINVAL;
 
        if (copy_to_user(cm->u.name, match->name,
            strlen(match->name) + 1) || put_user(msize, &cm->match_size))
@@ -1671,7 +1672,8 @@ static int compat_target_to_user(struct ebt_entry_target *t,
        int off = xt_compat_target_offset(target);
        compat_uint_t tsize = t->target_size - off;
 
-       BUG_ON(off >= t->target_size);
+       if (WARN_ON(off >= t->target_size))
+               return -EINVAL;
 
        if (copy_to_user(cm->u.name, target->name,
            strlen(target->name) + 1) || put_user(tsize, &cm->match_size))
@@ -1902,7 +1904,8 @@ static int ebt_buf_add(struct ebt_entries_buf_state *state,
        if (state->buf_kern_start == NULL)
                goto count_only;
 
-       BUG_ON(state->buf_kern_offset + sz > state->buf_kern_len);
+       if (WARN_ON(state->buf_kern_offset + sz > state->buf_kern_len))
+               return -EINVAL;
 
        memcpy(state->buf_kern_start + state->buf_kern_offset, data, sz);
 
@@ -1915,7 +1918,8 @@ static int ebt_buf_add_pad(struct ebt_entries_buf_state *state, unsigned int sz)
 {
        char *b = state->buf_kern_start;
 
-       BUG_ON(b && state->buf_kern_offset > state->buf_kern_len);
+       if (WARN_ON(b && state->buf_kern_offset > state->buf_kern_len))
+               return -EINVAL;
 
        if (b != NULL && sz > 0)
                memset(b + state->buf_kern_offset, 0, sz);
@@ -1992,8 +1996,10 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
        pad = XT_ALIGN(size_kern) - size_kern;
 
        if (pad > 0 && dst) {
-               BUG_ON(state->buf_kern_len <= pad);
-               BUG_ON(state->buf_kern_offset - (match_size + off) + size_kern > state->buf_kern_len - pad);
+               if (WARN_ON(state->buf_kern_len <= pad))
+                       return -EINVAL;
+               if (WARN_ON(state->buf_kern_offset - (match_size + off) + size_kern > state->buf_kern_len - pad))
+                       return -EINVAL;
                memset(dst + size_kern, 0, pad);
        }
        return off + match_size;
@@ -2043,7 +2049,8 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
                if (ret < 0)
                        return ret;
 
-               BUG_ON(ret < match32->match_size);
+               if (WARN_ON(ret < match32->match_size))
+                       return -EINVAL;
                growth += ret - match32->match_size;
                growth += ebt_compat_entry_padsize();
 
@@ -2053,7 +2060,9 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
                if (match_kern)
                        match_kern->match_size = ret;
 
-               WARN_ON(type == EBT_COMPAT_TARGET && size_left);
+               if (WARN_ON(type == EBT_COMPAT_TARGET && size_left))
+                       return -EINVAL;
+
                match32 = (struct compat_ebt_entry_mwt *) buf;
        }
 
@@ -2109,6 +2118,19 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
         *
         * offsets are relative to beginning of struct ebt_entry (i.e., 0).
         */
+       for (i = 0; i < 4 ; ++i) {
+               if (offsets[i] > *total)
+                       return -EINVAL;
+
+               if (i < 3 && offsets[i] == *total)
+                       return -EINVAL;
+
+               if (i == 0)
+                       continue;
+               if (offsets[i-1] > offsets[i])
+                       return -EINVAL;
+       }
+
        for (i = 0, j = 1 ; j < 4 ; j++, i++) {
                struct compat_ebt_entry_mwt *match32;
                unsigned int size;
@@ -2140,7 +2162,8 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
 
        startoff = state->buf_user_offset - startoff;
 
-       BUG_ON(*total < startoff);
+       if (WARN_ON(*total < startoff))
+               return -EINVAL;
        *total -= startoff;
        return 0;
 }
@@ -2267,7 +2290,8 @@ static int compat_do_replace(struct net *net, void __user *user,
        state.buf_kern_len = size64;
 
        ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state);
-       BUG_ON(ret < 0);        /* parses same data again */
+       if (WARN_ON(ret < 0))
+               goto out_unlock;
 
        vfree(entries_tmp);
        tmp.entries_size = size64;
index bd2b3c78f59bddd92425974d5731fba3e962e4d5..91bfc2ac055a077ec1f2b39c603b6063b69a60c2 100644 (file)
@@ -48,6 +48,7 @@ static void __net_exit nf_log_bridge_net_exit(struct net *net)
 static struct pernet_operations nf_log_bridge_net_ops = {
        .init = nf_log_bridge_net_init,
        .exit = nf_log_bridge_net_exit,
+       .async = true,
 };
 
 static int __init nf_log_bridge_init(void)
index e0adcd123f48a1a4f66028bbf731132ed8e7ff17..7a78268cc57242a5c3a26c963abb621dededeb6a 100644 (file)
@@ -544,6 +544,7 @@ static struct pernet_operations caif_net_ops = {
        .exit = caif_exit_net,
        .id   = &caif_net_id,
        .size = sizeof(struct caif_net),
+       .async = true,
 };
 
 /* Initialize Caif devices list */
index 6da324550eec0ac2f17440d050a9b43c6e6b14b0..e899970398a127f11a789fdf3a600a1150c1a0c5 100644 (file)
@@ -954,6 +954,7 @@ static struct notifier_block can_netdev_notifier __read_mostly = {
 static struct pernet_operations can_pernet_ops __read_mostly = {
        .init = can_pernet_init,
        .exit = can_pernet_exit,
+       .async = true,
 };
 
 static __init int can_init(void)
index ac5e5e34fee32e94e7ad7592f537223e5db29b4f..26730d39e048343f4c3ac0dcc4f4f26545cd0e0b 100644 (file)
@@ -1717,6 +1717,7 @@ static void canbcm_pernet_exit(struct net *net)
 static struct pernet_operations canbcm_pernet_ops __read_mostly = {
        .init = canbcm_pernet_init,
        .exit = canbcm_pernet_exit,
+       .async = true,
 };
 
 static int __init bcm_module_init(void)
index 398dd0395ad99b23bb72320c6a8868ceb763b069..08e97668d5cf74fb2e428092ecf26bc8e9ccf521 100644 (file)
@@ -1010,6 +1010,7 @@ static void __net_exit cangw_pernet_exit(struct net *net)
 static struct pernet_operations cangw_pernet_ops = {
        .init = cangw_pernet_init,
        .exit = cangw_pernet_exit,
+       .async = true,
 };
 
 static __init int cgw_module_init(void)
index 1e492ef2a33d945699a327831640db04c1f158fa..4d4c82229e9e21cea5ab7011aaf03e7a28e2b394 100644 (file)
@@ -418,6 +418,7 @@ ceph_parse_options(char *options, const char *dev_name,
                                opt->flags |= CEPH_OPT_FSID;
                        break;
                case Opt_name:
+                       kfree(opt->name);
                        opt->name = kstrndup(argstr[0].from,
                                              argstr[0].to-argstr[0].from,
                                              GFP_KERNEL);
@@ -427,6 +428,9 @@ ceph_parse_options(char *options, const char *dev_name,
                        }
                        break;
                case Opt_secret:
+                       ceph_crypto_key_destroy(opt->key);
+                       kfree(opt->key);
+
                        opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL);
                        if (!opt->key) {
                                err = -ENOMEM;
@@ -437,6 +441,9 @@ ceph_parse_options(char *options, const char *dev_name,
                                goto out;
                        break;
                case Opt_key:
+                       ceph_crypto_key_destroy(opt->key);
+                       kfree(opt->key);
+
                        opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL);
                        if (!opt->key) {
                                err = -ENOMEM;
index 5bdcc5a161fefd67c0a2fe6c74f74425c27dfd16..f9c28f44286cd9f38f57d4042b181e8978975cbb 100644 (file)
@@ -2378,7 +2378,7 @@ EXPORT_SYMBOL(netdev_set_num_tc);
 
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
- * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
  */
 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 {
@@ -3278,15 +3278,23 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
 static void skb_update_prio(struct sk_buff *skb)
 {
-       struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
+       const struct netprio_map *map;
+       const struct sock *sk;
+       unsigned int prioidx;
 
-       if (!skb->priority && skb->sk && map) {
-               unsigned int prioidx =
-                       sock_cgroup_prioidx(&skb->sk->sk_cgrp_data);
+       if (skb->priority)
+               return;
+       map = rcu_dereference_bh(skb->dev->priomap);
+       if (!map)
+               return;
+       sk = skb_to_full_sk(skb);
+       if (!sk)
+               return;
 
-               if (prioidx < map->priomap_len)
-                       skb->priority = map->priomap[prioidx];
-       }
+       prioidx = sock_cgroup_prioidx(&sk->sk_cgrp_data);
+
+       if (prioidx < map->priomap_len)
+               skb->priority = map->priomap[prioidx];
 }
 #else
 #define skb_update_prio(skb)
@@ -4351,6 +4359,9 @@ int netdev_rx_handler_register(struct net_device *dev,
        if (netdev_is_rx_handler_busy(dev))
                return -EBUSY;
 
+       if (dev->priv_flags & IFF_NO_RX_HANDLER)
+               return -EINVAL;
+
        /* Note: rx_handler_data must be set before rx_handler */
        rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
        rcu_assign_pointer(dev->rx_handler, rx_handler);
@@ -6396,6 +6407,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
                .linking = true,
                .upper_info = upper_info,
        };
+       struct net_device *master_dev;
        int ret = 0;
 
        ASSERT_RTNL();
@@ -6407,11 +6419,14 @@ static int __netdev_upper_dev_link(struct net_device *dev,
        if (netdev_has_upper_dev(upper_dev, dev))
                return -EBUSY;
 
-       if (netdev_has_upper_dev(dev, upper_dev))
-               return -EEXIST;
-
-       if (master && netdev_master_upper_dev_get(dev))
-               return -EBUSY;
+       if (!master) {
+               if (netdev_has_upper_dev(dev, upper_dev))
+                       return -EEXIST;
+       } else {
+               master_dev = netdev_master_upper_dev_get(dev);
+               if (master_dev)
+                       return master_dev == upper_dev ? -EEXIST : -EBUSY;
+       }
 
        ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
                                            &changeupper_info.info);
@@ -7542,6 +7557,19 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
                }
        }
 
+       /* LRO/HW-GRO features cannot be combined with RX-FCS */
+       if (features & NETIF_F_RXFCS) {
+               if (features & NETIF_F_LRO) {
+                       netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n");
+                       features &= ~NETIF_F_LRO;
+               }
+
+               if (features & NETIF_F_GRO_HW) {
+                       netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n");
+                       features &= ~NETIF_F_GRO_HW;
+               }
+       }
+
        return features;
 }
 
@@ -7998,7 +8026,8 @@ int register_netdev(struct net_device *dev)
 {
        int err;
 
-       rtnl_lock();
+       if (rtnl_lock_killable())
+               return -EINTR;
        err = register_netdevice(dev);
        rtnl_unlock();
        return err;
index 0ab1af04296cbf0562fa51a88e48fe17ea168c60..a04e1e88bf3ab49340d788589c365aaf45d9d3e2 100644 (file)
@@ -402,8 +402,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
        if (colon)
                *colon = 0;
 
-       dev_load(net, ifr->ifr_name);
-
        /*
         *      See which interface the caller is talking about.
         */
@@ -423,6 +421,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
        case SIOCGIFMAP:
        case SIOCGIFINDEX:
        case SIOCGIFTXQLEN:
+               dev_load(net, ifr->ifr_name);
                rcu_read_lock();
                ret = dev_ifsioc_locked(net, ifr, cmd);
                rcu_read_unlock();
@@ -431,6 +430,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
                return ret;
 
        case SIOCETHTOOL:
+               dev_load(net, ifr->ifr_name);
                rtnl_lock();
                ret = dev_ethtool(net, ifr);
                rtnl_unlock();
@@ -447,6 +447,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
        case SIOCGMIIPHY:
        case SIOCGMIIREG:
        case SIOCSIFNAME:
+               dev_load(net, ifr->ifr_name);
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        return -EPERM;
                rtnl_lock();
@@ -494,6 +495,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
                /* fall through */
        case SIOCBONDSLAVEINFOQUERY:
        case SIOCBONDINFOQUERY:
+               dev_load(net, ifr->ifr_name);
                rtnl_lock();
                ret = dev_ifsioc(net, ifr, cmd);
                rtnl_unlock();
@@ -518,6 +520,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
                    cmd == SIOCGHWTSTAMP ||
                    (cmd >= SIOCDEVPRIVATE &&
                     cmd <= SIOCDEVPRIVATE + 15)) {
+                       dev_load(net, ifr->ifr_name);
                        rtnl_lock();
                        ret = dev_ifsioc(net, ifr, cmd);
                        rtnl_unlock();
index 88e8467792691617b18899b9955ddbfb0447cf1b..9236e421bd627392076659c7dede568c36d00e18 100644 (file)
@@ -1695,10 +1695,11 @@ static int devlink_dpipe_table_put(struct sk_buff *skb,
                goto nla_put_failure;
 
        if (table->resource_valid) {
-               nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
-                                 table->resource_id, DEVLINK_ATTR_PAD);
-               nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
-                                 table->resource_units, DEVLINK_ATTR_PAD);
+               if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
+                                     table->resource_id, DEVLINK_ATTR_PAD) ||
+                   nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
+                                     table->resource_units, DEVLINK_ATTR_PAD))
+                       goto nla_put_failure;
        }
        if (devlink_dpipe_matches_put(table, skb))
                goto nla_put_failure;
@@ -1797,7 +1798,7 @@ static int devlink_dpipe_tables_fill(struct genl_info *info,
        if (!nlh) {
                err = devlink_dpipe_send_and_alloc_skb(&skb, info);
                if (err)
-                       goto err_skb_send_alloc;
+                       return err;
                goto send_done;
        }
 
@@ -1806,7 +1807,6 @@ static int devlink_dpipe_tables_fill(struct genl_info *info,
 nla_put_failure:
        err = -EMSGSIZE;
 err_table_put:
-err_skb_send_alloc:
        genlmsg_cancel(skb, hdr);
        nlmsg_free(skb);
        return err;
@@ -2072,7 +2072,7 @@ static int devlink_dpipe_entries_fill(struct genl_info *info,
                                             table->counters_enabled,
                                             &dump_ctx);
        if (err)
-               goto err_entries_dump;
+               return err;
 
 send_done:
        nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq,
@@ -2080,16 +2080,10 @@ static int devlink_dpipe_entries_fill(struct genl_info *info,
        if (!nlh) {
                err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info);
                if (err)
-                       goto err_skb_send_alloc;
+                       return err;
                goto send_done;
        }
        return genlmsg_reply(dump_ctx.skb, info);
-
-err_entries_dump:
-err_skb_send_alloc:
-       genlmsg_cancel(dump_ctx.skb, dump_ctx.hdr);
-       nlmsg_free(dump_ctx.skb);
-       return err;
 }
 
 static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
@@ -2228,7 +2222,7 @@ static int devlink_dpipe_headers_fill(struct genl_info *info,
        if (!nlh) {
                err = devlink_dpipe_send_and_alloc_skb(&skb, info);
                if (err)
-                       goto err_skb_send_alloc;
+                       return err;
                goto send_done;
        }
        return genlmsg_reply(skb, info);
@@ -2236,7 +2230,6 @@ static int devlink_dpipe_headers_fill(struct genl_info *info,
 nla_put_failure:
        err = -EMSGSIZE;
 err_table_put:
-err_skb_send_alloc:
        genlmsg_cancel(skb, hdr);
        nlmsg_free(skb);
        return err;
@@ -2332,7 +2325,7 @@ devlink_resource_validate_children(struct devlink_resource *resource)
        list_for_each_entry(child_resource, &resource->resource_list, list)
                parts_size += child_resource->size_new;
 
-       if (parts_size > resource->size)
+       if (parts_size > resource->size_new)
                size_valid = false;
 out:
        resource->size_valid = size_valid;
@@ -2345,17 +2338,17 @@ devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
        u64 reminder;
        int err = 0;
 
-       if (size > resource->size_params->size_max) {
+       if (size > resource->size_params.size_max) {
                NL_SET_ERR_MSG_MOD(extack, "Size larger than maximum");
                err = -EINVAL;
        }
 
-       if (size < resource->size_params->size_min) {
+       if (size < resource->size_params.size_min) {
                NL_SET_ERR_MSG_MOD(extack, "Size smaller than minimum");
                err = -EINVAL;
        }
 
-       div64_u64_rem(size, resource->size_params->size_granularity, &reminder);
+       div64_u64_rem(size, resource->size_params.size_granularity, &reminder);
        if (reminder) {
                NL_SET_ERR_MSG_MOD(extack, "Wrong granularity");
                err = -EINVAL;
@@ -2394,20 +2387,22 @@ static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
        return 0;
 }
 
-static void
+static int
 devlink_resource_size_params_put(struct devlink_resource *resource,
                                 struct sk_buff *skb)
 {
        struct devlink_resource_size_params *size_params;
 
-       size_params = resource->size_params;
-       nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
-                         size_params->size_granularity, DEVLINK_ATTR_PAD);
-       nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
-                         size_params->size_max, DEVLINK_ATTR_PAD);
-       nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
-                         size_params->size_min, DEVLINK_ATTR_PAD);
-       nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit);
+       size_params = &resource->size_params;
+       if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
+                             size_params->size_granularity, DEVLINK_ATTR_PAD) ||
+           nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
+                             size_params->size_max, DEVLINK_ATTR_PAD) ||
+           nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
+                             size_params->size_min, DEVLINK_ATTR_PAD) ||
+           nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit))
+               return -EMSGSIZE;
+       return 0;
 }
 
 static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
@@ -2431,10 +2426,12 @@ static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
                nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
                                  resource->size_new, DEVLINK_ATTR_PAD);
        if (resource->resource_ops && resource->resource_ops->occ_get)
-               nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
-                                 resource->resource_ops->occ_get(devlink),
-                                 DEVLINK_ATTR_PAD);
-       devlink_resource_size_params_put(resource, skb);
+               if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
+                                     resource->resource_ops->occ_get(devlink),
+                                     DEVLINK_ATTR_PAD))
+                       goto nla_put_failure;
+       if (devlink_resource_size_params_put(resource, skb))
+               goto nla_put_failure;
        if (list_empty(&resource->resource_list))
                goto out;
 
@@ -2739,22 +2736,22 @@ static const struct genl_ops devlink_nl_ops[] = {
                .cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
                .doit = devlink_nl_cmd_dpipe_table_get,
                .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
                .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+               /* can be retrieved by unprivileged users */
        },
        {
                .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
                .doit = devlink_nl_cmd_dpipe_entries_get,
                .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
                .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+               /* can be retrieved by unprivileged users */
        },
        {
                .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
                .doit = devlink_nl_cmd_dpipe_headers_get,
                .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
                .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+               /* can be retrieved by unprivileged users */
        },
        {
                .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
@@ -2774,8 +2771,8 @@ static const struct genl_ops devlink_nl_ops[] = {
                .cmd = DEVLINK_CMD_RESOURCE_DUMP,
                .doit = devlink_nl_cmd_resource_dump,
                .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
                .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+               /* can be retrieved by unprivileged users */
        },
        {
                .cmd = DEVLINK_CMD_RELOAD,
@@ -3169,17 +3166,19 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
  */
 int devlink_resource_register(struct devlink *devlink,
                              const char *resource_name,
-                             bool top_hierarchy,
                              u64 resource_size,
                              u64 resource_id,
                              u64 parent_resource_id,
-                             struct devlink_resource_size_params *size_params,
+                             const struct devlink_resource_size_params *size_params,
                              const struct devlink_resource_ops *resource_ops)
 {
        struct devlink_resource *resource;
        struct list_head *resource_list;
+       bool top_hierarchy;
        int err = 0;
 
+       top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP;
+
        mutex_lock(&devlink->lock);
        resource = devlink_resource_find(devlink, NULL, resource_id);
        if (resource) {
@@ -3216,7 +3215,8 @@ int devlink_resource_register(struct devlink *devlink,
        resource->id = resource_id;
        resource->resource_ops = resource_ops;
        resource->size_valid = true;
-       resource->size_params = size_params;
+       memcpy(&resource->size_params, size_params,
+              sizeof(resource->size_params));
        INIT_LIST_HEAD(&resource->resource_list);
        list_add_tail(&resource->list, resource_list);
 out:
index 554d364492314fee6da31cbfe1089f7e6e1ee5ec..64cef977484aeadfa4b50cd61776f676e5cbbc95 100644 (file)
@@ -107,7 +107,7 @@ EXPORT_SYMBOL_GPL(dst_cache_set_ip4);
 
 #if IS_ENABLED(CONFIG_IPV6)
 void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
-                      const struct in6_addr *addr)
+                      const struct in6_addr *saddr)
 {
        struct dst_cache_pcpu *idst;
 
@@ -117,7 +117,7 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
        idst = this_cpu_ptr(dst_cache->cache);
        dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
                                  rt6_get_cookie((struct rt6_info *)dst));
-       idst->in6_saddr = *addr;
+       idst->in6_saddr = *saddr;
 }
 EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
 
index 494e6a5d73061acd87534d636ee393555b0ed40d..157cd9efa4bedf868bd498676eb662cbfb786dab 100644 (file)
@@ -1022,6 +1022,15 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
        if (copy_from_user(&info, useraddr, info_size))
                return -EFAULT;
 
+       /* If FLOW_RSS was requested then user-space must be using the
+        * new definition, as FLOW_RSS is newer.
+        */
+       if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) {
+               info_size = sizeof(info);
+               if (copy_from_user(&info, useraddr, info_size))
+                       return -EFAULT;
+       }
+
        if (info.cmd == ETHTOOL_GRXCLSRLALL) {
                if (info.rule_cnt > 0) {
                        if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
@@ -1251,9 +1260,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
        user_key_size = rxfh.key_size;
 
        /* Check that reserved fields are 0 for now */
-       if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
-           rxfh.rsvd8[2] || rxfh.rsvd32)
+       if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
                return -EINVAL;
+       /* Most drivers don't handle rss_context, check it's 0 as well */
+       if (rxfh.rss_context && !ops->get_rxfh_context)
+               return -EOPNOTSUPP;
 
        rxfh.indir_size = dev_indir_size;
        rxfh.key_size = dev_key_size;
@@ -1276,7 +1287,12 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
        if (user_key_size)
                hkey = rss_config + indir_bytes;
 
-       ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
+       if (rxfh.rss_context)
+               ret = dev->ethtool_ops->get_rxfh_context(dev, indir, hkey,
+                                                        &dev_hfunc,
+                                                        rxfh.rss_context);
+       else
+               ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
        if (ret)
                goto out;
 
@@ -1306,6 +1322,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
        u8 *hkey = NULL;
        u8 *rss_config;
        u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
+       bool delete = false;
 
        if (!ops->get_rxnfc || !ops->set_rxfh)
                return -EOPNOTSUPP;
@@ -1319,9 +1336,11 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
                return -EFAULT;
 
        /* Check that reserved fields are 0 for now */
-       if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
-           rxfh.rsvd8[2] || rxfh.rsvd32)
+       if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
                return -EINVAL;
+       /* Most drivers don't handle rss_context, check it's 0 as well */
+       if (rxfh.rss_context && !ops->set_rxfh_context)
+               return -EOPNOTSUPP;
 
        /* If either indir, hash key or function is valid, proceed further.
         * Must request at least one change: indir size, hash key or function.
@@ -1346,7 +1365,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
        if (ret)
                goto out;
 
-       /* rxfh.indir_size == 0 means reset the indir table to default.
+       /* rxfh.indir_size == 0 means reset the indir table to default (master
+        * context) or delete the context (other RSS contexts).
         * rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged.
         */
        if (rxfh.indir_size &&
@@ -1359,9 +1379,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
                if (ret)
                        goto out;
        } else if (rxfh.indir_size == 0) {
-               indir = (u32 *)rss_config;
-               for (i = 0; i < dev_indir_size; i++)
-                       indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+               if (rxfh.rss_context == 0) {
+                       indir = (u32 *)rss_config;
+                       for (i = 0; i < dev_indir_size; i++)
+                               indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+               } else {
+                       delete = true;
+               }
        }
 
        if (rxfh.key_size) {
@@ -1374,15 +1398,25 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
                }
        }
 
-       ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
+       if (rxfh.rss_context)
+               ret = ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc,
+                                           &rxfh.rss_context, delete);
+       else
+               ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
        if (ret)
                goto out;
 
-       /* indicate whether rxfh was set to default */
-       if (rxfh.indir_size == 0)
-               dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
-       else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
-               dev->priv_flags |= IFF_RXFH_CONFIGURED;
+       if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context),
+                        &rxfh.rss_context, sizeof(rxfh.rss_context)))
+               ret = -EFAULT;
+
+       if (!rxfh.rss_context) {
+               /* indicate whether rxfh was set to default */
+               if (rxfh.indir_size == 0)
+                       dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+               else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
+                       dev->priv_flags |= IFF_RXFH_CONFIGURED;
+       }
 
 out:
        kfree(rss_config);
@@ -2520,11 +2554,14 @@ static int set_phy_tunable(struct net_device *dev, void __user *useraddr)
 static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr)
 {
        struct ethtool_fecparam fecparam = { ETHTOOL_GFECPARAM };
+       int rc;
 
        if (!dev->ethtool_ops->get_fecparam)
                return -EOPNOTSUPP;
 
-       dev->ethtool_ops->get_fecparam(dev, &fecparam);
+       rc = dev->ethtool_ops->get_fecparam(dev, &fecparam);
+       if (rc)
+               return rc;
 
        if (copy_to_user(useraddr, &fecparam, sizeof(fecparam)))
                return -EFAULT;
index a6aea805a0a263de2e79f6f5f7e3b90cf0b984a2..f6f04fc0f62986fa2d4681025814b9a29580a071 100644 (file)
@@ -33,6 +33,10 @@ bool fib_rule_matchall(const struct fib_rule *rule)
        if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
            !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
                return false;
+       if (fib_rule_port_range_set(&rule->sport_range))
+               return false;
+       if (fib_rule_port_range_set(&rule->dport_range))
+               return false;
        return true;
 }
 EXPORT_SYMBOL_GPL(fib_rule_matchall);
@@ -221,6 +225,26 @@ static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
        return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
 }
 
+static int nla_get_port_range(struct nlattr *pattr,
+                             struct fib_rule_port_range *port_range)
+{
+       const struct fib_rule_port_range *pr = nla_data(pattr);
+
+       if (!fib_rule_port_range_valid(pr))
+               return -EINVAL;
+
+       port_range->start = pr->start;
+       port_range->end = pr->end;
+
+       return 0;
+}
+
+static int nla_put_port_range(struct sk_buff *skb, int attrtype,
+                             struct fib_rule_port_range *range)
+{
+       return nla_put(skb, attrtype, sizeof(*range), range);
+}
+
 static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
                          struct flowi *fl, int flags,
                          struct fib_lookup_arg *arg)
@@ -425,6 +449,17 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
                    !uid_eq(r->uid_range.end, rule->uid_range.end))
                        continue;
 
+               if (r->ip_proto != rule->ip_proto)
+                       continue;
+
+               if (!fib_rule_port_range_compare(&r->sport_range,
+                                                &rule->sport_range))
+                       continue;
+
+               if (!fib_rule_port_range_compare(&r->dport_range,
+                                                &rule->dport_range))
+                       continue;
+
                if (!ops->compare(r, frh, tb))
                        continue;
                return 1;
@@ -569,6 +604,23 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
                rule->uid_range = fib_kuid_range_unset;
        }
 
+       if (tb[FRA_IP_PROTO])
+               rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);
+
+       if (tb[FRA_SPORT_RANGE]) {
+               err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+                                        &rule->sport_range);
+               if (err)
+                       goto errout_free;
+       }
+
+       if (tb[FRA_DPORT_RANGE]) {
+               err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+                                        &rule->dport_range);
+               if (err)
+                       goto errout_free;
+       }
+
        if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
            rule_exists(ops, frh, tb, rule)) {
                err = -EEXIST;
@@ -634,6 +686,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 {
        struct net *net = sock_net(skb->sk);
        struct fib_rule_hdr *frh = nlmsg_data(nlh);
+       struct fib_rule_port_range sprange = {0, 0};
+       struct fib_rule_port_range dprange = {0, 0};
        struct fib_rules_ops *ops = NULL;
        struct fib_rule *rule, *r;
        struct nlattr *tb[FRA_MAX+1];
@@ -667,6 +721,20 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
                range = fib_kuid_range_unset;
        }
 
+       if (tb[FRA_SPORT_RANGE]) {
+               err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+                                        &sprange);
+               if (err)
+                       goto errout;
+       }
+
+       if (tb[FRA_DPORT_RANGE]) {
+               err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+                                        &dprange);
+               if (err)
+                       goto errout;
+       }
+
        list_for_each_entry(rule, &ops->rules_list, list) {
                if (tb[FRA_PROTOCOL] &&
                    (rule->proto != nla_get_u8(tb[FRA_PROTOCOL])))
@@ -712,6 +780,18 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
                     !uid_eq(rule->uid_range.end, range.end)))
                        continue;
 
+               if (tb[FRA_IP_PROTO] &&
+                   (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO])))
+                       continue;
+
+               if (fib_rule_port_range_set(&sprange) &&
+                   !fib_rule_port_range_compare(&rule->sport_range, &sprange))
+                       continue;
+
+               if (fib_rule_port_range_set(&dprange) &&
+                   !fib_rule_port_range_compare(&rule->dport_range, &dprange))
+                       continue;
+
                if (!ops->compare(rule, frh, tb))
                        continue;
 
@@ -790,7 +870,10 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
                         + nla_total_size(4) /* FRA_FWMASK */
                         + nla_total_size_64bit(8) /* FRA_TUN_ID */
                         + nla_total_size(sizeof(struct fib_kuid_range))
-                        + nla_total_size(1); /* FRA_PROTOCOL */
+                        + nla_total_size(1) /* FRA_PROTOCOL */
+                        + nla_total_size(1) /* FRA_IP_PROTO */
+                        + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */
+                        + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */
 
        if (ops->nlmsg_payload)
                payload += ops->nlmsg_payload(rule);
@@ -855,7 +938,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
            (rule->l3mdev &&
             nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
            (uid_range_set(&rule->uid_range) &&
-            nla_put_uid_range(skb, &rule->uid_range)))
+            nla_put_uid_range(skb, &rule->uid_range)) ||
+           (fib_rule_port_range_set(&rule->sport_range) &&
+            nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) ||
+           (fib_rule_port_range_set(&rule->dport_range) &&
+            nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) ||
+           (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto)))
                goto nla_put_failure;
 
        if (rule->suppress_ifgroup != -1) {
index 0c121adbdbaaac5ed1f48a8e618cc0a2af2039d6..00c711c5f1a2965d6333aedccabf40843d92a4a3 100644 (file)
@@ -1890,6 +1890,202 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
        .arg4_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
+          struct bpf_map *, map, u32, key, u64, flags)
+{
+       /* If user passes invalid input drop the packet. */
+       if (unlikely(flags))
+               return SK_DROP;
+
+       msg->key = key;
+       msg->flags = flags;
+       msg->map = map;
+
+       return SK_PASS;
+}
+
+struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
+{
+       struct sock *sk = NULL;
+
+       if (msg->map) {
+               sk = __sock_map_lookup_elem(msg->map, msg->key);
+
+               msg->key = 0;
+               msg->map = NULL;
+       }
+
+       return sk;
+}
+
+static const struct bpf_func_proto bpf_msg_redirect_map_proto = {
+       .func           = bpf_msg_redirect_map,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_CONST_MAP_PTR,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg_buff *, msg, u32, bytes)
+{
+       msg->apply_bytes = bytes;
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
+       .func           = bpf_msg_apply_bytes,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes)
+{
+       msg->cork_bytes = bytes;
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
+       .func           = bpf_msg_cork_bytes,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_msg_pull_data,
+          struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
+{
+       unsigned int len = 0, offset = 0, copy = 0;
+       struct scatterlist *sg = msg->sg_data;
+       int first_sg, last_sg, i, shift;
+       unsigned char *p, *to, *from;
+       int bytes = end - start;
+       struct page *page;
+
+       if (unlikely(flags || end <= start))
+               return -EINVAL;
+
+       /* First find the starting scatterlist element */
+       i = msg->sg_start;
+       do {
+               len = sg[i].length;
+               offset += len;
+               if (start < offset + len)
+                       break;
+               i++;
+               if (i == MAX_SKB_FRAGS)
+                       i = 0;
+       } while (i != msg->sg_end);
+
+       if (unlikely(start >= offset + len))
+               return -EINVAL;
+
+       if (!msg->sg_copy[i] && bytes <= len)
+               goto out;
+
+       first_sg = i;
+
+       /* At this point we need to linearize multiple scatterlist
+        * elements or a single shared page. Either way we need to
+        * copy into a linear buffer exclusively owned by BPF. Then
+        * place the buffer in the scatterlist and fixup the original
+        * entries by removing the entries now in the linear buffer
+        * and shifting the remaining entries. For now we do not try
+        * to copy partial entries to avoid complexity of running out
+        * of sg_entry slots. The downside is reading a single byte
+        * will copy the entire sg entry.
+        */
+       do {
+               copy += sg[i].length;
+               i++;
+               if (i == MAX_SKB_FRAGS)
+                       i = 0;
+               if (bytes < copy)
+                       break;
+       } while (i != msg->sg_end);
+       last_sg = i;
+
+       if (unlikely(copy < end - start))
+               return -EINVAL;
+
+       page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy));
+       if (unlikely(!page))
+               return -ENOMEM;
+       p = page_address(page);
+       offset = 0;
+
+       i = first_sg;
+       do {
+               from = sg_virt(&sg[i]);
+               len = sg[i].length;
+               to = p + offset;
+
+               memcpy(to, from, len);
+               offset += len;
+               sg[i].length = 0;
+               put_page(sg_page(&sg[i]));
+
+               i++;
+               if (i == MAX_SKB_FRAGS)
+                       i = 0;
+       } while (i != last_sg);
+
+       sg[first_sg].length = copy;
+       sg_set_page(&sg[first_sg], page, copy, 0);
+
+       /* To repair sg ring we need to shift entries. If we only
+        * had a single entry though we can just replace it and
+        * be done. Otherwise walk the ring and shift the entries.
+        */
+       shift = last_sg - first_sg - 1;
+       if (!shift)
+               goto out;
+
+       i = first_sg + 1;
+       do {
+               int move_from;
+
+               if (i + shift >= MAX_SKB_FRAGS)
+                       move_from = i + shift - MAX_SKB_FRAGS;
+               else
+                       move_from = i + shift;
+
+               if (move_from == msg->sg_end)
+                       break;
+
+               sg[i] = sg[move_from];
+               sg[move_from].length = 0;
+               sg[move_from].page_link = 0;
+               sg[move_from].offset = 0;
+
+               i++;
+               if (i == MAX_SKB_FRAGS)
+                       i = 0;
+       } while (1);
+       msg->sg_end -= shift;
+       if (msg->sg_end < 0)
+               msg->sg_end += MAX_SKB_FRAGS;
+out:
+       msg->data = sg_virt(&sg[i]) + start - offset;
+       msg->data_end = msg->data + bytes;
+
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_pull_data_proto = {
+       .func           = bpf_msg_pull_data,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_ANYTHING,
+};
+
 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
 {
        return task_get_classid(skb);
@@ -2087,6 +2283,10 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
        u32 off = skb_mac_header_len(skb);
        int ret;
 
+       /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+       if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+               return -ENOTSUPP;
+
        ret = skb_cow(skb, len_diff);
        if (unlikely(ret < 0))
                return ret;
@@ -2096,19 +2296,21 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
                return ret;
 
        if (skb_is_gso(skb)) {
+               struct skb_shared_info *shinfo = skb_shinfo(skb);
+
                /* SKB_GSO_TCPV4 needs to be changed into
                 * SKB_GSO_TCPV6.
                 */
-               if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
-                       skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
-                       skb_shinfo(skb)->gso_type |=  SKB_GSO_TCPV6;
+               if (shinfo->gso_type & SKB_GSO_TCPV4) {
+                       shinfo->gso_type &= ~SKB_GSO_TCPV4;
+                       shinfo->gso_type |=  SKB_GSO_TCPV6;
                }
 
                /* Due to IPv6 header, MSS needs to be downgraded. */
-               skb_shinfo(skb)->gso_size -= len_diff;
+               skb_decrease_gso_size(shinfo, len_diff);
                /* Header must be checked, and gso_segs recomputed. */
-               skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
-               skb_shinfo(skb)->gso_segs = 0;
+               shinfo->gso_type |= SKB_GSO_DODGY;
+               shinfo->gso_segs = 0;
        }
 
        skb->protocol = htons(ETH_P_IPV6);
@@ -2123,6 +2325,10 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
        u32 off = skb_mac_header_len(skb);
        int ret;
 
+       /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+       if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+               return -ENOTSUPP;
+
        ret = skb_unclone(skb, GFP_ATOMIC);
        if (unlikely(ret < 0))
                return ret;
@@ -2132,19 +2338,21 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
                return ret;
 
        if (skb_is_gso(skb)) {
+               struct skb_shared_info *shinfo = skb_shinfo(skb);
+
                /* SKB_GSO_TCPV6 needs to be changed into
                 * SKB_GSO_TCPV4.
                 */
-               if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
-                       skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
-                       skb_shinfo(skb)->gso_type |=  SKB_GSO_TCPV4;
+               if (shinfo->gso_type & SKB_GSO_TCPV6) {
+                       shinfo->gso_type &= ~SKB_GSO_TCPV6;
+                       shinfo->gso_type |=  SKB_GSO_TCPV4;
                }
 
                /* Due to IPv4 header, MSS can be upgraded. */
-               skb_shinfo(skb)->gso_size += len_diff;
+               skb_increase_gso_size(shinfo, len_diff);
                /* Header must be checked, and gso_segs recomputed. */
-               skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
-               skb_shinfo(skb)->gso_segs = 0;
+               shinfo->gso_type |= SKB_GSO_DODGY;
+               shinfo->gso_segs = 0;
        }
 
        skb->protocol = htons(ETH_P_IP);
@@ -2243,6 +2451,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
        u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
        int ret;
 
+       /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+       if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+               return -ENOTSUPP;
+
        ret = skb_cow(skb, len_diff);
        if (unlikely(ret < 0))
                return ret;
@@ -2252,11 +2464,13 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
                return ret;
 
        if (skb_is_gso(skb)) {
+               struct skb_shared_info *shinfo = skb_shinfo(skb);
+
                /* Due to header grow, MSS needs to be downgraded. */
-               skb_shinfo(skb)->gso_size -= len_diff;
+               skb_decrease_gso_size(shinfo, len_diff);
                /* Header must be checked, and gso_segs recomputed. */
-               skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
-               skb_shinfo(skb)->gso_segs = 0;
+               shinfo->gso_type |= SKB_GSO_DODGY;
+               shinfo->gso_segs = 0;
        }
 
        return 0;
@@ -2267,6 +2481,10 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
        u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
        int ret;
 
+       /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+       if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+               return -ENOTSUPP;
+
        ret = skb_unclone(skb, GFP_ATOMIC);
        if (unlikely(ret < 0))
                return ret;
@@ -2276,11 +2494,13 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
                return ret;
 
        if (skb_is_gso(skb)) {
+               struct skb_shared_info *shinfo = skb_shinfo(skb);
+
                /* Due to header shrink, MSS can be upgraded. */
-               skb_shinfo(skb)->gso_size += len_diff;
+               skb_increase_gso_size(shinfo, len_diff);
                /* Header must be checked, and gso_segs recomputed. */
-               skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
-               skb_shinfo(skb)->gso_segs = 0;
+               shinfo->gso_type |= SKB_GSO_DODGY;
+               shinfo->gso_segs = 0;
        }
 
        return 0;
@@ -2831,7 +3051,8 @@ bool bpf_helper_changes_pkt_data(void *func)
            func == bpf_l3_csum_replace ||
            func == bpf_l4_csum_replace ||
            func == bpf_xdp_adjust_head ||
-           func == bpf_xdp_adjust_meta)
+           func == bpf_xdp_adjust_meta ||
+           func == bpf_msg_pull_data)
                return true;
 
        return false;
@@ -2991,7 +3212,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
        struct ip_tunnel_info *info;
 
        if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
-                              BPF_F_DONT_FRAGMENT)))
+                              BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
                return -EINVAL;
        if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
                switch (size) {
@@ -3025,6 +3246,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
                info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
        if (flags & BPF_F_ZERO_CSUM_TX)
                info->key.tun_flags &= ~TUNNEL_CSUM;
+       if (flags & BPF_F_SEQ_NUMBER)
+               info->key.tun_flags |= TUNNEL_SEQ;
 
        info->key.tun_id = cpu_to_be64(from->tunnel_id);
        info->key.tos = from->tunnel_tos;
@@ -3589,6 +3812,22 @@ static const struct bpf_func_proto *
        }
 }
 
+static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
+{
+       switch (func_id) {
+       case BPF_FUNC_msg_redirect_map:
+               return &bpf_msg_redirect_map_proto;
+       case BPF_FUNC_msg_apply_bytes:
+               return &bpf_msg_apply_bytes_proto;
+       case BPF_FUNC_msg_cork_bytes:
+               return &bpf_msg_cork_bytes_proto;
+       case BPF_FUNC_msg_pull_data:
+               return &bpf_msg_pull_data_proto;
+       default:
+               return bpf_base_func_proto(func_id);
+       }
+}
+
 static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
 {
        switch (func_id) {
@@ -3978,6 +4217,32 @@ static bool sk_skb_is_valid_access(int off, int size,
        return bpf_skb_is_valid_access(off, size, type, info);
 }
 
+static bool sk_msg_is_valid_access(int off, int size,
+                                  enum bpf_access_type type,
+                                  struct bpf_insn_access_aux *info)
+{
+       if (type == BPF_WRITE)
+               return false;
+
+       switch (off) {
+       case offsetof(struct sk_msg_md, data):
+               info->reg_type = PTR_TO_PACKET;
+               break;
+       case offsetof(struct sk_msg_md, data_end):
+               info->reg_type = PTR_TO_PACKET_END;
+               break;
+       }
+
+       if (off < 0 || off >= sizeof(struct sk_msg_md))
+               return false;
+       if (off % size != 0)
+               return false;
+       if (size != sizeof(__u64))
+               return false;
+
+       return true;
+}
+
 static u32 bpf_convert_ctx_access(enum bpf_access_type type,
                                  const struct bpf_insn *si,
                                  struct bpf_insn *insn_buf,
@@ -4776,6 +5041,29 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
        return insn - insn_buf;
 }
 
+static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
+                                    const struct bpf_insn *si,
+                                    struct bpf_insn *insn_buf,
+                                    struct bpf_prog *prog, u32 *target_size)
+{      /* Rewrites a load of sk_msg_md.data / sk_msg_md.data_end into a load
+        * of the same-named field of struct sk_msg_buff, keeping si->dst_reg
+        * as destination and si->src_reg as base register.
+        * Returns the number of instructions written to @insn_buf; for any
+        * other offset nothing is emitted and 0 is returned.
+        * @type, @prog and @target_size are not used here.
+        */
+       struct bpf_insn *insn = insn_buf;
+
+       switch (si->off) {
+       case offsetof(struct sk_msg_md, data):
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data),
+                                     si->dst_reg, si->src_reg,
+                                     offsetof(struct sk_msg_buff, data));
+               break;
+       case offsetof(struct sk_msg_md, data_end):
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data_end),
+                                     si->dst_reg, si->src_reg,
+                                     offsetof(struct sk_msg_buff, data_end));
+               break;
+       }
+
+       return insn - insn_buf; /* count of emitted instructions */
+}
+
 const struct bpf_verifier_ops sk_filter_verifier_ops = {
        .get_func_proto         = sk_filter_func_proto,
        .is_valid_access        = sk_filter_is_valid_access,
@@ -4866,6 +5154,15 @@ const struct bpf_verifier_ops sk_skb_verifier_ops = {
 const struct bpf_prog_ops sk_skb_prog_ops = {
 };
 
+const struct bpf_verifier_ops sk_msg_verifier_ops = {
+       .get_func_proto         = sk_msg_func_proto,    /* helper ID -> prototype */
+       .is_valid_access        = sk_msg_is_valid_access,       /* sk_msg_md access check */
+       .convert_ctx_access     = sk_msg_convert_ctx_access,    /* sk_msg_md -> sk_msg_buff loads */
+};
+
+const struct bpf_prog_ops sk_msg_prog_ops = {
+};     /* no callbacks are populated in this table */
+
 int sk_detach_filter(struct sock *sk)
 {
        int ret = -ENOENT;
index 559db9ea8d86f509ae7caae7801c7502b22b293f..d29f09bc5ff90c457513c46200310cb00dd0c188 100644 (file)
@@ -1341,22 +1341,6 @@ __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
 }
 EXPORT_SYMBOL(__get_hash_from_flowi6);
 
-__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys)
-{
-       memset(keys, 0, sizeof(*keys));
-
-       keys->addrs.v4addrs.src = fl4->saddr;
-       keys->addrs.v4addrs.dst = fl4->daddr;
-       keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
-       keys->ports.src = fl4->fl4_sport;
-       keys->ports.dst = fl4->fl4_dport;
-       keys->keyid.keyid = fl4->fl4_gre_key;
-       keys->basic.ip_proto = fl4->flowi4_proto;
-
-       return flow_hash_from_keys(keys);
-}
-EXPORT_SYMBOL(__get_hash_from_flowi4);
-
 static const struct flow_dissector_key flow_keys_dissector_keys[] = {
        {
                .key_id = FLOW_DISSECTOR_KEY_CONTROL,
index 27a55236ad64fda06723af58eec66e3efb244f2d..95ba2c53bd9a280433959d22d60fa36b784cbe97 100644 (file)
@@ -301,6 +301,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
        net->user_ns = user_ns;
        idr_init(&net->netns_ids);
        spin_lock_init(&net->nsid_lock);
+       mutex_init(&net->ipv4.ra_mutex);
 
        list_for_each_entry(ops, &pernet_list, list) {
                error = ops_init(ops, net);
@@ -362,7 +363,7 @@ static void dec_net_namespaces(struct ucounts *ucounts)
        dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
 }
 
-static struct kmem_cache *net_cachep;
+static struct kmem_cache *net_cachep __ro_after_init;
 static struct workqueue_struct *netns_wq;
 
 static struct net *net_alloc(void)
@@ -882,7 +883,7 @@ static int __init net_ns_init(void)
 #ifdef CONFIG_NET_NS
        net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
                                        SMP_CACHE_BYTES,
-                                       SLAB_PANIC, NULL);
+                                       SLAB_PANIC|SLAB_ACCOUNT, NULL);
 
        /* Create workqueue for cleanup */
        netns_wq = create_singlethread_workqueue("netns");
index b8ab5c8295113a8d30b64d8a260d2d7582ead0f3..545cf08cd558c559b7dec3aad1e84c44de3032d1 100644 (file)
@@ -906,13 +906,14 @@ static ssize_t pktgen_if_write(struct file *file,
        i += len;
 
        if (debug) {
-               size_t copy = min_t(size_t, count, 1023);
-               char tb[copy + 1];
-               if (copy_from_user(tb, user_buffer, copy))
-                       return -EFAULT;
-               tb[copy] = 0;
-               pr_debug("%s,%lu  buffer -:%s:-\n",
-                        name, (unsigned long)count, tb);
+               size_t copy = min_t(size_t, count + 1, 1024);
+               char *tp = strndup_user(user_buffer, copy);
+
+               if (IS_ERR(tp))
+                       return PTR_ERR(tp);
+
+               pr_debug("%s,%zu  buffer -:%s:-\n", name, count, tp);
+               kfree(tp);
        }
 
        if (!strcmp(name, "min_pkt_size")) {
@@ -3851,6 +3852,7 @@ static struct pernet_operations pg_net_ops = {
        .exit = pg_net_exit,
        .id   = &pg_net_id,
        .size = sizeof(struct pktgen_net),
+       .async = true,
 };
 
 static int __init pg_init(void)
index 67f375cfb9829806bb8beb86f15acfa3e76b8c4d..87079eaa871bb9c9de477246bd67f5aa40c2f87a 100644 (file)
@@ -75,6 +75,12 @@ void rtnl_lock(void)
 }
 EXPORT_SYMBOL(rtnl_lock);
 
+int rtnl_lock_killable(void)
+{      /* Acquire rtnl_mutex through mutex_lock_killable() and hand its
+        * return value back to the caller, who must check it before
+        * assuming the lock is held.
+        */
+       return mutex_lock_killable(&rtnl_mutex);
+}
+EXPORT_SYMBOL(rtnl_lock_killable);
+
 static struct sk_buff *defer_kfree_skb_list;
 void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail)
 {
index 1a7485a2cdfa8dd72605a09722c21b02f77106bc..b5c75d4fcf3778415a30fd04abaf317a228c7b19 100644 (file)
@@ -77,8 +77,8 @@
 #include <linux/capability.h>
 #include <linux/user_namespace.h>
 
-struct kmem_cache *skbuff_head_cache __read_mostly;
-static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+struct kmem_cache *skbuff_head_cache __ro_after_init;
+static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
 int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
 EXPORT_SYMBOL(sysctl_max_skb_frags);
 
@@ -3460,6 +3460,19 @@ void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
 }
 EXPORT_SYMBOL_GPL(skb_pull_rcsum);
 
+static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
+{      /* Build, by value, a frag descriptor covering the linear (head)
+        * data of @frag_skb: head page, offset of ->data within it, and
+        * skb_headlen() bytes. No page reference is taken here.
+        */
+       skb_frag_t head_frag;
+       struct page *page;
+
+       page = virt_to_head_page(frag_skb->head);
+       head_frag.page.p = page;
+       head_frag.page_offset = frag_skb->data -        /* ->data relative to the head page */
+               (unsigned char *)page_address(page);
+       head_frag.size = skb_headlen(frag_skb);
+       return head_frag;
+}
+
 /**
  *     skb_segment - Perform protocol segmentation on skb.
  *     @head_skb: buffer to segment
@@ -3664,15 +3677,19 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 
                while (pos < offset + len) {
                        if (i >= nfrags) {
-                               BUG_ON(skb_headlen(list_skb));
-
                                i = 0;
                                nfrags = skb_shinfo(list_skb)->nr_frags;
                                frag = skb_shinfo(list_skb)->frags;
                                frag_skb = list_skb;
+                               if (!skb_headlen(list_skb)) {
+                                       BUG_ON(!nfrags);
+                               } else {
+                                       BUG_ON(!list_skb->head_frag);
 
-                               BUG_ON(!nfrags);
-
+                                       /* to make room for head_frag. */
+                                       i--;
+                                       frag--;
+                               }
                                if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
                                    skb_zerocopy_clone(nskb, frag_skb,
                                                       GFP_ATOMIC))
@@ -3689,7 +3706,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
                                goto err;
                        }
 
-                       *nskb_frag = *frag;
+                       *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
                        __skb_frag_ref(nskb_frag);
                        size = skb_frag_size(nskb_frag);
 
@@ -4181,7 +4198,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
 
        skb_queue_tail(&sk->sk_error_queue, skb);
        if (!sock_flag(sk, SOCK_DEAD))
-               sk->sk_data_ready(sk);
+               sk->sk_error_report(sk);
        return 0;
 }
 EXPORT_SYMBOL(sock_queue_err_skb);
@@ -4893,7 +4910,7 @@ EXPORT_SYMBOL_GPL(skb_scrub_packet);
  *
  * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
  */
-unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
+static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
 {
        const struct skb_shared_info *shinfo = skb_shinfo(skb);
        unsigned int thlen = 0;
@@ -4906,7 +4923,7 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
                        thlen += inner_tcp_hdrlen(skb);
        } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
                thlen = tcp_hdrlen(skb);
-       } else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) {
+       } else if (unlikely(skb_is_gso_sctp(skb))) {
                thlen = sizeof(struct sctphdr);
        }
        /* UFO sets gso_size to the size of the fragmentation
@@ -4915,7 +4932,40 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
         */
        return thlen + shinfo->gso_size;
 }
-EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
+
+/**
+ * skb_gso_network_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_network_seglen is used to determine the real size of the
+ * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP).
+ *
+ * The MAC/L2 header is not accounted for.
+ *
+ * Return: the distance from the network header to the transport header
+ * plus the value of skb_gso_transport_seglen() for @skb.
+ */
+static unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
+{
+       unsigned int hdr_len = skb_transport_header(skb) -
+                              skb_network_header(skb);
+
+       return hdr_len + skb_gso_transport_seglen(skb);
+}
+
+/**
+ * skb_gso_mac_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_mac_seglen is used to determine the real size of the
+ * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4
+ * headers (TCP/UDP).
+ *
+ * Return: the distance from the MAC header to the transport header
+ * plus the value of skb_gso_transport_seglen() for @skb.
+ */
+static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
+{
+       unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+
+       return hdr_len + skb_gso_transport_seglen(skb);
+}
 
 /**
  * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS
@@ -4957,19 +5007,20 @@ static inline bool skb_gso_size_check(const struct sk_buff *skb,
 }
 
 /**
- * skb_gso_validate_mtu - Return in case such skb fits a given MTU
+ * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU?
  *
  * @skb: GSO skb
  * @mtu: MTU to validate against
  *
- * skb_gso_validate_mtu validates if a given skb will fit a wanted MTU
- * once split.
+ * skb_gso_validate_network_len validates if a given skb will fit a
+ * wanted MTU once split. It considers L3 headers, L4 headers, and the
+ * payload.
  */
-bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu)
+bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu)
 {
        return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu);
 }
-EXPORT_SYMBOL_GPL(skb_gso_validate_mtu);
+EXPORT_SYMBOL_GPL(skb_gso_validate_network_len);
 
 /**
  * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length?
@@ -4988,13 +5039,16 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);
 
 static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
 {
+       int mac_len;
+
        if (skb_cow(skb, skb_headroom(skb)) < 0) {
                kfree_skb(skb);
                return NULL;
        }
 
-       memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN,
-               2 * ETH_ALEN);
+       mac_len = skb->data - skb_mac_header(skb);
+       memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
+               mac_len - VLAN_HLEN - ETH_TLEN);
        skb->mac_header += VLAN_HLEN;
        return skb;
 }
index 507d8c6c431965242efa19f206a1eef28d0f2cff..e689496dfd8a7d15a02f828f0b9e98651d15c705 100644 (file)
@@ -1052,8 +1052,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
                if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
                        if (sk->sk_protocol != IPPROTO_TCP)
                                ret = -ENOTSUPP;
-                       else if (sk->sk_state != TCP_CLOSE)
-                               ret = -EBUSY;
                } else if (sk->sk_family != PF_RDS) {
                        ret = -ENOTSUPP;
                }
@@ -1062,8 +1060,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
                                ret = -EINVAL;
                        else
                                sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
-                       break;
                }
+               break;
+
        default:
                ret = -ENOPROTOOPT;
                break;
@@ -2238,6 +2237,67 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
 }
 EXPORT_SYMBOL(sk_page_frag_refill);
 
+/**
+ * sk_alloc_sg - grow a scatterlist ring with charged page-frag memory
+ * @sk: socket whose page frag and write-memory accounting are used
+ * @len: total number of bytes wanted, including the *@sg_curr_size bytes
+ *       that are already accounted for
+ * @sg: base of the scatterlist array, treated as a ring of MAX_SKB_FRAGS
+ *      entries
+ * @sg_start: ring index of the first in-use entry; reaching it while
+ *            advancing means the ring is full
+ * @sg_curr_index: in/out, ring index of the next free entry
+ * @sg_curr_size: in/out, number of bytes accounted for so far
+ * @first_coalesce: extending the previous entry in place is only attempted
+ *                  while the next free index is greater than this value
+ *
+ * Each iteration refills the socket page frag, charges the bytes to @sk and
+ * either extends the previous ring entry (when the new bytes directly follow
+ * it in the same page) or fills a new entry and takes a page reference.
+ *
+ * *@sg_curr_index and *@sg_curr_size are written back on every exit path,
+ * including the error paths.
+ *
+ * Return: 0 on success, -ENOMEM if the page frag could not be refilled or
+ * the memory could not be scheduled, -ENOSPC if the ring wrapped around to
+ * @sg_start.
+ */
+int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
+               int sg_start, int *sg_curr_index, unsigned int *sg_curr_size,
+               int first_coalesce)
+{
+       int sg_curr = *sg_curr_index, use = 0, rc = 0;
+       unsigned int size = *sg_curr_size;
+       struct page_frag *pfrag;
+       struct scatterlist *sge;
+
+       len -= size;
+       pfrag = sk_page_frag(sk);
+
+       while (len > 0) {
+               unsigned int orig_offset;
+
+               if (!sk_page_frag_refill(sk, pfrag)) {
+                       rc = -ENOMEM;
+                       goto out;
+               }
+
+               use = min_t(int, len, pfrag->size - pfrag->offset);
+
+               if (!sk_wmem_schedule(sk, use)) {
+                       rc = -ENOMEM;
+                       goto out;
+               }
+
+               sk_mem_charge(sk, use);
+               size += use;
+               orig_offset = pfrag->offset;
+               pfrag->offset += use;
+
+               /* Coalesce with the entry just before the free slot, not
+                * with ring entry 0. Index 0 has no predecessor in the
+                * array, so never form sg - 1 even if @first_coalesce is
+                * negative.
+                */
+               sge = sg_curr > 0 ? sg + sg_curr - 1 : NULL;
+               if (sge && sg_curr > first_coalesce &&
+                   sg_page(sge) == pfrag->page &&
+                   sge->offset + sge->length == orig_offset) {
+                       sge->length += use;
+               } else {
+                       sge = sg + sg_curr;
+                       sg_unmark_end(sge);
+                       sg_set_page(sge, pfrag->page, use, orig_offset);
+                       get_page(pfrag->page);
+                       sg_curr++;
+
+                       /* the array is used as a ring */
+                       if (sg_curr == MAX_SKB_FRAGS)
+                               sg_curr = 0;
+
+                       if (sg_curr == sg_start) {
+                               rc = -ENOSPC;
+                               break;
+                       }
+               }
+
+               len -= use;
+       }
+out:
+       *sg_curr_size = size;
+       *sg_curr_index = sg_curr;
+       return rc;
+}
+EXPORT_SYMBOL(sk_alloc_sg);
+
 static void __lock_sock(struct sock *sk)
        __releases(&sk->sk_lock.slock)
        __acquires(&sk->sk_lock.slock)
@@ -3266,6 +3326,27 @@ void proto_unregister(struct proto *prot)
 }
 EXPORT_SYMBOL(proto_unregister);
 
+int sock_load_diag_module(int family, int protocol)
+{      /* Request the sock_diag module for @family (when @protocol is 0) or
+        * for the @family/@protocol pair, skipping the request when the
+        * family or protocol is not registered.
+        * Returns -ENOENT in the skipped cases, otherwise the value
+        * returned by request_module().
+        */
+       if (!protocol) {
+               if (!sock_is_registered(family))        /* unknown family: nothing to load */
+                       return -ENOENT;
+
+               return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
+                                     NETLINK_SOCK_DIAG, family);
+       }
+
+#ifdef CONFIG_INET
+       if (family == AF_INET &&
+           !rcu_access_pointer(inet_protos[protocol])) /* no handler registered for this protocol */
+               return -ENOENT;
+#endif
+
+       return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
+                             NETLINK_SOCK_DIAG, family, protocol);
+}
+EXPORT_SYMBOL(sock_load_diag_module);
+
 #ifdef CONFIG_PROC_FS
 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(proto_list_mutex)
index aee5642affd93bba82e06570a5c6de7e0b9116f6..a3392a8f9276cfa0f2ee08b8ea843e145edb06b0 100644 (file)
@@ -220,8 +220,7 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
                return -EINVAL;
 
        if (sock_diag_handlers[req->sdiag_family] == NULL)
-               request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
-                               NETLINK_SOCK_DIAG, req->sdiag_family);
+               sock_load_diag_module(req->sdiag_family, 0);
 
        mutex_lock(&sock_diag_table_mutex);
        hndl = sock_diag_handlers[req->sdiag_family];
@@ -247,8 +246,7 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
        case TCPDIAG_GETSOCK:
        case DCCPDIAG_GETSOCK:
                if (inet_rcv_compat == NULL)
-                       request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
-                                       NETLINK_SOCK_DIAG, AF_INET);
+                       sock_load_diag_module(AF_INET, 0);
 
                mutex_lock(&sock_diag_table_mutex);
                if (inet_rcv_compat != NULL)
@@ -281,14 +279,12 @@ static int sock_diag_bind(struct net *net, int group)
        case SKNLGRP_INET_TCP_DESTROY:
        case SKNLGRP_INET_UDP_DESTROY:
                if (!sock_diag_handlers[AF_INET])
-                       request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
-                                      NETLINK_SOCK_DIAG, AF_INET);
+                       sock_load_diag_module(AF_INET, 0);
                break;
        case SKNLGRP_INET6_TCP_DESTROY:
        case SKNLGRP_INET6_UDP_DESTROY:
                if (!sock_diag_handlers[AF_INET6])
-                       request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
-                                      NETLINK_SOCK_DIAG, AF_INET6);
+                       sock_load_diag_module(AF_INET6, 0);
                break;
        }
        return 0;
index d714f65782b7c3dc6d944ca572882b59b24f96f8..4f47f92459cc31675d4c8ec20dd3baea1b62cac1 100644 (file)
@@ -32,6 +32,9 @@ static int max_skb_frags = MAX_SKB_FRAGS;
 
 static int net_msg_warn;       /* Unused, but still a sysctl */
 
+int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
+EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
+
 #ifdef CONFIG_RPS
 static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
                                void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -513,6 +516,15 @@ static struct ctl_table net_core_table[] = {
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &zero,
        },
+       {
+               .procname       = "fb_tunnels_only_for_init_net",
+               .data           = &sysctl_fb_tunnels_only_for_init_net,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
        { }
 };
 
index e65fcb45c3f6c1edc70fc9898ebe6404175b102f..13ad28ab1e799e32cf6ed4461a3c0e943717a951 100644 (file)
@@ -1031,6 +1031,7 @@ static struct pernet_operations dccp_v4_ops = {
        .init   = dccp_v4_init_net,
        .exit   = dccp_v4_exit_net,
        .exit_batch = dccp_v4_exit_batch,
+       .async  = true,
 };
 
 static int __init dccp_v4_init(void)
index 5df7857fc0f3aeefb2ed0324d97d13cb68551383..2f48c020f8c33276cf31cebbe6cfc8fe05290b3c 100644 (file)
@@ -1116,6 +1116,7 @@ static struct pernet_operations dccp_v6_ops = {
        .init   = dccp_v6_init_net,
        .exit   = dccp_v6_exit_net,
        .exit_batch = dccp_v6_exit_batch,
+       .async  = true,
 };
 
 static int __init dccp_v6_init(void)
index 15bdc002d90c0fba1a532a330348facdf50547c8..84cd4e3fd01b1dec5ed4234291dde60d4f1d1d61 100644 (file)
@@ -794,6 +794,11 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        if (skb == NULL)
                goto out_release;
 
+       if (sk->sk_state == DCCP_CLOSED) {
+               rc = -ENOTCONN;
+               goto out_discard;
+       }
+
        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_from_msg(skb_put(skb, len), msg, len);
        if (rc != 0)
index cb54b81d0bd9e68629dbc742e1242686fbcfb83e..42a7b85b84e1f66e83387867e6a3eea750647277 100644 (file)
@@ -194,7 +194,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
                ds->ports[i].dn = cd->port_dn[i];
                ds->ports[i].cpu_dp = dst->cpu_dp;
 
-               if (dsa_is_user_port(ds, i))
+               if (!dsa_is_user_port(ds, i))
                        continue;
 
                ret = dsa_slave_create(&ds->ports[i]);
index 00589147f0422341803fd1ba02f66997beaa59c8..90e6df0351eb218a6621151960ca2109c3698580 100644 (file)
@@ -42,7 +42,7 @@ static int dsa_master_get_sset_count(struct net_device *dev, int sset)
                count += ops->get_sset_count(dev, sset);
 
        if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
-               count += ds->ops->get_sset_count(ds);
+               count += ds->ops->get_sset_count(ds, cpu_dp->index);
 
        return count;
 }
@@ -76,7 +76,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
                 * constructed earlier
                 */
                ds->ops->get_strings(ds, port, ndata);
-               count = ds->ops->get_sset_count(ds);
+               count = ds->ops->get_sset_count(ds, port);
                for (i = 0; i < count; i++) {
                        memmove(ndata + (i * len + sizeof(pfx)),
                                ndata + i * len, len - sizeof(pfx));
index 3376dad6dcfddb48088271ab8bc422f6631ba80d..18561af7a8f1da0ec92773376ecac9a72a64751c 100644 (file)
@@ -605,7 +605,7 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
 
                count = 4;
                if (ds->ops->get_sset_count)
-                       count += ds->ops->get_sset_count(ds);
+                       count += ds->ops->get_sset_count(ds, dp->index);
 
                return count;
        }
index 974765b7d92a75546fe5ae5dbc332078a54f627a..275449b0d633586a4befec517ab3a36c5e3ba5a5 100644 (file)
@@ -104,6 +104,7 @@ static void lowpan_setup(struct net_device *ldev)
        /* We need an ipv6hdr as minimum len when calling xmit */
        ldev->hard_header_len   = sizeof(struct ipv6hdr);
        ldev->flags             = IFF_BROADCAST | IFF_MULTICAST;
+       ldev->priv_flags        |= IFF_NO_QUEUE;
 
        ldev->netdev_ops        = &lowpan_netdev_ops;
        ldev->header_ops        = &lowpan_header_ops;
@@ -206,9 +207,13 @@ static inline void lowpan_netlink_fini(void)
 static int lowpan_device_event(struct notifier_block *unused,
                               unsigned long event, void *ptr)
 {
-       struct net_device *wdev = netdev_notifier_info_to_dev(ptr);
+       struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+       struct wpan_dev *wpan_dev;
 
-       if (wdev->type != ARPHRD_IEEE802154)
+       if (ndev->type != ARPHRD_IEEE802154)
+               return NOTIFY_DONE;
+       wpan_dev = ndev->ieee802154_ptr;
+       if (!wpan_dev)
                return NOTIFY_DONE;
 
        switch (event) {
@@ -217,8 +222,8 @@ static int lowpan_device_event(struct notifier_block *unused,
                 * also delete possible lowpan interfaces which belongs
                 * to the wpan interface.
                 */
-               if (wdev->ieee802154_ptr->lowpan_dev)
-                       lowpan_dellink(wdev->ieee802154_ptr->lowpan_dev, NULL);
+               if (wpan_dev->lowpan_dev)
+                       lowpan_dellink(wpan_dev->lowpan_dev, NULL);
                break;
        default:
                return NOTIFY_DONE;
index 85bf86ad6b1801066a4252af18b5b511070a9e08..a9ccb1322f6933a0c0a7d2ce1edc73476a3d18cc 100644 (file)
@@ -603,6 +603,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
 static struct pernet_operations lowpan_frags_ops = {
        .init = lowpan_frags_init_net,
        .exit = lowpan_frags_exit_net,
+       .async = true,
 };
 
 int __init lowpan_net_frag_init(void)
index cb7176cd4cd62219ec52f2b82172d9b522d5b24c..9104943c15bad7e4bceadbfd11ef1d54f089049c 100644 (file)
@@ -345,6 +345,7 @@ static void __net_exit cfg802154_pernet_exit(struct net *net)
 
 static struct pernet_operations cfg802154_pernet_ops = {
        .exit = cfg802154_pernet_exit,
+       .async = true,
 };
 
 static int __init wpan_phy_class_init(void)
index f48fe6fc7e8c413d7d7e4d7d37d1d859a566e8fb..80dad301361d9cc589b3a406ebb8d32a4e4a6827 100644 (file)
@@ -212,9 +212,14 @@ config NET_IPGRE_BROADCAST
          Network), but can be distributed all over the Internet. If you want
          to do that, say Y here and to "IP multicast routing" below.
 
+config IP_MROUTE_COMMON
+       bool
+       depends on IP_MROUTE || IPV6_MROUTE
+
 config IP_MROUTE
        bool "IP: multicast routing"
        depends on IP_MULTICAST
+       select IP_MROUTE_COMMON
        help
          This is used if you want your machine to act as a router for IP
          packets that have several destination addresses. It is needed on the
index 47a0a6649a9d52aa1378511ba6023ea9e78e187f..a07b7dd06defbd2faf846b89ac65eb57d4e9acb0 100644 (file)
@@ -20,6 +20,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
 obj-$(CONFIG_IP_MROUTE) += ipmr.o
+obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o
 obj-$(CONFIG_NET_IPIP) += ipip.o
 gre-y := gre_demux.o
 obj-$(CONFIG_NET_FOU) += fou.o
index 35d646a62ad454ddced6a614dc7e10b67447a3f0..737d11bc8838bc175df587db4acd3fce3146c60d 100644 (file)
@@ -182,6 +182,17 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
        if (r->tos && (r->tos != fl4->flowi4_tos))
                return 0;
 
+       if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
+               return 0;
+
+       if (fib_rule_port_range_set(&rule->sport_range) &&
+           !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport))
+               return 0;
+
+       if (fib_rule_port_range_set(&rule->dport_range) &&
+           !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport))
+               return 0;
+
        return 1;
 }
 
@@ -244,6 +255,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
        }
 #endif
 
+       if (fib_rule_requires_fldissect(rule))
+               net->ipv4.fib_rules_require_fldissect++;
+
        rule4->src_len = frh->src_len;
        rule4->srcmask = inet_make_mask(rule4->src_len);
        rule4->dst_len = frh->dst_len;
@@ -272,6 +286,10 @@ static int fib4_rule_delete(struct fib_rule *rule)
                net->ipv4.fib_num_tclassid_users--;
 #endif
        net->ipv4.fib_has_custom_rules = true;
+
+       if (net->ipv4.fib_rules_require_fldissect &&
+           fib_rule_requires_fldissect(rule))
+               net->ipv4.fib_rules_require_fldissect--;
 errout:
        return err;
 }
@@ -389,6 +407,7 @@ int __net_init fib4_rules_init(struct net *net)
                goto fail;
        net->ipv4.rules_ops = ops;
        net->ipv4.fib_has_custom_rules = false;
+       net->ipv4.fib_rules_require_fldissect = 0;
        return 0;
 
 fail:
index cd46d7666598e6aa4b01eb2442e550c90d485d67..e7c602c600ace989d4d204c7704671a1088681e9 100644 (file)
@@ -171,7 +171,7 @@ static void free_nh_exceptions(struct fib_nh *nh)
                fnhe = rcu_dereference_protected(hash[i].chain, 1);
                while (fnhe) {
                        struct fib_nh_exception *next;
-                       
+
                        next = rcu_dereference_protected(fnhe->fnhe_next, 1);
 
                        rt_fibinfo_free(&fnhe->fnhe_rth_input);
@@ -1770,7 +1770,7 @@ void fib_select_path(struct net *net, struct fib_result *res,
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
        if (res->fi->fib_nhs > 1) {
-               int h = fib_multipath_hash(res->fi, fl4, skb);
+               int h = fib_multipath_hash(net, fl4, skb, NULL);
 
                fib_select_multipath(res, h);
        }
index 5530cd6fdbc7d44dc259653e0b26d2065d007f1b..62243a8abf92a58b7932a259c15f94bca3b8560d 100644 (file)
@@ -50,6 +50,7 @@
 
 #define VERSION "0.409"
 
+#include <linux/cache.h>
 #include <linux/uaccess.h>
 #include <linux/bitops.h>
 #include <linux/types.h>
@@ -191,8 +192,8 @@ static size_t tnode_free_size;
  */
 static const int sync_pages = 128;
 
-static struct kmem_cache *fn_alias_kmem __read_mostly;
-static struct kmem_cache *trie_leaf_kmem __read_mostly;
+static struct kmem_cache *fn_alias_kmem __ro_after_init;
+static struct kmem_cache *trie_leaf_kmem __ro_after_init;
 
 static inline struct tnode *tn_info(struct key_vector *kv)
 {
index 1540db65241a6fd4d96b00546f13a3e3d3cd1815..d3e1a9af478b0b243dedbe200cf8b4bc0430797b 100644 (file)
@@ -1081,6 +1081,7 @@ static struct pernet_operations fou_net_ops = {
        .exit = fou_exit_net,
        .id   = &fou_net_id,
        .size = sizeof(struct fou_net),
+       .async = true,
 };
 
 static int __init fou_init(void)
index a383f299ce246bd84b28bad1909bae600ad699c0..4e5bc4b2f14e6786ceb7d63e5902f8fc17819dfa 100644 (file)
@@ -53,8 +53,7 @@ static DEFINE_MUTEX(inet_diag_table_mutex);
 static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
 {
        if (!inet_diag_table[proto])
-               request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
-                              NETLINK_SOCK_DIAG, AF_INET, proto);
+               sock_load_diag_module(AF_INET, proto);
 
        mutex_lock(&inet_diag_table_mutex);
        if (!inet_diag_table[proto])
index 26a3d0315728ed2b16ca46080a3546668100bc8e..e8ec28999f5ce0c5d496e9a97ca1748b18db0cf0 100644 (file)
@@ -119,6 +119,9 @@ static void inet_frag_secret_rebuild(struct inet_frags *f)
 
 static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
 {
+       if (!hlist_unhashed(&q->list_evictor))
+               return false;
+
        return q->net->low_thresh == 0 ||
               frag_mem_limit(q->net) >= q->net->low_thresh;
 }
index 914d56928578c6251c9e4b03c5e8523e8dd31184..1f04bd91fc2e999ddb82f4be92d39d229166b691 100644 (file)
@@ -6,6 +6,7 @@
  *  Authors:   Andrey V. Savochkin <saw@msu.ru>
  */
 
+#include <linux/cache.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/slab.h>
@@ -51,7 +52,7 @@
  *             daddr: unchangeable
  */
 
-static struct kmem_cache *peer_cachep __read_mostly;
+static struct kmem_cache *peer_cachep __ro_after_init;
 
 void inet_peer_base_init(struct inet_peer_base *bp)
 {
index 2dd21c3281a1cb4194dbb34dad086b701716220d..b54b948b059608fc3157fedf40e61519321c6912 100644 (file)
@@ -55,7 +55,7 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
        if (skb->ignore_df)
                return false;
 
-       if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+       if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
                return false;
 
        return true;
index 45d97e9b2759dc7430f56cd4e563eab5bf7d3e44..9ab1aa2f7660786538fa2b97a33aa0760c57585f 100644 (file)
@@ -522,6 +522,7 @@ static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
                        __be16 proto)
 {
+       struct ip_tunnel *tunnel = netdev_priv(dev);
        struct ip_tunnel_info *tun_info;
        const struct ip_tunnel_key *key;
        struct rtable *rt = NULL;
@@ -545,9 +546,11 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
        if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
                goto err_free_rt;
 
-       flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+       flags = tun_info->key.tun_flags &
+               (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
        gre_build_header(skb, tunnel_hlen, flags, proto,
-                        tunnel_id_to_key32(tun_info->key.tun_id), 0);
+                        tunnel_id_to_key32(tun_info->key.tun_id),
+                        (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
 
        df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
 
@@ -970,9 +973,6 @@ static void __gre_tunnel_init(struct net_device *dev)
 
        t_hlen = tunnel->hlen + sizeof(struct iphdr);
 
-       dev->needed_headroom    = LL_MAX_HEADER + t_hlen + 4;
-       dev->mtu                = ETH_DATA_LEN - t_hlen - 4;
-
        dev->features           |= GRE_FEATURES;
        dev->hw_features        |= GRE_FEATURES;
 
@@ -1044,6 +1044,7 @@ static struct pernet_operations ipgre_net_ops = {
        .exit_batch = ipgre_exit_batch_net,
        .id   = &ipgre_net_id,
        .size = sizeof(struct ip_tunnel_net),
+       .async = true,
 };
 
 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1290,8 +1291,6 @@ static int erspan_tunnel_init(struct net_device *dev)
                       erspan_hdr_len(tunnel->erspan_ver);
        t_hlen = tunnel->hlen + sizeof(struct iphdr);
 
-       dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
-       dev->mtu = ETH_DATA_LEN - t_hlen - 4;
        dev->features           |= GRE_FEATURES;
        dev->hw_features        |= GRE_FEATURES;
        dev->priv_flags         |= IFF_LIVE_ADDR_CHANGE;
@@ -1322,6 +1321,12 @@ static void ipgre_tap_setup(struct net_device *dev)
        ip_tunnel_setup(dev, gre_tap_net_id);
 }
 
+bool is_gretap_dev(const struct net_device *dev)
+{
+       return dev->netdev_ops == &gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_gretap_dev);
+
 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
                         struct nlattr *tb[], struct nlattr *data[],
                         struct netlink_ext_ack *extack)
@@ -1623,6 +1628,7 @@ static struct pernet_operations ipgre_tap_net_ops = {
        .exit_batch = ipgre_tap_exit_batch_net,
        .id   = &gre_tap_net_id,
        .size = sizeof(struct ip_tunnel_net),
+       .async = true,
 };
 
 static int __net_init erspan_init_net(struct net *net)
@@ -1641,6 +1647,7 @@ static struct pernet_operations erspan_net_ops = {
        .exit_batch = erspan_exit_batch_net,
        .id   = &erspan_net_id,
        .size = sizeof(struct ip_tunnel_net),
+       .async = true,
 };
 
 static int __init ipgre_init(void)
index 57fc13c6ab2b7843a4fdb11680c82fc342f465c7..7582713dd18f37b5c27cdc85ff62626a8ad4f435 100644 (file)
@@ -159,7 +159,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
        struct net_device *dev = skb->dev;
        struct net *net = dev_net(dev);
 
-       for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
+       for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) {
                struct sock *sk = ra->sk;
 
                /* If socket is bound to an interface, only report
@@ -167,8 +167,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
                 */
                if (sk && inet_sk(sk)->inet_num == protocol &&
                    (!sk->sk_bound_dev_if ||
-                    sk->sk_bound_dev_if == dev->ifindex) &&
-                   net_eq(sock_net(sk), net)) {
+                    sk->sk_bound_dev_if == dev->ifindex)) {
                        if (ip_is_fragment(ip_hdr(skb))) {
                                if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
                                        return true;
index e8e675be60ec0044007c660bae1bb4d12c9a484e..66340ab750e69ff5775f7996192839a24ddc6e65 100644 (file)
@@ -248,7 +248,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
 
        /* common case: seglen is <= mtu
         */
-       if (skb_gso_validate_mtu(skb, mtu))
+       if (skb_gso_validate_network_len(skb, mtu))
                return ip_finish_output2(net, sk, skb);
 
        /* Slowpath -  GSO segment length exceeds the egress MTU.
index 74c962b9b09c3c234388686fad4fb217c4e4a36e..5ad2d8ed3a3fe2aa51d814af442df7ff5e074d3e 100644 (file)
@@ -322,20 +322,6 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
        return 0;
 }
 
-
-/* Special input handler for packets caught by router alert option.
-   They are selected only by protocol field, and then processed likely
-   local ones; but only if someone wants them! Otherwise, router
-   not running rsvpd will kill RSVP.
-
-   It is user level problem, what it will make with them.
-   I have no idea, how it will masquearde or NAT them (it is joke, joke :-)),
-   but receiver should be enough clever f.e. to forward mtrace requests,
-   sent to multicast group to reach destination designated router.
- */
-struct ip_ra_chain __rcu *ip_ra_chain;
-
-
 static void ip_ra_destroy_rcu(struct rcu_head *head)
 {
        struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
@@ -349,23 +335,28 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 {
        struct ip_ra_chain *ra, *new_ra;
        struct ip_ra_chain __rcu **rap;
+       struct net *net = sock_net(sk);
 
        if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
                return -EINVAL;
 
        new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
 
-       for (rap = &ip_ra_chain;
-            (ra = rtnl_dereference(*rap)) != NULL;
+       mutex_lock(&net->ipv4.ra_mutex);
+       for (rap = &net->ipv4.ra_chain;
+            (ra = rcu_dereference_protected(*rap,
+                       lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
             rap = &ra->next) {
                if (ra->sk == sk) {
                        if (on) {
+                               mutex_unlock(&net->ipv4.ra_mutex);
                                kfree(new_ra);
                                return -EADDRINUSE;
                        }
                        /* dont let ip_call_ra_chain() use sk again */
                        ra->sk = NULL;
                        RCU_INIT_POINTER(*rap, ra->next);
+                       mutex_unlock(&net->ipv4.ra_mutex);
 
                        if (ra->destructor)
                                ra->destructor(sk);
@@ -379,14 +370,17 @@ int ip_ra_control(struct sock *sk, unsigned char on,
                        return 0;
                }
        }
-       if (!new_ra)
+       if (!new_ra) {
+               mutex_unlock(&net->ipv4.ra_mutex);
                return -ENOBUFS;
+       }
        new_ra->sk = sk;
        new_ra->destructor = destructor;
 
        RCU_INIT_POINTER(new_ra->next, ra);
        rcu_assign_pointer(*rap, new_ra);
        sock_hold(sk);
+       mutex_unlock(&net->ipv4.ra_mutex);
 
        return 0;
 }
@@ -586,7 +580,6 @@ static bool setsockopt_needs_rtnl(int optname)
        case MCAST_LEAVE_GROUP:
        case MCAST_LEAVE_SOURCE_GROUP:
        case MCAST_UNBLOCK_SOURCE:
-       case IP_ROUTER_ALERT:
                return true;
        }
        return false;
@@ -639,6 +632,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 
        /* If optlen==0, it is equivalent to val == 0 */
 
+       if (optname == IP_ROUTER_ALERT)
+               return ip_ra_control(sk, val ? 1 : 0, NULL);
        if (ip_mroute_opt(optname))
                return ip_mroute_setsockopt(sk, optname, optval, optlen);
 
@@ -1149,9 +1144,6 @@ static int do_ip_setsockopt(struct sock *sk, int level,
                        goto e_inval;
                inet->mc_all = val;
                break;
-       case IP_ROUTER_ALERT:
-               err = ip_ra_control(sk, val ? 1 : 0, NULL);
-               break;
 
        case IP_FREEBIND:
                if (optlen < 1)
index d786a8441bce61d2624d0f4de94246345cc09119..5fcb17cb426bfcce40add19bdf664aec66775b1e 100644 (file)
@@ -290,22 +290,6 @@ static struct net_device *__ip_tunnel_create(struct net *net,
        return ERR_PTR(err);
 }
 
-static inline void init_tunnel_flow(struct flowi4 *fl4,
-                                   int proto,
-                                   __be32 daddr, __be32 saddr,
-                                   __be32 key, __u8 tos, int oif,
-                                   __u32 mark)
-{
-       memset(fl4, 0, sizeof(*fl4));
-       fl4->flowi4_oif = oif;
-       fl4->daddr = daddr;
-       fl4->saddr = saddr;
-       fl4->flowi4_tos = tos;
-       fl4->flowi4_proto = proto;
-       fl4->fl4_gre_key = key;
-       fl4->flowi4_mark = mark;
-}
-
 static int ip_tunnel_bind_dev(struct net_device *dev)
 {
        struct net_device *tdev = NULL;
@@ -322,10 +306,10 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
                struct flowi4 fl4;
                struct rtable *rt;
 
-               init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
-                                iph->saddr, tunnel->parms.o_key,
-                                RT_TOS(iph->tos), tunnel->parms.link,
-                                tunnel->fwmark);
+               ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
+                                   iph->saddr, tunnel->parms.o_key,
+                                   RT_TOS(iph->tos), tunnel->parms.link,
+                                   tunnel->fwmark);
                rt = ip_route_output_key(tunnel->net, &fl4);
 
                if (!IS_ERR(rt)) {
@@ -363,8 +347,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
        struct net_device *dev;
        int t_hlen;
 
-       BUG_ON(!itn->fb_tunnel_dev);
-       dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
+       dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
        if (IS_ERR(dev))
                return ERR_CAST(dev);
 
@@ -581,8 +564,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
                else if (skb->protocol == htons(ETH_P_IPV6))
                        tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
        }
-       init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
-                        RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
+       ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
+                           RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
        if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
                goto tx_error;
        rt = ip_route_output_key(tunnel->net, &fl4);
@@ -710,16 +693,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
                }
        }
 
-       if (tunnel->fwmark) {
-               init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
-                                tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
-                                tunnel->fwmark);
-       }
-       else {
-               init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
-                                tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
-                                skb->mark);
-       }
+       ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+                           tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+                           tunnel->fwmark);
 
        if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
                goto tx_error;
@@ -845,7 +821,6 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
        struct net *net = t->net;
        struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
 
-       BUG_ON(!itn->fb_tunnel_dev);
        switch (cmd) {
        case SIOCGETTUNNEL:
                if (dev == itn->fb_tunnel_dev) {
@@ -870,7 +845,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
                                p->o_key = 0;
                }
 
-               t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
+               t = ip_tunnel_find(itn, p, itn->type);
 
                if (cmd == SIOCADDTUNNEL) {
                        if (!t) {
@@ -1014,10 +989,15 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
        struct ip_tunnel_parm parms;
        unsigned int i;
 
+       itn->rtnl_link_ops = ops;
        for (i = 0; i < IP_TNL_HASH_SIZE; i++)
                INIT_HLIST_HEAD(&itn->tunnels[i]);
 
-       if (!ops) {
+       if (!ops || !net_has_fallback_tunnels(net)) {
+               struct ip_tunnel_net *it_init_net;
+
+               it_init_net = net_generic(&init_net, ip_tnl_net_id);
+               itn->type = it_init_net->type;
                itn->fb_tunnel_dev = NULL;
                return 0;
        }
@@ -1035,6 +1015,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
                itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
                itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
                ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
+               itn->type = itn->fb_tunnel_dev->type;
        }
        rtnl_unlock();
 
@@ -1042,10 +1023,10 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
 
-static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
+static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
+                             struct list_head *head,
                              struct rtnl_link_ops *ops)
 {
-       struct net *net = dev_net(itn->fb_tunnel_dev);
        struct net_device *dev, *aux;
        int h;
 
@@ -1077,7 +1058,7 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
        rtnl_lock();
        list_for_each_entry(net, net_list, exit_list) {
                itn = net_generic(net, id);
-               ip_tunnel_destroy(itn, &list, ops);
+               ip_tunnel_destroy(net, itn, &list, ops);
        }
        unregister_netdevice_many(&list);
        rtnl_unlock();
index 51b1669334fe6baeea0045fcfdd631700c1ccbf2..b10bf563afd97442f2795b976e81ef3534900249 100644 (file)
@@ -454,6 +454,7 @@ static struct pernet_operations vti_net_ops = {
        .exit_batch = vti_exit_batch_net,
        .id   = &vti_net_id,
        .size = sizeof(struct ip_tunnel_net),
+       .async = true,
 };
 
 static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
index c891235b4966cc4994d81acf39070d27c3060f13..9c5a4d164f095dd98cfcf51e9caba90ffb4ac0e3 100644 (file)
@@ -669,6 +669,7 @@ static struct pernet_operations ipip_net_ops = {
        .exit_batch = ipip_exit_batch_net,
        .id   = &ipip_net_id,
        .size = sizeof(struct ip_tunnel_net),
+       .async = true,
 };
 
 static int __init ipip_init(void)
index 7c7ac9d32e77961a4fa7b5a4a05fe785b6a6810d..f6be5db16da2640a44032770ca3ef8b90bc14b29 100644 (file)
@@ -28,9 +28,9 @@
 
 #include <linux/uaccess.h>
 #include <linux/types.h>
+#include <linux/cache.h>
 #include <linux/capability.h>
 #include <linux/errno.h>
-#include <linux/timer.h>
 #include <linux/mm.h>
 #include <linux/kernel.h>
 #include <linux/fcntl.h>
@@ -52,7 +52,6 @@
 #include <net/protocol.h>
 #include <linux/skbuff.h>
 #include <net/route.h>
-#include <net/sock.h>
 #include <net/icmp.h>
 #include <net/udp.h>
 #include <net/raw.h>
@@ -96,7 +95,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
  * In this case data path is free of exclusive locks at all.
  */
 
-static struct kmem_cache *mrt_cachep __read_mostly;
+static struct kmem_cache *mrt_cachep __ro_after_init;
 
 static struct mr_table *ipmr_new_table(struct net *net, u32 id);
 static void ipmr_free_table(struct mr_table *mrt);
@@ -106,8 +105,6 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
                          struct mfc_cache *cache, int local);
 static int ipmr_cache_report(struct mr_table *mrt,
                             struct sk_buff *pkt, vifi_t vifi, int assert);
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
-                             struct mfc_cache *c, struct rtmsg *rtm);
 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
                                 int cmd);
 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
@@ -118,6 +115,23 @@ static void ipmr_expire_process(struct timer_list *t);
 #define ipmr_for_each_table(mrt, net) \
        list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
 
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+                                          struct mr_table *mrt)
+{
+       struct mr_table *ret;
+
+       if (!mrt)
+               ret = list_entry_rcu(net->ipv4.mr_tables.next,
+                                    struct mr_table, list);
+       else
+               ret = list_entry_rcu(mrt->list.next,
+                                    struct mr_table, list);
+
+       if (&ret->list == &net->ipv4.mr_tables)
+               return NULL;
+       return ret;
+}
+
 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
 {
        struct mr_table *mrt;
@@ -285,6 +299,14 @@ EXPORT_SYMBOL(ipmr_rule_default);
 #define ipmr_for_each_table(mrt, net) \
        for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
 
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+                                          struct mr_table *mrt)
+{
+       if (!mrt)
+               return net->ipv4.mrt;
+       return NULL;
+}
+
 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
 {
        return net->ipv4.mrt;
@@ -344,7 +366,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
 }
 
 static const struct rhashtable_params ipmr_rht_params = {
-       .head_offset = offsetof(struct mfc_cache, mnode),
+       .head_offset = offsetof(struct mr_mfc, mnode),
        .key_offset = offsetof(struct mfc_cache, cmparg),
        .key_len = sizeof(struct mfc_cache_cmp_arg),
        .nelem_hint = 3,
@@ -353,6 +375,24 @@ static const struct rhashtable_params ipmr_rht_params = {
        .automatic_shrinking = true,
 };
 
+static void ipmr_new_table_set(struct mr_table *mrt,
+                              struct net *net)
+{
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+       list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
+#endif
+}
+
+static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
+       .mfc_mcastgrp = htonl(INADDR_ANY),
+       .mfc_origin = htonl(INADDR_ANY),
+};
+
+static struct mr_table_ops ipmr_mr_table_ops = {
+       .rht_params = &ipmr_rht_params,
+       .cmparg_any = &ipmr_mr_table_ops_cmparg_any,
+};
+
 static struct mr_table *ipmr_new_table(struct net *net, u32 id)
 {
        struct mr_table *mrt;
@@ -365,23 +405,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
        if (mrt)
                return mrt;
 
-       mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
-       if (!mrt)
-               return ERR_PTR(-ENOMEM);
-       write_pnet(&mrt->net, net);
-       mrt->id = id;
-
-       rhltable_init(&mrt->mfc_hash, &ipmr_rht_params);
-       INIT_LIST_HEAD(&mrt->mfc_cache_list);
-       INIT_LIST_HEAD(&mrt->mfc_unres_queue);
-
-       timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
-
-       mrt->mroute_reg_vif_num = -1;
-#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
-       list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
-#endif
-       return mrt;
+       return mr_table_alloc(net, id, &ipmr_mr_table_ops,
+                             ipmr_expire_process, ipmr_new_table_set);
 }
 
 static void ipmr_free_table(struct mr_table *mrt)
@@ -760,14 +785,14 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
 
 static void ipmr_cache_free_rcu(struct rcu_head *head)
 {
-       struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
+       struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
 
-       kmem_cache_free(mrt_cachep, c);
+       kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
 }
 
 void ipmr_cache_free(struct mfc_cache *c)
 {
-       call_rcu(&c->rcu, ipmr_cache_free_rcu);
+       call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
 }
 EXPORT_SYMBOL(ipmr_cache_free);
 
@@ -782,7 +807,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
 
        atomic_dec(&mrt->cache_resolve_queue_len);
 
-       while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
+       while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = skb_pull(skb,
                                                        sizeof(struct iphdr));
@@ -806,9 +831,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
 static void ipmr_expire_process(struct timer_list *t)
 {
        struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
-       unsigned long now;
+       struct mr_mfc *c, *next;
        unsigned long expires;
-       struct mfc_cache *c, *next;
+       unsigned long now;
 
        if (!spin_trylock(&mfc_unres_lock)) {
                mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
@@ -830,8 +855,8 @@ static void ipmr_expire_process(struct timer_list *t)
                }
 
                list_del(&c->list);
-               mroute_netlink_event(mrt, c, RTM_DELROUTE);
-               ipmr_destroy_unres(mrt, c);
+               mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE);
+               ipmr_destroy_unres(mrt, (struct mfc_cache *)c);
        }
 
        if (!list_empty(&mrt->mfc_unres_queue))
@@ -842,7 +867,7 @@ static void ipmr_expire_process(struct timer_list *t)
 }
 
 /* Fill oifs list. It is called under write locked mrt_lock. */
-static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
+static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
                                   unsigned char *ttls)
 {
        int vifi;
@@ -944,6 +969,10 @@ static int vif_add(struct net *net, struct mr_table *mrt,
        ip_rt_multicast_event(in_dev);
 
        /* Fill in the VIF structures */
+       vif_device_init(v, dev, vifc->vifc_rate_limit,
+                       vifc->vifc_threshold,
+                       vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
+                       (VIFF_TUNNEL | VIFF_REGISTER));
 
        attr.orig_dev = dev;
        if (!switchdev_port_attr_get(dev, &attr)) {
@@ -952,20 +981,9 @@ static int vif_add(struct net *net, struct mr_table *mrt,
        } else {
                v->dev_parent_id.id_len = 0;
        }
-       v->rate_limit = vifc->vifc_rate_limit;
+
        v->local = vifc->vifc_lcl_addr.s_addr;
        v->remote = vifc->vifc_rmt_addr.s_addr;
-       v->flags = vifc->vifc_flags;
-       if (!mrtsock)
-               v->flags |= VIFF_STATIC;
-       v->threshold = vifc->vifc_threshold;
-       v->bytes_in = 0;
-       v->bytes_out = 0;
-       v->pkt_in = 0;
-       v->pkt_out = 0;
-       v->link = dev->ifindex;
-       if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
-               v->link = dev_get_iflink(dev);
 
        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
@@ -988,33 +1006,8 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
                        .mfc_mcastgrp = mcastgrp,
                        .mfc_origin = origin
        };
-       struct rhlist_head *tmp, *list;
-       struct mfc_cache *c;
-
-       list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
-       rhl_for_each_entry_rcu(c, tmp, list, mnode)
-               return c;
-
-       return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
-                                                   int vifi)
-{
-       struct mfc_cache_cmp_arg arg = {
-                       .mfc_mcastgrp = htonl(INADDR_ANY),
-                       .mfc_origin = htonl(INADDR_ANY)
-       };
-       struct rhlist_head *tmp, *list;
-       struct mfc_cache *c;
-
-       list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
-       rhl_for_each_entry_rcu(c, tmp, list, mnode)
-               if (c->mfc_un.res.ttls[vifi] < 255)
-                       return c;
 
-       return NULL;
+       return mr_mfc_find(mrt, &arg);
 }
 
 /* Look for a (*,G) entry */
@@ -1025,25 +1018,10 @@ static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
                        .mfc_mcastgrp = mcastgrp,
                        .mfc_origin = htonl(INADDR_ANY)
        };
-       struct rhlist_head *tmp, *list;
-       struct mfc_cache *c, *proxy;
 
        if (mcastgrp == htonl(INADDR_ANY))
-               goto skip;
-
-       list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
-       rhl_for_each_entry_rcu(c, tmp, list, mnode) {
-               if (c->mfc_un.res.ttls[vifi] < 255)
-                       return c;
-
-               /* It's ok if the vifi is part of the static tree */
-               proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
-               if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
-                       return c;
-       }
-
-skip:
-       return ipmr_cache_find_any_parent(mrt, vifi);
+               return mr_mfc_find_any_parent(mrt, vifi);
+       return mr_mfc_find_any(mrt, vifi, &arg);
 }
 
 /* Look for a (S,G,iif) entry if parent != -1 */
@@ -1055,15 +1033,8 @@ static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
                        .mfc_mcastgrp = mcastgrp,
                        .mfc_origin = origin,
        };
-       struct rhlist_head *tmp, *list;
-       struct mfc_cache *c;
-
-       list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
-       rhl_for_each_entry_rcu(c, tmp, list, mnode)
-               if (parent == -1 || parent == c->mfc_parent)
-                       return c;
 
-       return NULL;
+       return mr_mfc_find_parent(mrt, &arg, parent);
 }
 
 /* Allocate a multicast cache entry */
@@ -1072,9 +1043,9 @@ static struct mfc_cache *ipmr_cache_alloc(void)
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 
        if (c) {
-               c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
-               c->mfc_un.res.minvif = MAXVIFS;
-               refcount_set(&c->mfc_un.res.refcount, 1);
+               c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+               c->_c.mfc_un.res.minvif = MAXVIFS;
+               refcount_set(&c->_c.mfc_un.res.refcount, 1);
        }
        return c;
 }
@@ -1084,8 +1055,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void)
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 
        if (c) {
-               skb_queue_head_init(&c->mfc_un.unres.unresolved);
-               c->mfc_un.unres.expires = jiffies + 10*HZ;
+               skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+               c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
        }
        return c;
 }
@@ -1098,12 +1069,13 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
        struct nlmsgerr *e;
 
        /* Play the pending entries through our router */
-       while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+       while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = skb_pull(skb,
                                                        sizeof(struct iphdr));
 
-                       if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+                       if (mr_fill_mroute(mrt, skb, &c->_c,
+                                          nlmsg_data(nlh)) > 0) {
                                nlh->nlmsg_len = skb_tail_pointer(skb) -
                                                 (u8 *)nlh;
                        } else {
@@ -1211,7 +1183,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
        int err;
 
        spin_lock_bh(&mfc_unres_lock);
-       list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
+       list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
                if (c->mfc_mcastgrp == iph->daddr &&
                    c->mfc_origin == iph->saddr) {
                        found = true;
@@ -1230,12 +1202,13 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
                }
 
                /* Fill in the new cache entry */
-               c->mfc_parent   = -1;
+               c->_c.mfc_parent = -1;
                c->mfc_origin   = iph->saddr;
                c->mfc_mcastgrp = iph->daddr;
 
                /* Reflect first query at mrouted. */
                err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
+
                if (err < 0) {
                        /* If the report failed throw the cache entry
                           out - Brad Parker
@@ -1248,15 +1221,16 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
                }
 
                atomic_inc(&mrt->cache_resolve_queue_len);
-               list_add(&c->list, &mrt->mfc_unres_queue);
+               list_add(&c->_c.list, &mrt->mfc_unres_queue);
                mroute_netlink_event(mrt, c, RTM_NEWROUTE);
 
                if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
-                       mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
+                       mod_timer(&mrt->ipmr_expire_timer,
+                                 c->_c.mfc_un.unres.expires);
        }
 
        /* See if we can append the packet */
-       if (c->mfc_un.unres.unresolved.qlen > 3) {
+       if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
@@ -1264,7 +1238,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
                        skb->dev = dev;
                        skb->skb_iif = dev->ifindex;
                }
-               skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+               skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
                err = 0;
        }
 
@@ -1286,8 +1260,8 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
        rcu_read_unlock();
        if (!c)
                return -ENOENT;
-       rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
-       list_del_rcu(&c->list);
+       rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
+       list_del_rcu(&c->_c.list);
        call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
        mroute_netlink_event(mrt, c, RTM_DELROUTE);
        ipmr_cache_put(c);
@@ -1299,6 +1273,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
                        struct mfcctl *mfc, int mrtsock, int parent)
 {
        struct mfc_cache *uc, *c;
+       struct mr_mfc *_uc;
        bool found;
        int ret;
 
@@ -1312,10 +1287,10 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
        rcu_read_unlock();
        if (c) {
                write_lock_bh(&mrt_lock);
-               c->mfc_parent = mfc->mfcc_parent;
-               ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+               c->_c.mfc_parent = mfc->mfcc_parent;
+               ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
                if (!mrtsock)
-                       c->mfc_flags |= MFC_STATIC;
+                       c->_c.mfc_flags |= MFC_STATIC;
                write_unlock_bh(&mrt_lock);
                call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
                                              mrt->id);
@@ -1333,28 +1308,29 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 
        c->mfc_origin = mfc->mfcc_origin.s_addr;
        c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
-       c->mfc_parent = mfc->mfcc_parent;
-       ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+       c->_c.mfc_parent = mfc->mfcc_parent;
+       ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
        if (!mrtsock)
-               c->mfc_flags |= MFC_STATIC;
+               c->_c.mfc_flags |= MFC_STATIC;
 
-       ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
+       ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
                                  ipmr_rht_params);
        if (ret) {
                pr_err("ipmr: rhtable insert error %d\n", ret);
                ipmr_cache_free(c);
                return ret;
        }
-       list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
+       list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
        /* Check to see if we resolved a queued list. If so we
         * need to send on the frames and tidy up.
         */
        found = false;
        spin_lock_bh(&mfc_unres_lock);
-       list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
+       list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+               uc = (struct mfc_cache *)_uc;
                if (uc->mfc_origin == c->mfc_origin &&
                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
-                       list_del(&uc->list);
+                       list_del(&_uc->list);
                        atomic_dec(&mrt->cache_resolve_queue_len);
                        found = true;
                        break;
@@ -1377,7 +1353,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 static void mroute_clean_tables(struct mr_table *mrt, bool all)
 {
        struct net *net = read_pnet(&mrt->net);
-       struct mfc_cache *c, *tmp;
+       struct mr_mfc *c, *tmp;
+       struct mfc_cache *cache;
        LIST_HEAD(list);
        int i;
 
@@ -1395,18 +1372,20 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
                        continue;
                rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
                list_del_rcu(&c->list);
-               call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
+               cache = (struct mfc_cache *)c;
+               call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
                                              mrt->id);
-               mroute_netlink_event(mrt, c, RTM_DELROUTE);
-               ipmr_cache_put(c);
+               mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+               ipmr_cache_put(cache);
        }
 
        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
                spin_lock_bh(&mfc_unres_lock);
                list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
                        list_del(&c->list);
-                       mroute_netlink_event(mrt, c, RTM_DELROUTE);
-                       ipmr_destroy_unres(mrt, c);
+                       cache = (struct mfc_cache *)c;
+                       mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+                       ipmr_destroy_unres(mrt, cache);
                }
                spin_unlock_bh(&mfc_unres_lock);
        }
@@ -1420,7 +1399,7 @@ static void mrtsock_destruct(struct sock *sk)
        struct net *net = sock_net(sk);
        struct mr_table *mrt;
 
-       ASSERT_RTNL();
+       rtnl_lock();
        ipmr_for_each_table(mrt, net) {
                if (sk == rtnl_dereference(mrt->mroute_sk)) {
                        IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
@@ -1432,6 +1411,7 @@ static void mrtsock_destruct(struct sock *sk)
                        mroute_clean_tables(mrt, false);
                }
        }
+       rtnl_unlock();
 }
 
 /* Socket options and virtual interface manipulation. The whole
@@ -1496,8 +1476,13 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
                if (sk != rcu_access_pointer(mrt->mroute_sk)) {
                        ret = -EACCES;
                } else {
+                       /* We need to unlock here because mrtsock_destruct takes
+                        * care of rtnl itself and we can't change that due to
+                        * the IP_ROUTER_ALERT setsockopt which runs without it.
+                        */
+                       rtnl_unlock();
                        ret = ip_ra_control(sk, 0, NULL);
-                       goto out_unlock;
+                       goto out;
                }
                break;
        case MRT_ADD_VIF:
@@ -1609,6 +1594,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
        }
 out_unlock:
        rtnl_unlock();
+out:
        return ret;
 }
 
@@ -1698,9 +1684,9 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
                rcu_read_lock();
                c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
                if (c) {
-                       sr.pktcnt = c->mfc_un.res.pkt;
-                       sr.bytecnt = c->mfc_un.res.bytes;
-                       sr.wrong_if = c->mfc_un.res.wrong_if;
+                       sr.pktcnt = c->_c.mfc_un.res.pkt;
+                       sr.bytecnt = c->_c.mfc_un.res.bytes;
+                       sr.wrong_if = c->_c.mfc_un.res.wrong_if;
                        rcu_read_unlock();
 
                        if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1772,9 +1758,9 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
                rcu_read_lock();
                c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
                if (c) {
-                       sr.pktcnt = c->mfc_un.res.pkt;
-                       sr.bytecnt = c->mfc_un.res.bytes;
-                       sr.wrong_if = c->mfc_un.res.wrong_if;
+                       sr.pktcnt = c->_c.mfc_un.res.pkt;
+                       sr.bytecnt = c->_c.mfc_un.res.bytes;
+                       sr.wrong_if = c->_c.mfc_un.res.wrong_if;
                        rcu_read_unlock();
 
                        if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1998,26 +1984,26 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
 /* "local" means that we should preserve one skb (for local delivery) */
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
                          struct net_device *dev, struct sk_buff *skb,
-                         struct mfc_cache *cache, int local)
+                         struct mfc_cache *c, int local)
 {
        int true_vifi = ipmr_find_vif(mrt, dev);
        int psend = -1;
        int vif, ct;
 
-       vif = cache->mfc_parent;
-       cache->mfc_un.res.pkt++;
-       cache->mfc_un.res.bytes += skb->len;
-       cache->mfc_un.res.lastuse = jiffies;
+       vif = c->_c.mfc_parent;
+       c->_c.mfc_un.res.pkt++;
+       c->_c.mfc_un.res.bytes += skb->len;
+       c->_c.mfc_un.res.lastuse = jiffies;
 
-       if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
+       if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
                struct mfc_cache *cache_proxy;
 
                /* For an (*,G) entry, we only check that the incomming
                 * interface is part of the static tree.
                 */
-               cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
+               cache_proxy = mr_mfc_find_any_parent(mrt, vif);
                if (cache_proxy &&
-                   cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+                   cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
                        goto forward;
        }
 
@@ -2038,7 +2024,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
                        goto dont_forward;
                }
 
-               cache->mfc_un.res.wrong_if++;
+               c->_c.mfc_un.res.wrong_if++;
 
                if (true_vifi >= 0 && mrt->mroute_do_assert &&
                    /* pimsm uses asserts, when switching from RPT to SPT,
@@ -2047,10 +2033,11 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
                     * large chunk of pimd to kernel. Ough... --ANK
                     */
                    (mrt->mroute_do_pim ||
-                    cache->mfc_un.res.ttls[true_vifi] < 255) &&
+                    c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
                    time_after(jiffies,
-                              cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
-                       cache->mfc_un.res.last_assert = jiffies;
+                              c->_c.mfc_un.res.last_assert +
+                              MFC_ASSERT_THRESH)) {
+                       c->_c.mfc_un.res.last_assert = jiffies;
                        ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
                }
                goto dont_forward;
@@ -2061,33 +2048,33 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
        mrt->vif_table[vif].bytes_in += skb->len;
 
        /* Forward the frame */
-       if (cache->mfc_origin == htonl(INADDR_ANY) &&
-           cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
+       if (c->mfc_origin == htonl(INADDR_ANY) &&
+           c->mfc_mcastgrp == htonl(INADDR_ANY)) {
                if (true_vifi >= 0 &&
-                   true_vifi != cache->mfc_parent &&
+                   true_vifi != c->_c.mfc_parent &&
                    ip_hdr(skb)->ttl >
-                               cache->mfc_un.res.ttls[cache->mfc_parent]) {
+                               c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
                        /* It's an (*,*) entry and the packet is not coming from
                         * the upstream: forward the packet to the upstream
                         * only.
                         */
-                       psend = cache->mfc_parent;
+                       psend = c->_c.mfc_parent;
                        goto last_forward;
                }
                goto dont_forward;
        }
-       for (ct = cache->mfc_un.res.maxvif - 1;
-            ct >= cache->mfc_un.res.minvif; ct--) {
+       for (ct = c->_c.mfc_un.res.maxvif - 1;
+            ct >= c->_c.mfc_un.res.minvif; ct--) {
                /* For (*,G) entry, don't forward to the incoming interface */
-               if ((cache->mfc_origin != htonl(INADDR_ANY) ||
+               if ((c->mfc_origin != htonl(INADDR_ANY) ||
                     ct != true_vifi) &&
-                   ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
+                   ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
                                if (skb2)
                                        ipmr_queue_xmit(net, mrt, true_vifi,
-                                                       skb2, cache, psend);
+                                                       skb2, c, psend);
                        }
                        psend = ct;
                }
@@ -2099,9 +2086,9 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 
                        if (skb2)
                                ipmr_queue_xmit(net, mrt, true_vifi, skb2,
-                                               cache, psend);
+                                               c, psend);
                } else {
-                       ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
+                       ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend);
                        return;
                }
        }
@@ -2299,62 +2286,6 @@ static int pim_rcv(struct sk_buff *skb)
 }
 #endif
 
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
-                             struct mfc_cache *c, struct rtmsg *rtm)
-{
-       struct rta_mfc_stats mfcs;
-       struct nlattr *mp_attr;
-       struct rtnexthop *nhp;
-       unsigned long lastuse;
-       int ct;
-
-       /* If cache is unresolved, don't try to parse IIF and OIF */
-       if (c->mfc_parent >= MAXVIFS) {
-               rtm->rtm_flags |= RTNH_F_UNRESOLVED;
-               return -ENOENT;
-       }
-
-       if (VIF_EXISTS(mrt, c->mfc_parent) &&
-           nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
-               return -EMSGSIZE;
-
-       if (c->mfc_flags & MFC_OFFLOAD)
-               rtm->rtm_flags |= RTNH_F_OFFLOAD;
-
-       if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
-               return -EMSGSIZE;
-
-       for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
-               if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
-                       if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) {
-                               nla_nest_cancel(skb, mp_attr);
-                               return -EMSGSIZE;
-                       }
-
-                       nhp->rtnh_flags = 0;
-                       nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
-                       nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
-                       nhp->rtnh_len = sizeof(*nhp);
-               }
-       }
-
-       nla_nest_end(skb, mp_attr);
-
-       lastuse = READ_ONCE(c->mfc_un.res.lastuse);
-       lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
-       mfcs.mfcs_packets = c->mfc_un.res.pkt;
-       mfcs.mfcs_bytes = c->mfc_un.res.bytes;
-       mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
-       if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
-           nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
-                             RTA_PAD))
-               return -EMSGSIZE;
-
-       rtm->rtm_type = RTN_MULTICAST;
-       return 1;
-}
-
 int ipmr_get_route(struct net *net, struct sk_buff *skb,
                   __be32 saddr, __be32 daddr,
                   struct rtmsg *rtm, u32 portid)
@@ -2412,7 +2343,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
        }
 
        read_lock(&mrt_lock);
-       err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
+       err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
        read_unlock(&mrt_lock);
        rcu_read_unlock();
        return err;
@@ -2440,7 +2371,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
                goto nla_put_failure;
        rtm->rtm_type     = RTN_MULTICAST;
        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
-       if (c->mfc_flags & MFC_STATIC)
+       if (c->_c.mfc_flags & MFC_STATIC)
                rtm->rtm_protocol = RTPROT_STATIC;
        else
                rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2449,7 +2380,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
        if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
            nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
                goto nla_put_failure;
-       err = __ipmr_fill_mroute(mrt, skb, c, rtm);
+       err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
        /* do not break the dump if cache is unresolved */
        if (err < 0 && err != -ENOENT)
                goto nla_put_failure;
@@ -2462,6 +2393,14 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
        return -EMSGSIZE;
 }
 
+static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+                            u32 portid, u32 seq, struct mr_mfc *c, int cmd,
+                            int flags)
+{
+       return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c,
+                               cmd, flags);
+}
+
 static size_t mroute_msgsize(bool unresolved, int maxvif)
 {
        size_t len =
@@ -2490,7 +2429,8 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
        struct sk_buff *skb;
        int err = -ENOBUFS;
 
-       skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
+       skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS,
+                                      mrt->maxvif),
                        GFP_ATOMIC);
        if (!skb)
                goto errout;
@@ -2634,62 +2574,8 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 
 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 {
-       struct net *net = sock_net(skb->sk);
-       struct mr_table *mrt;
-       struct mfc_cache *mfc;
-       unsigned int t = 0, s_t;
-       unsigned int e = 0, s_e;
-
-       s_t = cb->args[0];
-       s_e = cb->args[1];
-
-       rcu_read_lock();
-       ipmr_for_each_table(mrt, net) {
-               if (t < s_t)
-                       goto next_table;
-               list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
-                       if (e < s_e)
-                               goto next_entry;
-                       if (ipmr_fill_mroute(mrt, skb,
-                                            NETLINK_CB(cb->skb).portid,
-                                            cb->nlh->nlmsg_seq,
-                                            mfc, RTM_NEWROUTE,
-                                            NLM_F_MULTI) < 0)
-                               goto done;
-next_entry:
-                       e++;
-               }
-               e = 0;
-               s_e = 0;
-
-               spin_lock_bh(&mfc_unres_lock);
-               list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
-                       if (e < s_e)
-                               goto next_entry2;
-                       if (ipmr_fill_mroute(mrt, skb,
-                                            NETLINK_CB(cb->skb).portid,
-                                            cb->nlh->nlmsg_seq,
-                                            mfc, RTM_NEWROUTE,
-                                            NLM_F_MULTI) < 0) {
-                               spin_unlock_bh(&mfc_unres_lock);
-                               goto done;
-                       }
-next_entry2:
-                       e++;
-               }
-               spin_unlock_bh(&mfc_unres_lock);
-               e = 0;
-               s_e = 0;
-next_table:
-               t++;
-       }
-done:
-       rcu_read_unlock();
-
-       cb->args[1] = e;
-       cb->args[0] = t;
-
-       return skb->len;
+       return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
+                               _ipmr_fill_mroute, &mfc_unres_lock);
 }
 
 static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
@@ -2946,31 +2832,11 @@ static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb)
 /* The /proc interfaces to multicast routing :
  * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
  */
-struct ipmr_vif_iter {
-       struct seq_net_private p;
-       struct mr_table *mrt;
-       int ct;
-};
-
-static struct vif_device *ipmr_vif_seq_idx(struct net *net,
-                                          struct ipmr_vif_iter *iter,
-                                          loff_t pos)
-{
-       struct mr_table *mrt = iter->mrt;
-
-       for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
-               if (!VIF_EXISTS(mrt, iter->ct))
-                       continue;
-               if (pos-- == 0)
-                       return &mrt->vif_table[iter->ct];
-       }
-       return NULL;
-}
 
 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(mrt_lock)
 {
-       struct ipmr_vif_iter *iter = seq->private;
+       struct mr_vif_iter *iter = seq->private;
        struct net *net = seq_file_net(seq);
        struct mr_table *mrt;
 
@@ -2981,26 +2847,7 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
        iter->mrt = mrt;
 
        read_lock(&mrt_lock);
-       return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
-               : SEQ_START_TOKEN;
-}
-
-static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-       struct ipmr_vif_iter *iter = seq->private;
-       struct net *net = seq_file_net(seq);
-       struct mr_table *mrt = iter->mrt;
-
-       ++*pos;
-       if (v == SEQ_START_TOKEN)
-               return ipmr_vif_seq_idx(net, iter, 0);
-
-       while (++iter->ct < mrt->maxvif) {
-               if (!VIF_EXISTS(mrt, iter->ct))
-                       continue;
-               return &mrt->vif_table[iter->ct];
-       }
-       return NULL;
+       return mr_vif_seq_start(seq, pos);
 }
 
 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -3011,7 +2858,7 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
 
 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
 {
-       struct ipmr_vif_iter *iter = seq->private;
+       struct mr_vif_iter *iter = seq->private;
        struct mr_table *mrt = iter->mrt;
 
        if (v == SEQ_START_TOKEN) {
@@ -3019,7 +2866,8 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
        } else {
                const struct vif_device *vif = v;
-               const char *name =  vif->dev ? vif->dev->name : "none";
+               const char *name =  vif->dev ?
+                                   vif->dev->name : "none";
 
                seq_printf(seq,
                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
@@ -3033,7 +2881,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
 
 static const struct seq_operations ipmr_vif_seq_ops = {
        .start = ipmr_vif_seq_start,
-       .next  = ipmr_vif_seq_next,
+       .next  = mr_vif_seq_next,
        .stop  = ipmr_vif_seq_stop,
        .show  = ipmr_vif_seq_show,
 };
@@ -3041,7 +2889,7 @@ static const struct seq_operations ipmr_vif_seq_ops = {
 static int ipmr_vif_open(struct inode *inode, struct file *file)
 {
        return seq_open_net(inode, file, &ipmr_vif_seq_ops,
-                           sizeof(struct ipmr_vif_iter));
+                           sizeof(struct mr_vif_iter));
 }
 
 static const struct file_operations ipmr_vif_fops = {
@@ -3051,40 +2899,8 @@ static const struct file_operations ipmr_vif_fops = {
        .release = seq_release_net,
 };
 
-struct ipmr_mfc_iter {
-       struct seq_net_private p;
-       struct mr_table *mrt;
-       struct list_head *cache;
-};
-
-static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
-                                         struct ipmr_mfc_iter *it, loff_t pos)
-{
-       struct mr_table *mrt = it->mrt;
-       struct mfc_cache *mfc;
-
-       rcu_read_lock();
-       it->cache = &mrt->mfc_cache_list;
-       list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
-               if (pos-- == 0)
-                       return mfc;
-       rcu_read_unlock();
-
-       spin_lock_bh(&mfc_unres_lock);
-       it->cache = &mrt->mfc_unres_queue;
-       list_for_each_entry(mfc, it->cache, list)
-               if (pos-- == 0)
-                       return mfc;
-       spin_unlock_bh(&mfc_unres_lock);
-
-       it->cache = NULL;
-       return NULL;
-}
-
-
 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 {
-       struct ipmr_mfc_iter *it = seq->private;
        struct net *net = seq_file_net(seq);
        struct mr_table *mrt;
 
@@ -3092,54 +2908,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
        if (!mrt)
                return ERR_PTR(-ENOENT);
 
-       it->mrt = mrt;
-       it->cache = NULL;
-       return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
-               : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-       struct ipmr_mfc_iter *it = seq->private;
-       struct net *net = seq_file_net(seq);
-       struct mr_table *mrt = it->mrt;
-       struct mfc_cache *mfc = v;
-
-       ++*pos;
-
-       if (v == SEQ_START_TOKEN)
-               return ipmr_mfc_seq_idx(net, seq->private, 0);
-
-       if (mfc->list.next != it->cache)
-               return list_entry(mfc->list.next, struct mfc_cache, list);
-
-       if (it->cache == &mrt->mfc_unres_queue)
-               goto end_of_list;
-
-       /* exhausted cache_array, show unresolved */
-       rcu_read_unlock();
-       it->cache = &mrt->mfc_unres_queue;
-
-       spin_lock_bh(&mfc_unres_lock);
-       if (!list_empty(it->cache))
-               return list_first_entry(it->cache, struct mfc_cache, list);
-
-end_of_list:
-       spin_unlock_bh(&mfc_unres_lock);
-       it->cache = NULL;
-
-       return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
-       struct ipmr_mfc_iter *it = seq->private;
-       struct mr_table *mrt = it->mrt;
-
-       if (it->cache == &mrt->mfc_unres_queue)
-               spin_unlock_bh(&mfc_unres_lock);
-       else if (it->cache == &mrt->mfc_cache_list)
-               rcu_read_unlock();
+       return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
 }
 
 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -3151,26 +2920,26 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
                 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
        } else {
                const struct mfc_cache *mfc = v;
-               const struct ipmr_mfc_iter *it = seq->private;
+               const struct mr_mfc_iter *it = seq->private;
                const struct mr_table *mrt = it->mrt;
 
                seq_printf(seq, "%08X %08X %-3hd",
                           (__force u32) mfc->mfc_mcastgrp,
                           (__force u32) mfc->mfc_origin,
-                          mfc->mfc_parent);
+                          mfc->_c.mfc_parent);
 
                if (it->cache != &mrt->mfc_unres_queue) {
                        seq_printf(seq, " %8lu %8lu %8lu",
-                                  mfc->mfc_un.res.pkt,
-                                  mfc->mfc_un.res.bytes,
-                                  mfc->mfc_un.res.wrong_if);
-                       for (n = mfc->mfc_un.res.minvif;
-                            n < mfc->mfc_un.res.maxvif; n++) {
+                                  mfc->_c.mfc_un.res.pkt,
+                                  mfc->_c.mfc_un.res.bytes,
+                                  mfc->_c.mfc_un.res.wrong_if);
+                       for (n = mfc->_c.mfc_un.res.minvif;
+                            n < mfc->_c.mfc_un.res.maxvif; n++) {
                                if (VIF_EXISTS(mrt, n) &&
-                                   mfc->mfc_un.res.ttls[n] < 255)
+                                   mfc->_c.mfc_un.res.ttls[n] < 255)
                                        seq_printf(seq,
                                           " %2d:%-3d",
-                                          n, mfc->mfc_un.res.ttls[n]);
+                                          n, mfc->_c.mfc_un.res.ttls[n]);
                        }
                } else {
                        /* unresolved mfc_caches don't contain
@@ -3185,15 +2954,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 
 static const struct seq_operations ipmr_mfc_seq_ops = {
        .start = ipmr_mfc_seq_start,
-       .next  = ipmr_mfc_seq_next,
-       .stop  = ipmr_mfc_seq_stop,
+       .next  = mr_mfc_seq_next,
+       .stop  = mr_mfc_seq_stop,
        .show  = ipmr_mfc_seq_show,
 };
 
 static int ipmr_mfc_open(struct inode *inode, struct file *file)
 {
        return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
-                           sizeof(struct ipmr_mfc_iter));
+                           sizeof(struct mr_mfc_iter));
 }
 
 static const struct file_operations ipmr_mfc_fops = {
@@ -3229,7 +2998,7 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb)
 
        ipmr_for_each_table(mrt, net) {
                struct vif_device *v = &mrt->vif_table[0];
-               struct mfc_cache *mfc;
+               struct mr_mfc *mfc;
                int vifi;
 
                /* Notifiy on table VIF entries */
@@ -3246,7 +3015,8 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb)
                /* Notify on table MFC entries */
                list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
                        call_ipmr_mfc_entry_notifier(nb, net,
-                                                    FIB_EVENT_ENTRY_ADD, mfc,
+                                                    FIB_EVENT_ENTRY_ADD,
+                                                    (struct mfc_cache *)mfc,
                                                     mrt->id);
        }
 
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
new file mode 100644 (file)
index 0000000..8ba55bf
--- /dev/null
@@ -0,0 +1,323 @@
+/* Linux multicast routing support
+ * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
+ */
+
+#include <linux/mroute_base.h>
+
+/* Fill in the protocol-independent fields of a vif_device.
+ * 'dev' is only consulted to derive v->link; v->dev itself is left NULL
+ * here since that is set later under locking.
+ */
+void vif_device_init(struct vif_device *v,
+                    struct net_device *dev,
+                    unsigned long rate_limit,
+                    unsigned char threshold,
+                    unsigned short flags,
+                    unsigned short get_iflink_mask)
+{
+       /* Caller-supplied configuration */
+       v->flags = flags;
+       v->threshold = threshold;
+       v->rate_limit = rate_limit;
+
+       /* No device bound yet and all traffic counters start from zero */
+       v->dev = NULL;
+       v->pkt_in = 0;
+       v->pkt_out = 0;
+       v->bytes_in = 0;
+       v->bytes_out = 0;
+
+       /* Interfaces matching the mask report their iflink, others their
+        * own ifindex.
+        */
+       v->link = (v->flags & get_iflink_mask) ? dev_get_iflink(dev)
+                                              : dev->ifindex;
+}
+EXPORT_SYMBOL(vif_device_init);
+
+/* Allocate and initialise a multicast routing table.
+ *
+ * @net:         namespace recorded in the table
+ * @id:          table identifier
+ * @ops:         table operations; copied by value into the table
+ * @expire_func: callback installed on the table's expiry timer
+ * @table_set:   hook invoked with the finished table before it is returned
+ *
+ * Returns the new table, or NULL if either the allocation or the MFC hash
+ * table initialisation fails.
+ */
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+              struct mr_table_ops *ops,
+              void (*expire_func)(struct timer_list *t),
+              void (*table_set)(struct mr_table *mrt,
+                                struct net *net))
+{
+       struct mr_table *mrt;
+
+       mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+       if (!mrt)
+               return NULL;
+       mrt->id = id;
+       write_pnet(&mrt->net, net);
+
+       mrt->ops = *ops;
+       /* rhltable_init() can fail; never hand out a table whose hash
+        * was not set up. Nothing else has been initialised yet, so
+        * freeing the table is the only cleanup needed.
+        */
+       if (rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params)) {
+               kfree(mrt);
+               return NULL;
+       }
+       INIT_LIST_HEAD(&mrt->mfc_cache_list);
+       INIT_LIST_HEAD(&mrt->mfc_unres_queue);
+
+       timer_setup(&mrt->ipmr_expire_timer, expire_func, 0);
+
+       /* -1 marks "no register vif" until one is configured */
+       mrt->mroute_reg_vif_num = -1;
+       table_set(mrt, net);
+       return mrt;
+}
+EXPORT_SYMBOL(mr_table_alloc);
+
+/* Look @hasharg up in the table's MFC hash and return the first entry
+ * whose mfc_parent equals @parent. A @parent of -1 accepts any entry.
+ * Returns NULL when nothing matches.
+ */
+void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent)
+{
+       struct rhlist_head *pos, *head;
+       struct mr_mfc *entry;
+
+       head = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+       rhl_for_each_entry_rcu(entry, pos, head, mnode) {
+               if (parent != -1 && parent != entry->mfc_parent)
+                       continue;
+               return entry;
+       }
+
+       return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_parent);
+
+/* Search the entries hashed under the table's "any" comparison key
+ * (ops.cmparg_any) and return the first one whose TTL for @vifi is
+ * below 255. Returns NULL when there is none.
+ */
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi)
+{
+       struct rhlist_head *pos, *head;
+       struct mr_mfc *entry;
+
+       head = rhltable_lookup(&mrt->mfc_hash, mrt->ops.cmparg_any,
+                              *mrt->ops.rht_params);
+       rhl_for_each_entry_rcu(entry, pos, head, mnode) {
+               if (entry->mfc_un.res.ttls[vifi] >= 255)
+                       continue;
+               return entry;
+       }
+
+       return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_any_parent);
+
+/* Find an entry hashed under @hasharg that is usable for @vifi: either
+ * its own TTL for @vifi is below 255, or the "any" entry found for its
+ * parent vif has a TTL for @vifi below 255. When no such entry exists,
+ * fall back to the "any" lookup for @vifi itself.
+ */
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg)
+{
+       struct rhlist_head *pos, *head;
+       struct mr_mfc *entry, *proxy;
+
+       head = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+       rhl_for_each_entry_rcu(entry, pos, head, mnode) {
+               if (entry->mfc_un.res.ttls[vifi] >= 255) {
+                       /* It's ok if the vifi is part of the static tree */
+                       proxy = mr_mfc_find_any_parent(mrt, entry->mfc_parent);
+                       if (!proxy || proxy->mfc_un.res.ttls[vifi] >= 255)
+                               continue;
+               }
+               return entry;
+       }
+
+       return mr_mfc_find_any_parent(mrt, vifi);
+}
+EXPORT_SYMBOL(mr_mfc_find_any);
+
+#ifdef CONFIG_PROC_FS
+/* Position the vif iterator on the @pos'th existing vif (counting from
+ * zero, skipping unused slots) and return it, or NULL when the table
+ * holds fewer existing vifs. iter->ct is left at the slot returned.
+ */
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos)
+{
+       struct mr_table *mrt = iter->mrt;
+
+       iter->ct = 0;
+       while (iter->ct < mrt->maxvif) {
+               /* only existing vifs consume a position */
+               if (VIF_EXISTS(mrt, iter->ct) && pos-- == 0)
+                       return &mrt->vif_table[iter->ct];
+               ++iter->ct;
+       }
+       return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_idx);
+
+/* seq_file ->next step for the vif table: bump *pos and return the next
+ * existing vif after the iterator's current slot. After the start token
+ * the walk begins at the first existing vif. Returns NULL at the end.
+ */
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       struct mr_vif_iter *iter = seq->private;
+       struct net *net = seq_file_net(seq);
+       struct mr_table *mrt = iter->mrt;
+
+       ++*pos;
+       if (v == SEQ_START_TOKEN)
+               return mr_vif_seq_idx(net, iter, 0);
+
+       for (++iter->ct; iter->ct < mrt->maxvif; ++iter->ct) {
+               if (VIF_EXISTS(mrt, iter->ct))
+                       return &mrt->vif_table[iter->ct];
+       }
+       return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_next);
+
+/* Return the @pos'th MFC entry, counting first through the resolved
+ * cache list and then through the unresolved queue, or NULL if @pos is
+ * past the end of both.
+ *
+ * Locking: when an entry is returned, the lock guarding its list is
+ * still held on return - the RCU read lock for mfc_cache_list, or
+ * it->lock (BH-disabling spinlock) for mfc_unres_queue. it->cache records
+ * which list the entry came from; it is NULL when nothing is returned, in
+ * which case no lock is held.
+ * NOTE(review): the matching unlock is presumably done by the seq stop
+ * handler based on it->cache - confirm against mr_mfc_seq_stop.
+ */
+void *mr_mfc_seq_idx(struct net *net,
+                    struct mr_mfc_iter *it, loff_t pos)
+{
+       struct mr_table *mrt = it->mrt;
+       struct mr_mfc *mfc;
+
+       /* Resolved entries first; returns with rcu_read_lock() held */
+       rcu_read_lock();
+       it->cache = &mrt->mfc_cache_list;
+       list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+               if (pos-- == 0)
+                       return mfc;
+       rcu_read_unlock();
+
+       /* Then the unresolved queue; returns with it->lock held */
+       spin_lock_bh(it->lock);
+       it->cache = &mrt->mfc_unres_queue;
+       list_for_each_entry(mfc, it->cache, list)
+               if (pos-- == 0)
+                       return mfc;
+       spin_unlock_bh(it->lock);
+
+       /* Ran off the end of both lists: no list, no lock held */
+       it->cache = NULL;
+       return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_idx);
+
+/* seq_file ->next step for the MFC lists: bump *pos and return the entry
+ * following @v, or NULL at the end.
+ *
+ * The walk covers the resolved cache list and then the unresolved queue.
+ * it->cache names the list currently being walked. On the switch from the
+ * resolved list to the unresolved queue the RCU read lock is dropped and
+ * it->lock is taken; at the very end it->lock is dropped and it->cache is
+ * cleared.
+ */
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+                     loff_t *pos)
+{
+       struct mr_mfc_iter *it = seq->private;
+       struct net *net = seq_file_net(seq);
+       struct mr_table *mrt = it->mrt;
+       struct mr_mfc *c = v;
+
+       ++*pos;
+
+       /* First real step after the header token: start from entry 0 */
+       if (v == SEQ_START_TOKEN)
+               return mr_mfc_seq_idx(net, seq->private, 0);
+
+       /* Still inside the current list: just follow the link */
+       if (c->list.next != it->cache)
+               return list_entry(c->list.next, struct mr_mfc, list);
+
+       /* End of the unresolved queue means end of the whole walk */
+       if (it->cache == &mrt->mfc_unres_queue)
+               goto end_of_list;
+
+       /* exhausted the resolved cache list, show unresolved */
+       rcu_read_unlock();
+       it->cache = &mrt->mfc_unres_queue;
+
+       spin_lock_bh(it->lock);
+       if (!list_empty(it->cache))
+               return list_first_entry(it->cache, struct mr_mfc, list);
+
+end_of_list:
+       /* Both paths reaching here hold it->lock */
+       spin_unlock_bh(it->lock);
+       it->cache = NULL;
+
+       return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_next);
+#endif
+
+/* Append the routing attributes of MFC entry @c to netlink message @skb:
+ * RTA_IIF (when the parent vif exists), an RTA_MULTIPATH nest with one
+ * rtnexthop per existing output vif whose TTL is below 255, then
+ * RTA_MFC_STATS and RTA_EXPIRES.
+ *
+ * Returns 1 on success, -ENOENT for an unresolved entry (after flagging
+ * @rtm with RTNH_F_UNRESOLVED), or -EMSGSIZE when @skb runs out of room.
+ */
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+                  struct mr_mfc *c, struct rtmsg *rtm)
+{
+       struct rta_mfc_stats stats;
+       unsigned long last, age;
+       struct rtnexthop *hop;
+       struct nlattr *nest;
+       int i;
+
+       /* If cache is unresolved, don't try to parse IIF and OIF */
+       if (c->mfc_parent >= MAXVIFS) {
+               rtm->rtm_flags |= RTNH_F_UNRESOLVED;
+               return -ENOENT;
+       }
+
+       if (VIF_EXISTS(mrt, c->mfc_parent)) {
+               int iif = mrt->vif_table[c->mfc_parent].dev->ifindex;
+
+               if (nla_put_u32(skb, RTA_IIF, iif) < 0)
+                       return -EMSGSIZE;
+       }
+
+       if (c->mfc_flags & MFC_OFFLOAD)
+               rtm->rtm_flags |= RTNH_F_OFFLOAD;
+
+       nest = nla_nest_start(skb, RTA_MULTIPATH);
+       if (!nest)
+               return -EMSGSIZE;
+
+       for (i = c->mfc_un.res.minvif; i < c->mfc_un.res.maxvif; i++) {
+               /* only existing vifs with a forwarding TTL get a nexthop */
+               if (!VIF_EXISTS(mrt, i) || c->mfc_un.res.ttls[i] >= 255)
+                       continue;
+
+               hop = nla_reserve_nohdr(skb, sizeof(*hop));
+               if (!hop) {
+                       nla_nest_cancel(skb, nest);
+                       return -EMSGSIZE;
+               }
+
+               hop->rtnh_flags = 0;
+               hop->rtnh_hops = c->mfc_un.res.ttls[i];
+               hop->rtnh_ifindex = mrt->vif_table[i].dev->ifindex;
+               hop->rtnh_len = sizeof(*hop);
+       }
+
+       nla_nest_end(skb, nest);
+
+       /* Time since last use, clamped to 0 if lastuse is ahead of jiffies */
+       last = READ_ONCE(c->mfc_un.res.lastuse);
+       if (time_after_eq(jiffies, last))
+               age = jiffies - last;
+       else
+               age = 0;
+
+       stats.mfcs_packets = c->mfc_un.res.pkt;
+       stats.mfcs_bytes = c->mfc_un.res.bytes;
+       stats.mfcs_wrong_if = c->mfc_un.res.wrong_if;
+       if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(stats), &stats, RTA_PAD))
+               return -EMSGSIZE;
+       if (nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(age),
+                             RTA_PAD))
+               return -EMSGSIZE;
+
+       rtm->rtm_type = RTN_MULTICAST;
+       return 1;
+}
+EXPORT_SYMBOL(mr_fill_mroute);
+
+/* Netlink dump of all MFC entries, resolved and unresolved, of every
+ * table produced by @iter.
+ *
+ * @skb:  message buffer being filled
+ * @cb:   dump state; args[0] is the table index and args[1] the entry
+ *        index at which a previous, interrupted pass stopped
+ * @iter: returns the table following @mrt (the first one for NULL), or
+ *        NULL when there are no more tables
+ * @fill: emits one entry into @skb; a negative return stops the dump
+ * @lock: spinlock taken (BH-disabled) around the unresolved queue walk
+ *
+ * The entry index counts through a table's resolved list and its
+ * unresolved queue as one sequence. It must not be reset between the two
+ * lists: otherwise a dump interrupted inside the unresolved queue would
+ * resume with a cursor that is applied to the resolved list, repeating or
+ * skipping entries.
+ *
+ * Returns skb->len.
+ */
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+                    struct mr_table *(*iter)(struct net *net,
+                                             struct mr_table *mrt),
+                    int (*fill)(struct mr_table *mrt,
+                                struct sk_buff *skb,
+                                u32 portid, u32 seq, struct mr_mfc *c,
+                                int cmd, int flags),
+                    spinlock_t *lock)
+{
+       unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1];
+       struct net *net = sock_net(skb->sk);
+       struct mr_table *mrt;
+       struct mr_mfc *mfc;
+
+       rcu_read_lock();
+       for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
+               /* Skip tables fully dumped by an earlier pass */
+               if (t < s_t)
+                       goto next_table;
+               list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+                       if (e < s_e)
+                               goto next_entry;
+                       if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+                                cb->nlh->nlmsg_seq, mfc,
+                                RTM_NEWROUTE, NLM_F_MULTI) < 0)
+                               goto done;
+next_entry:
+                       e++;
+               }
+
+               /* Keep counting: e and s_e span both lists of the table */
+               spin_lock_bh(lock);
+               list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
+                       if (e < s_e)
+                               goto next_entry2;
+                       if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+                                cb->nlh->nlmsg_seq, mfc,
+                                RTM_NEWROUTE, NLM_F_MULTI) < 0) {
+                               spin_unlock_bh(lock);
+                               goto done;
+                       }
+next_entry2:
+                       e++;
+               }
+               spin_unlock_bh(lock);
+               /* Table finished: the next one starts from its first entry */
+               e = 0;
+               s_e = 0;
+next_table:
+               t++;
+       }
+done:
+       rcu_read_unlock();
+
+       /* Remember where to resume on the next pass */
+       cb->args[1] = e;
+       cb->args[0] = t;
+
+       return skb->len;
+}
+EXPORT_SYMBOL(mr_rtm_dumproute);
index e3e420f3ba7b2de96be867912695efb3ae2b193c..c36ffce3c81249cceb3ecdf8a2ad8ebd1161e41e 100644 (file)
@@ -1635,6 +1635,7 @@ static void __net_exit arp_tables_net_exit(struct net *net)
 static struct pernet_operations arp_tables_net_ops = {
        .init = arp_tables_net_init,
        .exit = arp_tables_net_exit,
+       .async = true,
 };
 
 static int __init arp_tables_init(void)
index 8f8713b4388fbfa9a0d36298603995b02718d21d..49c2490193aeeed2c6739ad42d2a38ef27945581 100644 (file)
@@ -65,6 +65,7 @@ static void __net_exit arptable_filter_net_exit(struct net *net)
 
 static struct pernet_operations arptable_filter_net_ops = {
        .exit = arptable_filter_net_exit,
+       .async = true,
 };
 
 static int __init arptable_filter_init(void)
index 4b02ab39ebc54b73f6ec532fa6329fa9df47bb77..0fc88fa7a4dc046dda2e9c003062a3a10b986b3e 100644 (file)
@@ -232,7 +232,6 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
        c->hash_mode = i->hash_mode;
        c->hash_initval = i->hash_initval;
        refcount_set(&c->refcount, 1);
-       refcount_set(&c->entries, 1);
 
        spin_lock_bh(&cn->lock);
        if (__clusterip_config_find(net, ip)) {
@@ -263,8 +262,10 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
 
        c->notifier.notifier_call = clusterip_netdev_event;
        err = register_netdevice_notifier(&c->notifier);
-       if (!err)
+       if (!err) {
+               refcount_set(&c->entries, 1);
                return c;
+       }
 
 #ifdef CONFIG_PROC_FS
        proc_remove(c->pde);
@@ -273,7 +274,7 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
        spin_lock_bh(&cn->lock);
        list_del_rcu(&c->list);
        spin_unlock_bh(&cn->lock);
-       kfree(c);
+       clusterip_config_put(c);
 
        return ERR_PTR(err);
 }
@@ -496,12 +497,15 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
                                return PTR_ERR(config);
                }
        }
-       cipinfo->config = config;
 
        ret = nf_ct_netns_get(par->net, par->family);
-       if (ret < 0)
+       if (ret < 0) {
                pr_info("cannot load conntrack support for proto=%u\n",
                        par->family);
+               clusterip_config_entry_put(par->net, config);
+               clusterip_config_put(config);
+               return ret;
+       }
 
        if (!par->net->xt.clusterip_deprecated_warning) {
                pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
@@ -509,6 +513,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
                par->net->xt.clusterip_deprecated_warning = true;
        }
 
+       cipinfo->config = config;
        return ret;
 }
 
@@ -840,6 +845,7 @@ static struct pernet_operations clusterip_net_ops = {
        .exit = clusterip_net_exit,
        .id   = &clusterip_net_id,
        .size = sizeof(struct clusterip_net),
+       .async = true,
 };
 
 static int __init clusterip_tg_init(void)
index dea138ca892543cbc560f56cb6aedc7aa93336f2..f6074059531ae031c71bc931a16a675c34574be8 100644 (file)
@@ -113,6 +113,7 @@ static void __net_exit iptable_mangle_net_exit(struct net *net)
 
 static struct pernet_operations iptable_mangle_net_ops = {
        .exit = iptable_mangle_net_exit,
+       .async = true,
 };
 
 static int __init iptable_mangle_init(void)
index 0f7255cc65ee14bc0f028e3d2bc793c33bcc57f8..b771af74be792b098999e1fbcc3ff55fd6765c01 100644 (file)
@@ -129,6 +129,7 @@ static void __net_exit iptable_nat_net_exit(struct net *net)
 
 static struct pernet_operations iptable_nat_net_ops = {
        .exit   = iptable_nat_net_exit,
+       .async  = true,
 };
 
 static int __init iptable_nat_init(void)
index 960625aabf0454b3814f5094faca1468e0d37059..963753e508422e75ce8ee4e9cdd0be6a17879a61 100644 (file)
@@ -76,6 +76,7 @@ static void __net_exit iptable_raw_net_exit(struct net *net)
 
 static struct pernet_operations iptable_raw_net_ops = {
        .exit = iptable_raw_net_exit,
+       .async = true,
 };
 
 static int __init iptable_raw_init(void)
index e5379fe57b64184c9c5f3340c268ff968ed1131e..c40d6b3d8b6a06b3afa80bf83654dadfa52cae01 100644 (file)
@@ -76,6 +76,7 @@ static void __net_exit iptable_security_net_exit(struct net *net)
 
 static struct pernet_operations iptable_security_net_ops = {
        .exit = iptable_security_net_exit,
+       .async = true,
 };
 
 static int __init iptable_security_init(void)
index b50721d9d30ef6f98c419c5a745a6df5f1f6a62d..6531f69db01012900c515fd806f31a910bc8f5a4 100644 (file)
@@ -399,6 +399,7 @@ static struct pernet_operations ipv4_net_ops = {
        .exit = ipv4_net_exit,
        .id = &conntrack4_net_id,
        .size = sizeof(struct conntrack4_net),
+       .async = true,
 };
 
 static int __init nf_conntrack_l3proto_ipv4_init(void)
index a0d3ad60a41132822eb54845c9fbfe8098e161fc..57244b62a4fc239937aa5779ee964e8948dc1728 100644 (file)
@@ -118,6 +118,7 @@ static void __net_exit defrag4_net_exit(struct net *net)
 
 static struct pernet_operations defrag4_net_ops = {
        .exit = defrag4_net_exit,
+       .async = true,
 };
 
 static int __init nf_defrag_init(void)
index 25d2975da156fb015848a8fe7e0f6f8cc9d4842b..0cd46bffa46914efab9f26b7d85d7612f1b41450 100644 (file)
@@ -111,6 +111,7 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
        default:
                return -1;
        }
+       csum_replace4(&iph->check, addr, new_addr);
 
        return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
 }
@@ -185,7 +186,7 @@ static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
        if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
                return false;
 
-       if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+       if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
                return false;
 
        return true;
index df5c2a2061a4b1e76c2d26f4b9b44164883a4dcb..162293469ac22c5ecebc64a0817bd2b4a47c8ad4 100644 (file)
@@ -122,6 +122,7 @@ static void __net_exit nf_log_arp_net_exit(struct net *net)
 static struct pernet_operations nf_log_arp_net_ops = {
        .init = nf_log_arp_net_init,
        .exit = nf_log_arp_net_exit,
+       .async = true,
 };
 
 static int __init nf_log_arp_init(void)
index 4388de0e5380c6423fbdfe7438727900fc297d7c..7a06de140f3ca29bec9f3c5b0a5ae1251d686d6e 100644 (file)
@@ -358,6 +358,7 @@ static void __net_exit nf_log_ipv4_net_exit(struct net *net)
 static struct pernet_operations nf_log_ipv4_net_ops = {
        .init = nf_log_ipv4_net_init,
        .exit = nf_log_ipv4_net_exit,
+       .async = true,
 };
 
 static int __init nf_log_ipv4_init(void)
index fdabc70283b646fe496a87228f380493eb8fd8ee..d97e83b2dd3336e14c6121d5d9d2dd7553f03a7b 100644 (file)
@@ -556,4 +556,3 @@ int __init ip_misc_proc_init(void)
 {
        return register_pernet_subsys(&ip_proc_ops);
 }
-
index 54648d20bf0f87e1b9a2cceb7bb18a31c8df319a..720bef7da2f6d13e9b09269c38d14a7945ae4c5e 100644 (file)
@@ -711,9 +711,7 @@ static void raw_close(struct sock *sk, long timeout)
        /*
         * Raw sockets may have direct kernel references. Kill them.
         */
-       rtnl_lock();
        ip_ra_control(sk, 0, NULL);
-       rtnl_unlock();
 
        sk_common_release(sk);
 }
index 26eefa2eaa448a00e03918f6d25f365110071bdf..4ac5728689f57245f5f74a838be303cf2177ae00 100644 (file)
@@ -128,10 +128,11 @@ static int ip_rt_redirect_silence __read_mostly   = ((HZ / 50) << (9 + 1));
 static int ip_rt_error_cost __read_mostly      = HZ;
 static int ip_rt_error_burst __read_mostly     = 5 * HZ;
 static int ip_rt_mtu_expires __read_mostly     = 10 * 60 * HZ;
-static int ip_rt_min_pmtu __read_mostly                = 512 + 20 + 20;
+static u32 ip_rt_min_pmtu __read_mostly                = 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly      = 256;
 
 static int ip_rt_gc_timeout __read_mostly      = RT_GC_TIMEOUT;
+
 /*
  *     Interface to generic destination cache.
  */
@@ -634,6 +635,7 @@ static inline u32 fnhe_hashfun(__be32 daddr)
 static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
 {
        rt->rt_pmtu = fnhe->fnhe_pmtu;
+       rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
        rt->dst.expires = fnhe->fnhe_expires;
 
        if (fnhe->fnhe_gw) {
@@ -644,7 +646,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
 }
 
 static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
-                                 u32 pmtu, unsigned long expires)
+                                 u32 pmtu, bool lock, unsigned long expires)
 {
        struct fnhe_hash_bucket *hash;
        struct fib_nh_exception *fnhe;
@@ -681,8 +683,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
                        fnhe->fnhe_genid = genid;
                if (gw)
                        fnhe->fnhe_gw = gw;
-               if (pmtu)
+               if (pmtu) {
                        fnhe->fnhe_pmtu = pmtu;
+                       fnhe->fnhe_mtu_locked = lock;
+               }
                fnhe->fnhe_expires = max(1UL, expires);
                /* Update all cached dsts too */
                rt = rcu_dereference(fnhe->fnhe_rth_input);
@@ -706,6 +710,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
                fnhe->fnhe_daddr = daddr;
                fnhe->fnhe_gw = gw;
                fnhe->fnhe_pmtu = pmtu;
+               fnhe->fnhe_mtu_locked = lock;
                fnhe->fnhe_expires = expires;
 
                /* Exception created; mark the cached routes for the nexthop
@@ -787,7 +792,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
                                struct fib_nh *nh = &FIB_RES_NH(res);
 
                                update_or_create_fnhe(nh, fl4->daddr, new_gw,
-                                               0, jiffies + ip_rt_gc_timeout);
+                                               0, false,
+                                               jiffies + ip_rt_gc_timeout);
                        }
                        if (kill_route)
                                rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -931,14 +937,23 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 
 static int ip_error(struct sk_buff *skb)
 {
-       struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
        struct rtable *rt = skb_rtable(skb);
+       struct net_device *dev = skb->dev;
+       struct in_device *in_dev;
        struct inet_peer *peer;
        unsigned long now;
        struct net *net;
        bool send;
        int code;
 
+       if (netif_is_l3_master(skb->dev)) {
+               dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
+               if (!dev)
+                       goto out;
+       }
+
+       in_dev = __in_dev_get_rcu(dev);
+
        /* IP on this device is disabled. */
        if (!in_dev)
                goto out;
@@ -1000,15 +1015,18 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 {
        struct dst_entry *dst = &rt->dst;
        struct fib_result res;
+       bool lock = false;
 
-       if (dst_metric_locked(dst, RTAX_MTU))
+       if (ip_mtu_locked(dst))
                return;
 
        if (ipv4_mtu(dst) < mtu)
                return;
 
-       if (mtu < ip_rt_min_pmtu)
+       if (mtu < ip_rt_min_pmtu) {
+               lock = true;
                mtu = ip_rt_min_pmtu;
+       }
 
        if (rt->rt_pmtu == mtu &&
            time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
@@ -1018,7 +1036,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
        if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
                struct fib_nh *nh = &FIB_RES_NH(res);
 
-               update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
+               update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
                                      jiffies + ip_rt_mtu_expires);
        }
        rcu_read_unlock();
@@ -1271,7 +1289,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 
        mtu = READ_ONCE(dst->dev->mtu);
 
-       if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
+       if (unlikely(ip_mtu_locked(dst))) {
                if (rt->rt_uses_gateway && mtu > 576)
                        mtu = 576;
        }
@@ -1384,7 +1402,7 @@ struct uncached_list {
 
 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
 
-static void rt_add_uncached_list(struct rtable *rt)
+void rt_add_uncached_list(struct rtable *rt)
 {
        struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
 
@@ -1395,14 +1413,8 @@ static void rt_add_uncached_list(struct rtable *rt)
        spin_unlock_bh(&ul->lock);
 }
 
-static void ipv4_dst_destroy(struct dst_entry *dst)
+void rt_del_uncached_list(struct rtable *rt)
 {
-       struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
-       struct rtable *rt = (struct rtable *) dst;
-
-       if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
-               kfree(p);
-
        if (!list_empty(&rt->rt_uncached)) {
                struct uncached_list *ul = rt->rt_uncached_list;
 
@@ -1412,6 +1424,17 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
        }
 }
 
+static void ipv4_dst_destroy(struct dst_entry *dst)
+{
+       struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
+       struct rtable *rt = (struct rtable *)dst;
+
+       if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
+               kfree(p);
+
+       rt_del_uncached_list(rt);
+}
+
 void rt_flush_dev(struct net_device *dev)
 {
        struct net *net = dev_net(dev);
@@ -1507,6 +1530,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
                rt->rt_is_input = 0;
                rt->rt_iif = 0;
                rt->rt_pmtu = 0;
+               rt->rt_mtu_locked = 0;
                rt->rt_gateway = 0;
                rt->rt_uses_gateway = 0;
                INIT_LIST_HEAD(&rt->rt_uncached);
@@ -1748,44 +1772,45 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
                                 struct flow_keys *hash_keys)
 {
        const struct iphdr *outer_iph = ip_hdr(skb);
+       const struct iphdr *key_iph = outer_iph;
        const struct iphdr *inner_iph;
        const struct icmphdr *icmph;
        struct iphdr _inner_iph;
        struct icmphdr _icmph;
 
-       hash_keys->addrs.v4addrs.src = outer_iph->saddr;
-       hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
        if (likely(outer_iph->protocol != IPPROTO_ICMP))
-               return;
+               goto out;
 
        if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
-               return;
+               goto out;
 
        icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
                                   &_icmph);
        if (!icmph)
-               return;
+               goto out;
 
        if (icmph->type != ICMP_DEST_UNREACH &&
            icmph->type != ICMP_REDIRECT &&
            icmph->type != ICMP_TIME_EXCEEDED &&
            icmph->type != ICMP_PARAMETERPROB)
-               return;
+               goto out;
 
        inner_iph = skb_header_pointer(skb,
                                       outer_iph->ihl * 4 + sizeof(_icmph),
                                       sizeof(_inner_iph), &_inner_iph);
        if (!inner_iph)
-               return;
-       hash_keys->addrs.v4addrs.src = inner_iph->saddr;
-       hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
+               goto out;
+
+       key_iph = inner_iph;
+out:
+       hash_keys->addrs.v4addrs.src = key_iph->saddr;
+       hash_keys->addrs.v4addrs.dst = key_iph->daddr;
 }
 
 /* if skb is set it will be used and fl4 can be NULL */
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
-                      const struct sk_buff *skb)
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
+                      const struct sk_buff *skb, struct flow_keys *flkeys)
 {
-       struct net *net = fi->fib_net;
        struct flow_keys hash_keys;
        u32 mhash;
 
@@ -1809,15 +1834,20 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
                        /* short-circuit if we already have L4 hash present */
                        if (skb->l4_hash)
                                return skb_get_hash_raw(skb) >> 1;
+
                        memset(&hash_keys, 0, sizeof(hash_keys));
-                       skb_flow_dissect_flow_keys(skb, &keys, flag);
+
+                       if (!flkeys) {
+                               skb_flow_dissect_flow_keys(skb, &keys, flag);
+                               flkeys = &keys;
+                       }
 
                        hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
-                       hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
-                       hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
-                       hash_keys.ports.src = keys.ports.src;
-                       hash_keys.ports.dst = keys.ports.dst;
-                       hash_keys.basic.ip_proto = keys.basic.ip_proto;
+                       hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
+                       hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
+                       hash_keys.ports.src = flkeys->ports.src;
+                       hash_keys.ports.dst = flkeys->ports.dst;
+                       hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
                } else {
                        memset(&hash_keys, 0, sizeof(hash_keys));
                        hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -1838,11 +1868,12 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
 static int ip_mkroute_input(struct sk_buff *skb,
                            struct fib_result *res,
                            struct in_device *in_dev,
-                           __be32 daddr, __be32 saddr, u32 tos)
+                           __be32 daddr, __be32 saddr, u32 tos,
+                           struct flow_keys *hkeys)
 {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
        if (res->fi && res->fi->fib_nhs > 1) {
-               int h = fib_multipath_hash(res->fi, NULL, skb);
+               int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
 
                fib_select_multipath(res, h);
        }
@@ -1868,13 +1899,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                               struct fib_result *res)
 {
        struct in_device *in_dev = __in_dev_get_rcu(dev);
+       struct flow_keys *flkeys = NULL, _flkeys;
+       struct net    *net = dev_net(dev);
        struct ip_tunnel_info *tun_info;
-       struct flowi4   fl4;
+       int             err = -EINVAL;
        unsigned int    flags = 0;
        u32             itag = 0;
        struct rtable   *rth;
-       int             err = -EINVAL;
-       struct net    *net = dev_net(dev);
+       struct flowi4   fl4;
        bool do_cache;
 
        /* IP on this device is disabled. */
@@ -1933,6 +1965,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        fl4.daddr = daddr;
        fl4.saddr = saddr;
        fl4.flowi4_uid = sock_net_uid(net, NULL);
+
+       if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
+               flkeys = &_flkeys;
+
        err = fib_lookup(net, &fl4, res, 0);
        if (err != 0) {
                if (!IN_DEV_FORWARD(in_dev))
@@ -1958,7 +1994,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        if (res->type != RTN_UNICAST)
                goto martian_destination;
 
-       err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+       err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
 out:   return err;
 
 brd_input:
@@ -2511,6 +2547,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
                rt->rt_is_input = ort->rt_is_input;
                rt->rt_iif = ort->rt_iif;
                rt->rt_pmtu = ort->rt_pmtu;
+               rt->rt_mtu_locked = ort->rt_mtu_locked;
 
                rt->rt_genid = rt_genid_ipv4(net);
                rt->rt_flags = ort->rt_flags;
@@ -2613,6 +2650,8 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src, u32 table_id,
        memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
        if (rt->rt_pmtu && expires)
                metrics[RTAX_MTU - 1] = rt->rt_pmtu;
+       if (rt->rt_mtu_locked && expires)
+               metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
        if (rtnetlink_put_metrics(skb, metrics) < 0)
                goto nla_put_failure;
 
@@ -2798,6 +2837,7 @@ void ip_rt_multicast_event(struct in_device *in_dev)
 static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
 static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
 static int ip_rt_gc_elasticity __read_mostly   = 8;
+static int ip_min_valid_pmtu __read_mostly     = IPV4_MIN_MTU;
 
 static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
                                        void __user *buffer,
@@ -2913,7 +2953,8 @@ static struct ctl_table ipv4_route_table[] = {
                .data           = &ip_rt_min_pmtu,
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &ip_min_valid_pmtu,
        },
        {
                .procname       = "min_adv_mss",
index 89683d868b37132297fe8ad9d52180b0c9a0fbfe..5b72d97693f82ef56bed43b14865d649a6e59d38 100644 (file)
@@ -400,7 +400,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
 
        ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        if (write && ret == 0)
-               call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net);
+               call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
 
        return ret;
 }
@@ -520,22 +520,6 @@ static struct ctl_table ipv4_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_doulongvec_minmax,
        },
-       {
-               .procname       = "udp_rmem_min",
-               .data           = &sysctl_udp_rmem_min,
-               .maxlen         = sizeof(sysctl_udp_rmem_min),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &one
-       },
-       {
-               .procname       = "udp_wmem_min",
-               .data           = &sysctl_udp_wmem_min,
-               .maxlen         = sizeof(sysctl_udp_wmem_min),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &one
-       },
        { }
 };
 
@@ -1167,6 +1151,22 @@ static struct ctl_table ipv4_net_table[] = {
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &one,
        },
+       {
+               .procname       = "udp_rmem_min",
+               .data           = &init_net.ipv4.sysctl_udp_rmem_min,
+               .maxlen         = sizeof(init_net.ipv4.sysctl_udp_rmem_min),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &one
+       },
+       {
+               .procname       = "udp_wmem_min",
+               .data           = &init_net.ipv4.sysctl_udp_wmem_min,
+               .maxlen         = sizeof(init_net.ipv4.sysctl_udp_wmem_min),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &one
+       },
        { }
 };
 
index a33539798bf61b99760b8f5923e3df14cd7400a7..0c31be306572acdecaf45cdb0357bb0f7f9eca8b 100644 (file)
@@ -994,7 +994,9 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
                        get_page(page);
                        skb_fill_page_desc(skb, i, page, offset, copy);
                }
-               skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
+
+               if (!(flags & MSG_NO_SHARED_FRAGS))
+                       skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
 
                skb->len += copy;
                skb->data_len += copy;
@@ -3031,8 +3033,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
        u32 rate;
 
        stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
-                         3 * nla_total_size(sizeof(u32)) +
-                         2 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
+                         5 * nla_total_size(sizeof(u32)) +
+                         3 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
        if (!stats)
                return NULL;
 
@@ -3061,6 +3063,10 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 
        nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
        nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
+       nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
+
+       nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
+       nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
        return stats;
 }
 
@@ -3539,6 +3545,7 @@ int tcp_abort(struct sock *sk, int err)
 
        bh_unlock_sock(sk);
        local_bh_enable();
+       tcp_write_queue_purge(sk);
        release_sock(sk);
        return 0;
 }
index a471f696e13c82cddd11633fd4bfdbc6d84f4bcc..158d105e76da1b5fcf29db4a5b87b063645a5ac1 100644 (file)
@@ -97,10 +97,9 @@ struct bbr {
                packet_conservation:1,  /* use packet conservation? */
                restore_cwnd:1,      /* decided to revert cwnd to old value */
                round_start:1,       /* start of packet-timed tx->ack round? */
-               tso_segs_goal:7,     /* segments we want in each skb we send */
                idle_restart:1,      /* restarting after idle? */
                probe_rtt_round_done:1,  /* a BBR_PROBE_RTT round at 4 pkts? */
-               unused:5,
+               unused:12,
                lt_is_sampling:1,    /* taking long-term ("LT") samples now? */
                lt_rtt_cnt:7,        /* round trips in long-term interval */
                lt_use_bw:1;         /* use lt_bw as our bw estimate? */
@@ -261,23 +260,25 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
                sk->sk_pacing_rate = rate;
 }
 
-/* Return count of segments we want in the skbs we send, or 0 for default. */
-static u32 bbr_tso_segs_goal(struct sock *sk)
+/* override sysctl_tcp_min_tso_segs */
+static u32 bbr_min_tso_segs(struct sock *sk)
 {
-       struct bbr *bbr = inet_csk_ca(sk);
-
-       return bbr->tso_segs_goal;
+       return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
 }
 
-static void bbr_set_tso_segs_goal(struct sock *sk)
+static u32 bbr_tso_segs_goal(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
-       struct bbr *bbr = inet_csk_ca(sk);
-       u32 min_segs;
+       u32 segs, bytes;
+
+       /* Sort of tcp_tso_autosize() but ignoring
+        * driver provided sk_gso_max_size.
+        */
+       bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+                     GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+       segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
 
-       min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
-       bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
-                                0x7FU);
+       return min(segs, 0x7FU);
 }
 
 /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
@@ -348,7 +349,7 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
        cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
 
        /* Allow enough full-sized skbs in flight to utilize end systems. */
-       cwnd += 3 * bbr->tso_segs_goal;
+       cwnd += 3 * bbr_tso_segs_goal(sk);
 
        /* Reduce delayed ACKs by rounding up cwnd to the next even number. */
        cwnd = (cwnd + 1) & ~1U;
@@ -730,6 +731,8 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
                bbr->mode = BBR_DRAIN;  /* drain queue we created */
                bbr->pacing_gain = bbr_drain_gain;      /* pace slow to drain */
                bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */
+               tcp_sk(sk)->snd_ssthresh =
+                               bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT);
        }       /* fall through to check if in-flight is already small: */
        if (bbr->mode == BBR_DRAIN &&
            tcp_packets_in_flight(tcp_sk(sk)) <=
@@ -824,7 +827,6 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs)
 
        bw = bbr_bw(sk);
        bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
-       bbr_set_tso_segs_goal(sk);
        bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
 }
 
@@ -834,7 +836,7 @@ static void bbr_init(struct sock *sk)
        struct bbr *bbr = inet_csk_ca(sk);
 
        bbr->prior_cwnd = 0;
-       bbr->tso_segs_goal = 0;  /* default segs per skb until first ACK */
+       tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
        bbr->rtt_cnt = 0;
        bbr->next_rtt_delivered = 0;
        bbr->prev_ca_state = TCP_CA_Open;
@@ -887,7 +889,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
 static u32 bbr_ssthresh(struct sock *sk)
 {
        bbr_save_cwnd(sk);
-       return TCP_INFINITE_SSTHRESH;    /* BBR does not use ssthresh */
+       return tcp_sk(sk)->snd_ssthresh;
 }
 
 static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
@@ -936,7 +938,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
        .undo_cwnd      = bbr_undo_cwnd,
        .cwnd_event     = bbr_cwnd_event,
        .ssthresh       = bbr_ssthresh,
-       .tso_segs_goal  = bbr_tso_segs_goal,
+       .min_tso_segs   = bbr_min_tso_segs,
        .get_info       = bbr_get_info,
        .set_state      = bbr_set_state,
 };
index 7c843578f2333db58100cedbc2a9d0784f72d861..faddf4f9a707f1583fc71e0711e3db95b5d08255 100644 (file)
@@ -6,7 +6,7 @@
  * The algorithm is described in:
  * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
  *  for High-Speed Networks"
- * http://www.ifp.illinois.edu/~srikant/Papers/liubassri06perf.pdf
+ * http://tamerbasar.csl.illinois.edu/LiuBasarSrikantPerfEvalArtJun2008.pdf
  *
  * Implemented from description in paper and ns-2 simulation.
  * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org>
index 06b9c4765f422ff6b23b75f4345b0c77a0d967e2..451ef30126367d5934ebd85157def04156f7fb1e 100644 (file)
@@ -1968,11 +1968,6 @@ void tcp_enter_loss(struct sock *sk)
        /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
         * loss recovery is underway except recurring timeout(s) on
         * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
-        *
-        * In theory F-RTO can be used repeatedly during loss recovery.
-        * In practice this interacts badly with broken middle-boxes that
-        * falsely raise the receive window, which results in repeated
-        * timeouts and stop-and-go behavior.
         */
        tp->frto = net->ipv4.sysctl_tcp_frto &&
                   (new_recovery || icsk->icsk_retransmits) &&
@@ -2628,18 +2623,14 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
            tcp_try_undo_loss(sk, false))
                return;
 
-       /* The ACK (s)acks some never-retransmitted data meaning not all
-        * the data packets before the timeout were lost. Therefore we
-        * undo the congestion window and state. This is essentially
-        * the operation in F-RTO (RFC5682 section 3.1 step 3.b). Since
-        * a retransmitted skb is permantly marked, we can apply such an
-        * operation even if F-RTO was not used.
-        */
-       if ((flag & FLAG_ORIG_SACK_ACKED) &&
-           tcp_try_undo_loss(sk, tp->undo_marker))
-               return;
-
        if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
+               /* Step 3.b. A timeout is spurious if not all data are
+                * lost, i.e., never-retransmitted data are (s)acked.
+                */
+               if ((flag & FLAG_ORIG_SACK_ACKED) &&
+                   tcp_try_undo_loss(sk, true))
+                       return;
+
                if (after(tp->snd_nxt, tp->high_seq)) {
                        if (flag & FLAG_DATA_SACKED || is_dupack)
                                tp->frto = 0; /* Step 3.a. loss was real */
@@ -3998,6 +3989,7 @@ void tcp_reset(struct sock *sk)
        /* This barrier is coupled with smp_rmb() in tcp_poll() */
        smp_wmb();
 
+       tcp_write_queue_purge(sk);
        tcp_done(sk);
 
        if (!sock_flag(sk, SOCK_DEAD))
index 49d043de3476bdfcaf6e9a606d0da0f2094373a8..383cac0ff0ec059ca7dbc1a6304cc7f8183e008d 100644 (file)
@@ -1703,8 +1703,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
 /* Return how many segs we'd like on a TSO packet,
  * to send one TSO packet per ms
  */
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
-                    int min_tso_segs)
+static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+                           int min_tso_segs)
 {
        u32 bytes, segs;
 
@@ -1720,7 +1720,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 
        return segs;
 }
-EXPORT_SYMBOL(tcp_tso_autosize);
 
 /* Return the number of segments we want in the skb we are transmitting.
  * See if congestion control module wants to decide; otherwise, autosize.
@@ -1728,11 +1727,13 @@ EXPORT_SYMBOL(tcp_tso_autosize);
 static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
 {
        const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-       u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+       u32 min_tso, tso_segs;
 
-       if (!tso_segs)
-               tso_segs = tcp_tso_autosize(sk, mss_now,
-                               sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+       min_tso = ca_ops->min_tso_segs ?
+                       ca_ops->min_tso_segs(sk) :
+                       sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+
+       tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
        return min_t(u32, tso_segs, sk->sk_gso_max_segs);
 }
 
index 71fc60f1b326f25fe4dbd73312a5a91758464069..f7d944855f8ebd0a312fe73a53a56ab8d451ee44 100644 (file)
@@ -34,6 +34,7 @@ static void tcp_write_err(struct sock *sk)
        sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
        sk->sk_error_report(sk);
 
+       tcp_write_queue_purge(sk);
        tcp_done(sk);
        __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
 }
index ec35eaa5c029d3eeaa5a34435658a3d829692234..c0630013c1aed55b9a515079eaa613ed6957a667 100644 (file)
@@ -90,7 +90,7 @@ EXPORT_SYMBOL(xfrm4_tunnel_deregister);
        for (handler = rcu_dereference(head);           \
             handler != NULL;                           \
             handler = rcu_dereference(handler->next))  \
-       
+
 static int tunnel4_rcv(struct sk_buff *skb)
 {
        struct xfrm_tunnel *handler;
index 3013404d093550069c42e214d6ee0a38ff6ec1d9..c6dc019bc64b56b00b64ffe9ae1d84ce8097e5b4 100644 (file)
@@ -122,12 +122,6 @@ EXPORT_SYMBOL(udp_table);
 long sysctl_udp_mem[3] __read_mostly;
 EXPORT_SYMBOL(sysctl_udp_mem);
 
-int sysctl_udp_rmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_rmem_min);
-
-int sysctl_udp_wmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_wmem_min);
-
 atomic_long_t udp_memory_allocated;
 EXPORT_SYMBOL(udp_memory_allocated);
 
@@ -2533,35 +2527,35 @@ int udp_abort(struct sock *sk, int err)
 EXPORT_SYMBOL_GPL(udp_abort);
 
 struct proto udp_prot = {
-       .name              = "UDP",
-       .owner             = THIS_MODULE,
-       .close             = udp_lib_close,
-       .connect           = ip4_datagram_connect,
-       .disconnect        = udp_disconnect,
-       .ioctl             = udp_ioctl,
-       .init              = udp_init_sock,
-       .destroy           = udp_destroy_sock,
-       .setsockopt        = udp_setsockopt,
-       .getsockopt        = udp_getsockopt,
-       .sendmsg           = udp_sendmsg,
-       .recvmsg           = udp_recvmsg,
-       .sendpage          = udp_sendpage,
-       .release_cb        = ip4_datagram_release_cb,
-       .hash              = udp_lib_hash,
-       .unhash            = udp_lib_unhash,
-       .rehash            = udp_v4_rehash,
-       .get_port          = udp_v4_get_port,
-       .memory_allocated  = &udp_memory_allocated,
-       .sysctl_mem        = sysctl_udp_mem,
-       .sysctl_wmem       = &sysctl_udp_wmem_min,
-       .sysctl_rmem       = &sysctl_udp_rmem_min,
-       .obj_size          = sizeof(struct udp_sock),
-       .h.udp_table       = &udp_table,
+       .name                   = "UDP",
+       .owner                  = THIS_MODULE,
+       .close                  = udp_lib_close,
+       .connect                = ip4_datagram_connect,
+       .disconnect             = udp_disconnect,
+       .ioctl                  = udp_ioctl,
+       .init                   = udp_init_sock,
+       .destroy                = udp_destroy_sock,
+       .setsockopt             = udp_setsockopt,
+       .getsockopt             = udp_getsockopt,
+       .sendmsg                = udp_sendmsg,
+       .recvmsg                = udp_recvmsg,
+       .sendpage               = udp_sendpage,
+       .release_cb             = ip4_datagram_release_cb,
+       .hash                   = udp_lib_hash,
+       .unhash                 = udp_lib_unhash,
+       .rehash                 = udp_v4_rehash,
+       .get_port               = udp_v4_get_port,
+       .memory_allocated       = &udp_memory_allocated,
+       .sysctl_mem             = sysctl_udp_mem,
+       .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+       .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+       .obj_size               = sizeof(struct udp_sock),
+       .h.udp_table            = &udp_table,
 #ifdef CONFIG_COMPAT
-       .compat_setsockopt = compat_udp_setsockopt,
-       .compat_getsockopt = compat_udp_getsockopt,
+       .compat_setsockopt      = compat_udp_setsockopt,
+       .compat_getsockopt      = compat_udp_getsockopt,
 #endif
-       .diag_destroy      = udp_abort,
+       .diag_destroy           = udp_abort,
 };
 EXPORT_SYMBOL(udp_prot);
 
@@ -2831,6 +2825,27 @@ u32 udp_flow_hashrnd(void)
 }
 EXPORT_SYMBOL(udp_flow_hashrnd);
 
+static void __udp_sysctl_init(struct net *net)
+{
+       net->ipv4.sysctl_udp_rmem_min = SK_MEM_QUANTUM;
+       net->ipv4.sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+
+#ifdef CONFIG_NET_L3_MASTER_DEV
+       net->ipv4.sysctl_udp_l3mdev_accept = 0;
+#endif
+}
+
+static int __net_init udp_sysctl_init(struct net *net)
+{
+       __udp_sysctl_init(net);
+       return 0;
+}
+
+static struct pernet_operations __net_initdata udp_sysctl_ops = {
+       .init   = udp_sysctl_init,
+       .async  = true,
+};
+
 void __init udp_init(void)
 {
        unsigned long limit;
@@ -2843,8 +2858,7 @@ void __init udp_init(void)
        sysctl_udp_mem[1] = limit;
        sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
 
-       sysctl_udp_rmem_min = SK_MEM_QUANTUM;
-       sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+       __udp_sysctl_init(&init_net);
 
        /* 16 spinlocks per cpu */
        udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
@@ -2854,4 +2868,7 @@ void __init udp_init(void)
                panic("UDP: failed to alloc udp_busylocks\n");
        for (i = 0; i < (1U << udp_busylocks_log); i++)
                spin_lock_init(udp_busylocks + i);
+
+       if (register_pernet_subsys(&udp_sysctl_ops))
+               panic("UDP: failed to init sysctl parameters.\n");
 }
index 63faeee989a99dc7f714d1120cb3228349b1362d..2a9764bd17196966b41755269e03f487cb757288 100644 (file)
@@ -92,7 +92,8 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 
        skb_reset_network_header(skb);
        skb_mac_header_rebuild(skb);
-       eth_hdr(skb)->h_proto = skb->protocol;
+       if (skb->mac_len)
+               eth_hdr(skb)->h_proto = skb->protocol;
 
        err = 0;
 
index 94b8702603bc54f36542977ed0d9c3b93d43b6b7..be980c195fc55b0a23f144eb55641cb16a2eb2de 100644 (file)
@@ -30,7 +30,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
 
        mtu = dst_mtu(skb_dst(skb));
        if ((!skb_is_gso(skb) && skb->len > mtu) ||
-           (skb_is_gso(skb) && skb_gso_network_seglen(skb) > ip_skb_dst_mtu(skb->sk, skb))) {
+           (skb_is_gso(skb) &&
+            !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) {
                skb->protocol = htons(ETH_P_IP);
 
                if (skb->sk)
index 796ac4115485362cfd4711f1b7c56d820f9ee3c9..6c76a757fa4a71c2ffa8b4fc63a10c9bde3b2319 100644 (file)
@@ -100,7 +100,9 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
        xdst->u.rt.rt_gateway = rt->rt_gateway;
        xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
        xdst->u.rt.rt_pmtu = rt->rt_pmtu;
+       xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
        INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
+       rt_add_uncached_list(&xdst->u.rt);
 
        return 0;
 }
@@ -240,7 +242,8 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
        struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 
        dst_destroy_metrics_generic(dst);
-
+       if (xdst->u.rt.rt_uncached_list)
+               rt_del_uncached_list(&xdst->u.rt);
        xfrm_dst_destroy(xdst);
 }
 
@@ -379,4 +382,3 @@ void __init xfrm4_init(void)
        xfrm4_protocol_init();
        register_pernet_subsys(&xfrm4_net_ops);
 }
-
index ea71e4b0ab7aea80fc6b564fddeea7a6b01feaeb..6794ddf0547cd0bec8b5ac7e4069b6f746809a49 100644 (file)
@@ -278,6 +278,7 @@ config IPV6_SUBTREES
 config IPV6_MROUTE
        bool "IPv6: multicast routing"
        depends on IPV6
+       select IP_MROUTE_COMMON
        ---help---
          Experimental support for IPv6 multicast forwarding.
          If unsure, say N.
index 4facfe0b18882f0ec911c8f2d2137d823ff60d2c..6fd4bbdc444f3c6c3fe118b8f652f8772371bf9e 100644 (file)
@@ -1459,6 +1459,21 @@ static bool ipv6_use_optimistic_addr(struct net *net,
 #endif
 }
 
+static bool ipv6_allow_optimistic_dad(struct net *net,
+                                     struct inet6_dev *idev)
+{
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+       if (!idev)
+               return false;
+       if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+               return false;
+
+       return true;
+#else
+       return false;
+#endif
+}
+
 static int ipv6_get_saddr_eval(struct net *net,
                               struct ipv6_saddr_score *score,
                               struct ipv6_saddr_dst *dst,
@@ -1836,22 +1851,42 @@ static int ipv6_count_addresses(const struct inet6_dev *idev)
 int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
                  const struct net_device *dev, int strict)
 {
-       return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE);
+       return ipv6_chk_addr_and_flags(net, addr, dev, !dev,
+                                      strict, IFA_F_TENTATIVE);
 }
 EXPORT_SYMBOL(ipv6_chk_addr);
 
+/* device argument is used to find the L3 domain of interest. If
+ * skip_dev_check is set, then the ifp device is not checked against
+ * the passed in dev argument. So the 2 cases for address checks are:
+ *   1. does the address exist in the L3 domain that dev is part of
+ *      (skip_dev_check = true), or
+ *
+ *   2. does the address exist on the specific device
+ *      (skip_dev_check = false)
+ */
 int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
-                           const struct net_device *dev, int strict,
-                           u32 banned_flags)
+                           const struct net_device *dev, bool skip_dev_check,
+                           int strict, u32 banned_flags)
 {
        unsigned int hash = inet6_addr_hash(net, addr);
+       const struct net_device *l3mdev;
        struct inet6_ifaddr *ifp;
        u32 ifp_flags;
 
        rcu_read_lock();
+
+       l3mdev = l3mdev_master_dev_rcu(dev);
+       if (skip_dev_check)
+               dev = NULL;
+
        hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
                if (!net_eq(dev_net(ifp->idev->dev), net))
                        continue;
+
+               if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev)
+                       continue;
+
                /* Decouple optimistic from tentative for evaluation here.
                 * Ban optimistic addresses explicitly, when required.
                 */
@@ -1968,6 +2003,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
                spin_lock_bh(&ifp->lock);
                addrconf_del_dad_work(ifp);
                ifp->flags |= IFA_F_TENTATIVE;
+               if (dad_failed)
+                       ifp->flags &= ~IFA_F_OPTIMISTIC;
                spin_unlock_bh(&ifp->lock);
                if (dad_failed)
                        ipv6_ifa_notify(0, ifp);
@@ -4501,6 +4538,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
            (ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
                return -EINVAL;
 
+       if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
+               ifa_flags &= ~IFA_F_OPTIMISTIC;
+
        timeout = addrconf_timeout_fixup(valid_lft, HZ);
        if (addrconf_finite_timeout(timeout)) {
                expires = jiffies_to_clock_t(timeout * HZ);
@@ -4574,6 +4614,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
        struct in6_addr *pfx, *peer_pfx;
        struct inet6_ifaddr *ifa;
        struct net_device *dev;
+       struct inet6_dev *idev;
        u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
        u32 ifa_flags;
        int err;
@@ -4607,7 +4648,19 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
 
        /* We ignore other flags so far. */
        ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
-                    IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN;
+                    IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+
+       idev = ipv6_find_idev(dev);
+       if (IS_ERR(idev))
+               return PTR_ERR(idev);
+
+       if (!ipv6_allow_optimistic_dad(net, idev))
+               ifa_flags &= ~IFA_F_OPTIMISTIC;
+
+       if (ifa_flags & IFA_F_NODAD && ifa_flags & IFA_F_OPTIMISTIC) {
+               NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
+               return -EINVAL;
+       }
 
        ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
        if (!ifa) {
index 8e085cc05aebb8ccc14e8fb4973789275c3d610b..d580d4d456a518679c9edf23f2914eff1c07cba9 100644 (file)
@@ -66,7 +66,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
                return -EPERM;
        if (ipv6_addr_is_multicast(addr))
                return -EINVAL;
-       if (ipv6_chk_addr(net, addr, NULL, 0))
+
+       if (ifindex)
+               dev = __dev_get_by_index(net, ifindex);
+
+       if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
                return -EINVAL;
 
        pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
@@ -78,7 +82,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
        if (ifindex == 0) {
                struct rt6_info *rt;
 
-               rt = rt6_lookup(net, addr, NULL, 0, 0);
+               rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
                if (rt) {
                        dev = rt->dst.dev;
                        ip6_rt_put(rt);
@@ -90,8 +94,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
                        dev = __dev_get_by_flags(net, IFF_UP,
                                                 IFF_UP | IFF_LOOPBACK);
                }
-       } else
-               dev = __dev_get_by_index(net, ifindex);
+       }
 
        if (!dev) {
                err = -ENODEV;
@@ -552,4 +555,3 @@ void ac6_proc_exit(struct net *net)
        remove_proc_entry("anycast6", net->proc_net);
 }
 #endif
-
index fbf08ce3f5ab75c3d21c20741421f4153c4dcd67..88bc2ef7c7a810b19a2ddb75babe3a37219532b0 100644 (file)
@@ -146,10 +146,12 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
        struct sockaddr_in6     *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_sock        *inet = inet_sk(sk);
        struct ipv6_pinfo       *np = inet6_sk(sk);
-       struct in6_addr         *daddr;
+       struct in6_addr         *daddr, old_daddr;
+       __be32                  fl6_flowlabel = 0;
+       __be32                  old_fl6_flowlabel;
+       __be16                  old_dport;
        int                     addr_type;
        int                     err;
-       __be32                  fl6_flowlabel = 0;
 
        if (usin->sin6_family == AF_INET) {
                if (__ipv6_only_sock(sk))
@@ -238,9 +240,13 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
                }
        }
 
+       /* save the current peer information before updating it */
+       old_daddr = sk->sk_v6_daddr;
+       old_fl6_flowlabel = np->flow_label;
+       old_dport = inet->inet_dport;
+
        sk->sk_v6_daddr = *daddr;
        np->flow_label = fl6_flowlabel;
-
        inet->inet_dport = usin->sin6_port;
 
        /*
@@ -250,11 +256,12 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
 
        err = ip6_datagram_dst_update(sk, true);
        if (err) {
-               /* Reset daddr and dport so that udp_v6_early_demux()
-                * fails to find this socket
+               /* Restore the socket peer info, to keep it consistent with
+                * the old socket state
                 */
-               memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
-               inet->inet_dport = 0;
+               sk->sk_v6_daddr = old_daddr;
+               np->flow_label = old_fl6_flowlabel;
+               inet->inet_dport = old_dport;
                goto out;
        }
 
@@ -801,8 +808,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
                        if (addr_type != IPV6_ADDR_ANY) {
                                int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
                                if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
-                                   !ipv6_chk_addr(net, &src_info->ipi6_addr,
-                                                  strict ? dev : NULL, 0) &&
+                                   !ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr,
+                                                            dev, !strict, 0,
+                                                            IFA_F_TENTATIVE) &&
                                    !ipv6_chk_acast_addr_src(net, dev,
                                                             &src_info->ipi6_addr))
                                        err = -EINVAL;
index 11025f8d124b86be72d51f9aa934ad1c85e04626..b643f5ce6c8078ff91f02555a33964acdedcded4 100644 (file)
@@ -279,4 +279,3 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
        return nexthdr;
 }
 EXPORT_SYMBOL(ipv6_find_hdr);
-
index 95a2c9e8699a7a40056a987a3eeb5bf42edfa0b4..00ef9467f3c04e1eca3449df41ba9e8885ea2027 100644 (file)
@@ -61,11 +61,13 @@ unsigned int fib6_rules_seq_read(struct net *net)
 }
 
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+                                  const struct sk_buff *skb,
                                   int flags, pol_lookup_t lookup)
 {
        if (net->ipv6.fib6_has_custom_rules) {
                struct fib_lookup_arg arg = {
                        .lookup_ptr = lookup,
+                       .lookup_data = skb,
                        .flags = FIB_LOOKUP_NOREF,
                };
 
@@ -80,11 +82,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
        } else {
                struct rt6_info *rt;
 
-               rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags);
+               rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
                if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
                        return &rt->dst;
                ip6_rt_put(rt);
-               rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+               rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
                if (rt->dst.error != -EAGAIN)
                        return &rt->dst;
                ip6_rt_put(rt);
@@ -130,7 +132,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
                goto out;
        }
 
-       rt = lookup(net, table, flp6, flags);
+       rt = lookup(net, table, flp6, arg->lookup_data, flags);
        if (rt != net->ipv6.ip6_null_entry) {
                struct fib6_rule *r = (struct fib6_rule *)rule;
 
@@ -223,6 +225,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
        if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
                return 0;
 
+       if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
+               return 0;
+
+       if (fib_rule_port_range_set(&rule->sport_range) &&
+           !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport))
+               return 0;
+
+       if (fib_rule_port_range_set(&rule->dport_range) &&
+           !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport))
+               return 0;
+
        return 1;
 }
 
@@ -258,12 +271,26 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
        rule6->dst.plen = frh->dst_len;
        rule6->tclass = frh->tos;
 
+       if (fib_rule_requires_fldissect(rule))
+               net->ipv6.fib6_rules_require_fldissect++;
+
        net->ipv6.fib6_has_custom_rules = true;
        err = 0;
 errout:
        return err;
 }
 
+static int fib6_rule_delete(struct fib_rule *rule)
+{
+       struct net *net = rule->fr_net;
+
+       if (net->ipv6.fib6_rules_require_fldissect &&
+           fib_rule_requires_fldissect(rule))
+               net->ipv6.fib6_rules_require_fldissect--;
+
+       return 0;
+}
+
 static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
                             struct nlattr **tb)
 {
@@ -323,6 +350,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
        .match                  = fib6_rule_match,
        .suppress               = fib6_rule_suppress,
        .configure              = fib6_rule_configure,
+       .delete                 = fib6_rule_delete,
        .compare                = fib6_rule_compare,
        .fill                   = fib6_rule_fill,
        .nlmsg_payload          = fib6_rule_nlmsg_payload,
@@ -350,6 +378,7 @@ static int __net_init fib6_rules_net_init(struct net *net)
                goto out_fib6_rules_ops;
 
        net->ipv6.fib6_rules_ops = ops;
+       net->ipv6.fib6_rules_require_fldissect = 0;
 out:
        return err;
 
index 4fa4f1b150a4d149385a8422de9faa1906d9b19c..6f84668be6ea2a193c7bfe54cdb6c8764631bfd5 100644 (file)
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
        fl6.fl6_icmp_type = type;
        fl6.fl6_icmp_code = code;
        fl6.flowi6_uid = sock_net_uid(net, NULL);
-       fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+       fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
        sk = icmpv6_xmit_lock(net);
@@ -629,7 +629,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
        skb_pull(skb2, nhs);
        skb_reset_network_header(skb2);
 
-       rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+       rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
+                       skb, 0);
 
        if (rt && rt->dst.dev)
                skb2->dev = rt->dst.dev;
index 44c39c5f06384c6c83901036a2e94bcda439f91c..e438699f000f196ebafa4063884a4d440b0b714d 100644 (file)
@@ -613,6 +613,7 @@ static struct pernet_operations ila_net_ops = {
        .exit = ila_exit_net,
        .id   = &ila_net_id,
        .size = sizeof(struct ila_net),
+       .async = true,
 };
 
 static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
index cab95cf3b39fcfaf1f17c8e26098d0b7cc66bffa..2f995e9e3050c0c98c447830994d1340a6d22566 100644 (file)
@@ -299,11 +299,12 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
 }
 
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+                                  const struct sk_buff *skb,
                                   int flags, pol_lookup_t lookup)
 {
        struct rt6_info *rt;
 
-       rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+       rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
        if (rt->dst.error == -EAGAIN) {
                ip6_rt_put(rt);
                rt = net->ipv6.ip6_null_entry;
index 3c353125546d8701febaad95f81bdaa675cd5d7d..3a98c694da5f12d070b4ccc54e659a50691acd60 100644 (file)
@@ -126,7 +126,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
        struct ip6_tnl *t, *cand = NULL;
        struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
        int dev_type = (gre_proto == htons(ETH_P_TEB) ||
-                       gre_proto == htons(ETH_P_ERSPAN)) ?
+                       gre_proto == htons(ETH_P_ERSPAN) ||
+                       gre_proto == htons(ETH_P_ERSPAN2)) ?
                       ARPHRD_ETHER : ARPHRD_IP6GRE;
        int score, cand_score = 4;
 
@@ -236,7 +237,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
                return t;
 
        dev = ign->fb_tunnel_dev;
-       if (dev->flags & IFF_UP)
+       if (dev && dev->flags & IFF_UP)
                return netdev_priv(dev);
 
        return NULL;
@@ -695,9 +696,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
        else
                fl6->daddr = tunnel->parms.raddr;
 
-       if (tunnel->parms.o_flags & TUNNEL_SEQ)
-               tunnel->o_seqno++;
-
        /* Push GRE header. */
        protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
 
@@ -720,14 +718,20 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
                fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
 
                dsfield = key->tos;
-               flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+               flags = key->tun_flags &
+                       (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
                tunnel->tun_hlen = gre_calc_hlen(flags);
 
                gre_build_header(skb, tunnel->tun_hlen,
                                 flags, protocol,
-                                tunnel_id_to_key32(tun_info->key.tun_id), 0);
+                                tunnel_id_to_key32(tun_info->key.tun_id),
+                                (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+                                                     : 0);
 
        } else {
+               if (tunnel->parms.o_flags & TUNNEL_SEQ)
+                       tunnel->o_seqno++;
+
                gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
                                 protocol, tunnel->parms.o_key,
                                 htonl(tunnel->o_seqno));
@@ -902,6 +906,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
                truncate = true;
        }
 
+       if (skb_cow_head(skb, dev->needed_headroom))
+               goto tx_err;
+
        t->parms.o_flags &= ~TUNNEL_KEY;
        IPCB(skb)->flags = 0;
 
@@ -944,6 +951,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
                                               md->u.md2.dir,
                                               get_hwid(&md->u.md2),
                                               truncate, false);
+               } else {
+                       goto tx_err;
                }
        } else {
                switch (skb->protocol) {
@@ -1053,7 +1062,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
 
                struct rt6_info *rt = rt6_lookup(t->net,
                                                 &p->raddr, &p->laddr,
-                                                p->link, strict);
+                                                p->link, NULL, strict);
 
                if (!rt)
                        return;
@@ -1469,6 +1478,8 @@ static int __net_init ip6gre_init_net(struct net *net)
        struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
        int err;
 
+       if (!net_has_fallback_tunnels(net))
+               return 0;
        ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
                                          NET_NAME_UNKNOWN,
                                          ip6gre_tunnel_setup);
@@ -1517,6 +1528,7 @@ static struct pernet_operations ip6gre_net_ops = {
        .exit_batch = ip6gre_exit_batch_net,
        .id   = &ip6gre_net_id,
        .size = sizeof(struct ip6gre_net),
+       .async = true,
 };
 
 static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1784,6 +1796,12 @@ static void ip6gre_tap_setup(struct net_device *dev)
        netif_keep_dst(dev);
 }
 
+bool is_ip6gretap_dev(const struct net_device *dev)
+{
+       return dev->netdev_ops == &ip6gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_ip6gretap_dev);
+
 static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
                                       struct ip_tunnel_encap *ipencap)
 {
index 997c7f19ad62e61b04927f62eccaf54ca6f42801..2c7f09c3c39ed8a1e85a967e105ff3cc30dce5b9 100644 (file)
@@ -71,7 +71,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 
                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
-                   ((mroute6_socket(net, skb) &&
+                   ((mroute6_is_socket(net, skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
@@ -412,7 +412,7 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
        if (skb->ignore_df)
                return false;
 
-       if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+       if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
                return false;
 
        return true;
index 4b15fe92827867493cca7ad58d70701c0dea599f..456fcf942f9553a26a154f9a1ccf32f1099773f8 100644 (file)
@@ -679,7 +679,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
                /* Try to guess incoming interface */
                rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
-                               NULL, 0, 0);
+                               NULL, 0, skb2, 0);
 
                if (rt && rt->dst.dev)
                        skb2->dev = rt->dst.dev;
@@ -758,9 +758,11 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
                        ldev = dev_get_by_index_rcu(net, p->link);
 
                if ((ipv6_addr_is_multicast(laddr) ||
-                    likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
+                    likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+                                                   0, IFA_F_TENTATIVE))) &&
                    ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
-                    likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
+                    likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
+                                                    0, IFA_F_TENTATIVE))))
                        ret = 1;
        }
        return ret;
@@ -990,12 +992,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
                if (p->link)
                        ldev = dev_get_by_index_rcu(net, p->link);
 
-               if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
+               if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+                                                     0, IFA_F_TENTATIVE)))
                        pr_warn("%s xmit: Local address not yet configured!\n",
                                p->name);
                else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
                         !ipv6_addr_is_multicast(raddr) &&
-                        unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
+                        unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
+                                                         true, 0, IFA_F_TENTATIVE)))
                        pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
                                p->name);
                else
@@ -1444,7 +1448,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
 
                struct rt6_info *rt = rt6_lookup(t->net,
                                                 &p->raddr, &p->laddr,
-                                                p->link, strict);
+                                                p->link, NULL, strict);
 
                if (!rt)
                        return;
@@ -1982,14 +1986,14 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
 {
        struct net *net = dev_net(dev);
        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
-       struct ip6_tnl *nt, *t;
        struct ip_tunnel_encap ipencap;
+       struct ip6_tnl *nt, *t;
+       int err;
 
        nt = netdev_priv(dev);
 
        if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
-               int err = ip6_tnl_encap_setup(nt, &ipencap);
-
+               err = ip6_tnl_encap_setup(nt, &ipencap);
                if (err < 0)
                        return err;
        }
@@ -2005,7 +2009,11 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
                        return -EEXIST;
        }
 
-       return ip6_tnl_create2(dev);
+       err = ip6_tnl_create2(dev);
+       if (!err && tb[IFLA_MTU])
+               ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+
+       return err;
 }
 
 static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
@@ -2201,6 +2209,8 @@ static int __net_init ip6_tnl_init_net(struct net *net)
        ip6n->tnls[0] = ip6n->tnls_wc;
        ip6n->tnls[1] = ip6n->tnls_r_l;
 
+       if (!net_has_fallback_tunnels(net))
+               return 0;
        err = -ENOMEM;
        ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
                                        NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
@@ -2250,6 +2260,7 @@ static struct pernet_operations ip6_tnl_net_ops = {
        .exit_batch = ip6_tnl_exit_batch_net,
        .id   = &ip6_tnl_net_id,
        .size = sizeof(struct ip6_tnl_net),
+       .async = true,
 };
 
 /**
index fa3ae1cb50d32628f6344500a2bb43b58832ece8..a482b854eeea99d3ce05ba94c031dcc8faa84d36 100644 (file)
@@ -645,7 +645,7 @@ static void vti6_link_config(struct ip6_tnl *t)
                              (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
                struct rt6_info *rt = rt6_lookup(t->net,
                                                 &p->raddr, &p->laddr,
-                                                p->link, strict);
+                                                p->link, NULL, strict);
 
                if (rt)
                        tdev = rt->dst.dev;
@@ -1148,6 +1148,7 @@ static struct pernet_operations vti6_net_ops = {
        .exit_batch = vti6_exit_batch_net,
        .id   = &vti6_net_id,
        .size = sizeof(struct vti6_net),
+       .async = true,
 };
 
 static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
index 295eb5ecaee574104b9fc0733eca542189e6361e..7345bd6c4b7dda39c0d73d542e9ca9a5366542ff 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
-#include <linux/timer.h>
 #include <linux/mm.h>
 #include <linux/kernel.h>
 #include <linux/fcntl.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/compat.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
-#include <net/sock.h>
 #include <net/raw.h>
 #include <linux/notifier.h>
 #include <linux/if_arp.h>
 #include <net/ip6_checksum.h>
 #include <linux/netconf.h>
 
-struct mr6_table {
-       struct list_head        list;
-       possible_net_t          net;
-       u32                     id;
-       struct sock             *mroute6_sk;
-       struct timer_list       ipmr_expire_timer;
-       struct list_head        mfc6_unres_queue;
-       struct list_head        mfc6_cache_array[MFC6_LINES];
-       struct mif_device       vif6_table[MAXMIFS];
-       int                     maxvif;
-       atomic_t                cache_resolve_queue_len;
-       bool                    mroute_do_assert;
-       bool                    mroute_do_pim;
-#ifdef CONFIG_IPV6_PIMSM_V2
-       int                     mroute_reg_vif_num;
-#endif
-};
-
 struct ip6mr_rule {
        struct fib_rule         common;
 };
 
 struct ip6mr_result {
-       struct mr6_table        *mrt;
+       struct mr_table *mrt;
 };
 
 /* Big lock, protecting vif table, mrt cache and mroute socket state.
@@ -86,11 +65,7 @@ struct ip6mr_result {
 
 static DEFINE_RWLOCK(mrt_lock);
 
-/*
- *     Multicast router control variables
- */
-
-#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
+/* Multicast router control variables */
 
 /* Special spinlock for queue of unresolved entries */
 static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -105,30 +80,45 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 
 static struct kmem_cache *mrt_cachep __read_mostly;
 
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
-static void ip6mr_free_table(struct mr6_table *mrt);
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
+static void ip6mr_free_table(struct mr_table *mrt);
 
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
                           struct sk_buff *skb, struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
                              mifi_t mifi, int assert);
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
-                              struct mfc6_cache *c, struct rtmsg *rtm);
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
                              int cmd);
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
                               struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
 static void ipmr_expire_process(struct timer_list *t);
 
 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 #define ip6mr_for_each_table(mrt, net) \
        list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+                                           struct mr_table *mrt)
+{
+       struct mr_table *ret;
+
+       if (!mrt)
+               ret = list_entry_rcu(net->ipv6.mr6_tables.next,
+                                    struct mr_table, list);
+       else
+               ret = list_entry_rcu(mrt->list.next,
+                                    struct mr_table, list);
+
+       if (&ret->list == &net->ipv6.mr6_tables)
+               return NULL;
+       return ret;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 {
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        ip6mr_for_each_table(mrt, net) {
                if (mrt->id == id)
@@ -138,7 +128,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
 }
 
 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
-                           struct mr6_table **mrt)
+                           struct mr_table **mrt)
 {
        int err;
        struct ip6mr_result res;
@@ -159,7 +149,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
                             int flags, struct fib_lookup_arg *arg)
 {
        struct ip6mr_result *res = arg->result;
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        switch (rule->action) {
        case FR_ACT_TO_TBL:
@@ -227,7 +217,7 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 static int __net_init ip6mr_rules_init(struct net *net)
 {
        struct fib_rules_ops *ops;
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        int err;
 
        ops = fib_rules_register(&ip6mr_rules_ops_template, net);
@@ -258,7 +248,7 @@ static int __net_init ip6mr_rules_init(struct net *net)
 
 static void __net_exit ip6mr_rules_exit(struct net *net)
 {
-       struct mr6_table *mrt, *next;
+       struct mr_table *mrt, *next;
 
        rtnl_lock();
        list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
@@ -272,13 +262,21 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
 #define ip6mr_for_each_table(mrt, net) \
        for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+                                           struct mr_table *mrt)
+{
+       if (!mrt)
+               return net->ipv6.mrt6;
+       return NULL;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 {
        return net->ipv6.mrt6;
 }
 
 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
-                           struct mr6_table **mrt)
+                           struct mr_table **mrt)
 {
        *mrt = net->ipv6.mrt6;
        return 0;
@@ -299,112 +297,75 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
 }
 #endif
 
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
+static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
+                         const void *ptr)
 {
-       struct mr6_table *mrt;
-       unsigned int i;
+       const struct mfc6_cache_cmp_arg *cmparg = arg->key;
+       struct mfc6_cache *c = (struct mfc6_cache *)ptr;
 
-       mrt = ip6mr_get_table(net, id);
-       if (mrt)
-               return mrt;
-
-       mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
-       if (!mrt)
-               return NULL;
-       mrt->id = id;
-       write_pnet(&mrt->net, net);
-
-       /* Forwarding cache */
-       for (i = 0; i < MFC6_LINES; i++)
-               INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
-
-       INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
+       return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
+              !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
+}
 
-       timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
+static const struct rhashtable_params ip6mr_rht_params = {
+       .head_offset = offsetof(struct mr_mfc, mnode),
+       .key_offset = offsetof(struct mfc6_cache, cmparg),
+       .key_len = sizeof(struct mfc6_cache_cmp_arg),
+       .nelem_hint = 3,
+       .locks_mul = 1,
+       .obj_cmpfn = ip6mr_hash_cmp,
+       .automatic_shrinking = true,
+};
 
-#ifdef CONFIG_IPV6_PIMSM_V2
-       mrt->mroute_reg_vif_num = -1;
-#endif
+static void ip6mr_new_table_set(struct mr_table *mrt,
+                               struct net *net)
+{
 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
        list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
 #endif
-       return mrt;
 }
 
-static void ip6mr_free_table(struct mr6_table *mrt)
-{
-       del_timer_sync(&mrt->ipmr_expire_timer);
-       mroute_clean_tables(mrt, true);
-       kfree(mrt);
-}
-
-#ifdef CONFIG_PROC_FS
-
-struct ipmr_mfc_iter {
-       struct seq_net_private p;
-       struct mr6_table *mrt;
-       struct list_head *cache;
-       int ct;
+static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
+       .mf6c_origin = IN6ADDR_ANY_INIT,
+       .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
 };
 
+static struct mr_table_ops ip6mr_mr_table_ops = {
+       .rht_params = &ip6mr_rht_params,
+       .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
+};
 
-static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
-                                          struct ipmr_mfc_iter *it, loff_t pos)
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 {
-       struct mr6_table *mrt = it->mrt;
-       struct mfc6_cache *mfc;
-
-       read_lock(&mrt_lock);
-       for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
-               it->cache = &mrt->mfc6_cache_array[it->ct];
-               list_for_each_entry(mfc, it->cache, list)
-                       if (pos-- == 0)
-                               return mfc;
-       }
-       read_unlock(&mrt_lock);
+       struct mr_table *mrt;
 
-       spin_lock_bh(&mfc_unres_lock);
-       it->cache = &mrt->mfc6_unres_queue;
-       list_for_each_entry(mfc, it->cache, list)
-               if (pos-- == 0)
-                       return mfc;
-       spin_unlock_bh(&mfc_unres_lock);
+       mrt = ip6mr_get_table(net, id);
+       if (mrt)
+               return mrt;
 
-       it->cache = NULL;
-       return NULL;
+       return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
+                             ipmr_expire_process, ip6mr_new_table_set);
 }
 
-/*
- *     The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
- */
-
-struct ipmr_vif_iter {
-       struct seq_net_private p;
-       struct mr6_table *mrt;
-       int ct;
-};
-
-static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
-                                           struct ipmr_vif_iter *iter,
-                                           loff_t pos)
+static void ip6mr_free_table(struct mr_table *mrt)
 {
-       struct mr6_table *mrt = iter->mrt;
-
-       for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
-               if (!MIF_EXISTS(mrt, iter->ct))
-                       continue;
-               if (pos-- == 0)
-                       return &mrt->vif6_table[iter->ct];
-       }
-       return NULL;
+       del_timer_sync(&mrt->ipmr_expire_timer);
+       mroute_clean_tables(mrt, true);
+       rhltable_destroy(&mrt->mfc_hash);
+       kfree(mrt);
 }
 
+#ifdef CONFIG_PROC_FS
+/* The /proc interfaces to multicast routing
+ * /proc/ip6_mr_cache /proc/ip6_mr_vif
+ */
+
 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(mrt_lock)
 {
-       struct ipmr_vif_iter *iter = seq->private;
+       struct mr_vif_iter *iter = seq->private;
        struct net *net = seq_file_net(seq);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
        if (!mrt)
@@ -413,26 +374,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
        iter->mrt = mrt;
 
        read_lock(&mrt_lock);
-       return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
-               : SEQ_START_TOKEN;
-}
-
-static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-       struct ipmr_vif_iter *iter = seq->private;
-       struct net *net = seq_file_net(seq);
-       struct mr6_table *mrt = iter->mrt;
-
-       ++*pos;
-       if (v == SEQ_START_TOKEN)
-               return ip6mr_vif_seq_idx(net, iter, 0);
-
-       while (++iter->ct < mrt->maxvif) {
-               if (!MIF_EXISTS(mrt, iter->ct))
-                       continue;
-               return &mrt->vif6_table[iter->ct];
-       }
-       return NULL;
+       return mr_vif_seq_start(seq, pos);
 }
 
 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -443,19 +385,19 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 
 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 {
-       struct ipmr_vif_iter *iter = seq->private;
-       struct mr6_table *mrt = iter->mrt;
+       struct mr_vif_iter *iter = seq->private;
+       struct mr_table *mrt = iter->mrt;
 
        if (v == SEQ_START_TOKEN) {
                seq_puts(seq,
                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
        } else {
-               const struct mif_device *vif = v;
+               const struct vif_device *vif = v;
                const char *name = vif->dev ? vif->dev->name : "none";
 
                seq_printf(seq,
                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
-                          vif - mrt->vif6_table,
+                          vif - mrt->vif_table,
                           name, vif->bytes_in, vif->pkt_in,
                           vif->bytes_out, vif->pkt_out,
                           vif->flags);
@@ -465,7 +407,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 
 static const struct seq_operations ip6mr_vif_seq_ops = {
        .start = ip6mr_vif_seq_start,
-       .next  = ip6mr_vif_seq_next,
+       .next  = mr_vif_seq_next,
        .stop  = ip6mr_vif_seq_stop,
        .show  = ip6mr_vif_seq_show,
 };
@@ -473,7 +415,7 @@ static const struct seq_operations ip6mr_vif_seq_ops = {
 static int ip6mr_vif_open(struct inode *inode, struct file *file)
 {
        return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
-                           sizeof(struct ipmr_vif_iter));
+                           sizeof(struct mr_vif_iter));
 }
 
 static const struct file_operations ip6mr_vif_fops = {
@@ -485,72 +427,14 @@ static const struct file_operations ip6mr_vif_fops = {
 
 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 {
-       struct ipmr_mfc_iter *it = seq->private;
        struct net *net = seq_file_net(seq);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
        if (!mrt)
                return ERR_PTR(-ENOENT);
 
-       it->mrt = mrt;
-       it->cache = NULL;
-       return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
-               : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-       struct mfc6_cache *mfc = v;
-       struct ipmr_mfc_iter *it = seq->private;
-       struct net *net = seq_file_net(seq);
-       struct mr6_table *mrt = it->mrt;
-
-       ++*pos;
-
-       if (v == SEQ_START_TOKEN)
-               return ipmr_mfc_seq_idx(net, seq->private, 0);
-
-       if (mfc->list.next != it->cache)
-               return list_entry(mfc->list.next, struct mfc6_cache, list);
-
-       if (it->cache == &mrt->mfc6_unres_queue)
-               goto end_of_list;
-
-       BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
-
-       while (++it->ct < MFC6_LINES) {
-               it->cache = &mrt->mfc6_cache_array[it->ct];
-               if (list_empty(it->cache))
-                       continue;
-               return list_first_entry(it->cache, struct mfc6_cache, list);
-       }
-
-       /* exhausted cache_array, show unresolved */
-       read_unlock(&mrt_lock);
-       it->cache = &mrt->mfc6_unres_queue;
-       it->ct = 0;
-
-       spin_lock_bh(&mfc_unres_lock);
-       if (!list_empty(it->cache))
-               return list_first_entry(it->cache, struct mfc6_cache, list);
-
- end_of_list:
-       spin_unlock_bh(&mfc_unres_lock);
-       it->cache = NULL;
-
-       return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
-       struct ipmr_mfc_iter *it = seq->private;
-       struct mr6_table *mrt = it->mrt;
-
-       if (it->cache == &mrt->mfc6_unres_queue)
-               spin_unlock_bh(&mfc_unres_lock);
-       else if (it->cache == &mrt->mfc6_cache_array[it->ct])
-               read_unlock(&mrt_lock);
+       return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
 }
 
 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -564,25 +448,25 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
                         "Iif      Pkts  Bytes     Wrong  Oifs\n");
        } else {
                const struct mfc6_cache *mfc = v;
-               const struct ipmr_mfc_iter *it = seq->private;
-               struct mr6_table *mrt = it->mrt;
+               const struct mr_mfc_iter *it = seq->private;
+               struct mr_table *mrt = it->mrt;
 
                seq_printf(seq, "%pI6 %pI6 %-3hd",
                           &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
-                          mfc->mf6c_parent);
+                          mfc->_c.mfc_parent);
 
-               if (it->cache != &mrt->mfc6_unres_queue) {
+               if (it->cache != &mrt->mfc_unres_queue) {
                        seq_printf(seq, " %8lu %8lu %8lu",
-                                  mfc->mfc_un.res.pkt,
-                                  mfc->mfc_un.res.bytes,
-                                  mfc->mfc_un.res.wrong_if);
-                       for (n = mfc->mfc_un.res.minvif;
-                            n < mfc->mfc_un.res.maxvif; n++) {
-                               if (MIF_EXISTS(mrt, n) &&
-                                   mfc->mfc_un.res.ttls[n] < 255)
+                                  mfc->_c.mfc_un.res.pkt,
+                                  mfc->_c.mfc_un.res.bytes,
+                                  mfc->_c.mfc_un.res.wrong_if);
+                       for (n = mfc->_c.mfc_un.res.minvif;
+                            n < mfc->_c.mfc_un.res.maxvif; n++) {
+                               if (VIF_EXISTS(mrt, n) &&
+                                   mfc->_c.mfc_un.res.ttls[n] < 255)
                                        seq_printf(seq,
-                                                  " %2d:%-3d",
-                                                  n, mfc->mfc_un.res.ttls[n]);
+                                                  " %2d:%-3d", n,
+                                                  mfc->_c.mfc_un.res.ttls[n]);
                        }
                } else {
                        /* unresolved mfc_caches don't contain
@@ -597,15 +481,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 
 static const struct seq_operations ipmr_mfc_seq_ops = {
        .start = ipmr_mfc_seq_start,
-       .next  = ipmr_mfc_seq_next,
-       .stop  = ipmr_mfc_seq_stop,
+       .next  = mr_mfc_seq_next,
+       .stop  = mr_mfc_seq_stop,
        .show  = ipmr_mfc_seq_show,
 };
 
 static int ipmr_mfc_open(struct inode *inode, struct file *file)
 {
        return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
-                           sizeof(struct ipmr_mfc_iter));
+                           sizeof(struct mr_mfc_iter));
 }
 
 static const struct file_operations ip6mr_mfc_fops = {
@@ -624,7 +508,7 @@ static int pim6_rcv(struct sk_buff *skb)
        struct ipv6hdr   *encap;
        struct net_device  *reg_dev = NULL;
        struct net *net = dev_net(skb->dev);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        struct flowi6 fl6 = {
                .flowi6_iif     = skb->dev->ifindex,
                .flowi6_mark    = skb->mark,
@@ -658,7 +542,7 @@ static int pim6_rcv(struct sk_buff *skb)
 
        read_lock(&mrt_lock);
        if (reg_vif_num >= 0)
-               reg_dev = mrt->vif6_table[reg_vif_num].dev;
+               reg_dev = mrt->vif_table[reg_vif_num].dev;
        if (reg_dev)
                dev_hold(reg_dev);
        read_unlock(&mrt_lock);
@@ -693,7 +577,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
                                      struct net_device *dev)
 {
        struct net *net = dev_net(dev);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        struct flowi6 fl6 = {
                .flowi6_oif     = dev->ifindex,
                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
@@ -736,7 +620,7 @@ static void reg_vif_setup(struct net_device *dev)
        dev->features           |= NETIF_F_NETNS_LOCAL;
 }
 
-static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
+static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
 {
        struct net_device *dev;
        char name[IFNAMSIZ];
@@ -773,17 +657,17 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
  *     Delete a VIF entry
  */
 
-static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
+static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
                       struct list_head *head)
 {
-       struct mif_device *v;
+       struct vif_device *v;
        struct net_device *dev;
        struct inet6_dev *in6_dev;
 
        if (vifi < 0 || vifi >= mrt->maxvif)
                return -EADDRNOTAVAIL;
 
-       v = &mrt->vif6_table[vifi];
+       v = &mrt->vif_table[vifi];
 
        write_lock_bh(&mrt_lock);
        dev = v->dev;
@@ -802,7 +686,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
        if (vifi + 1 == mrt->maxvif) {
                int tmp;
                for (tmp = vifi - 1; tmp >= 0; tmp--) {
-                       if (MIF_EXISTS(mrt, tmp))
+                       if (VIF_EXISTS(mrt, tmp))
                                break;
                }
                mrt->maxvif = tmp + 1;
@@ -827,23 +711,30 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
        return 0;
 }
 
+static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
+{
+       struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
+
+       kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
+}
+
 static inline void ip6mr_cache_free(struct mfc6_cache *c)
 {
-       kmem_cache_free(mrt_cachep, c);
+       call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
 }
 
 /* Destroy an unresolved cache entry, killing queued skbs
    and reporting error to netlink readers.
  */
 
-static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
+static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
 {
        struct net *net = read_pnet(&mrt->net);
        struct sk_buff *skb;
 
        atomic_dec(&mrt->cache_resolve_queue_len);
 
-       while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
+       while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
                if (ipv6_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = skb_pull(skb,
                                                        sizeof(struct ipv6hdr));
@@ -862,13 +753,13 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
 
 /* Timer process for all the unresolved queue. */
 
-static void ipmr_do_expire_process(struct mr6_table *mrt)
+static void ipmr_do_expire_process(struct mr_table *mrt)
 {
        unsigned long now = jiffies;
        unsigned long expires = 10 * HZ;
-       struct mfc6_cache *c, *next;
+       struct mr_mfc *c, *next;
 
-       list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+       list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
                if (time_after(c->mfc_un.unres.expires, now)) {
                        /* not yet... */
                        unsigned long interval = c->mfc_un.unres.expires - now;
@@ -878,24 +769,24 @@ static void ipmr_do_expire_process(struct mr6_table *mrt)
                }
 
                list_del(&c->list);
-               mr6_netlink_event(mrt, c, RTM_DELROUTE);
-               ip6mr_destroy_unres(mrt, c);
+               mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+               ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
        }
 
-       if (!list_empty(&mrt->mfc6_unres_queue))
+       if (!list_empty(&mrt->mfc_unres_queue))
                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 }
 
 static void ipmr_expire_process(struct timer_list *t)
 {
-       struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
+       struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
 
        if (!spin_trylock(&mfc_unres_lock)) {
                mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
                return;
        }
 
-       if (!list_empty(&mrt->mfc6_unres_queue))
+       if (!list_empty(&mrt->mfc_unres_queue))
                ipmr_do_expire_process(mrt);
 
        spin_unlock(&mfc_unres_lock);
@@ -903,7 +794,8 @@ static void ipmr_expire_process(struct timer_list *t)
 
 /* Fill oifs list. It is called under write locked mrt_lock. */
 
-static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
+static void ip6mr_update_thresholds(struct mr_table *mrt,
+                                   struct mr_mfc *cache,
                                    unsigned char *ttls)
 {
        int vifi;
@@ -913,7 +805,7 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 
        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
-               if (MIF_EXISTS(mrt, vifi) &&
+               if (VIF_EXISTS(mrt, vifi) &&
                    ttls[vifi] && ttls[vifi] < 255) {
                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
                        if (cache->mfc_un.res.minvif > vifi)
@@ -925,17 +817,17 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
        cache->mfc_un.res.lastuse = jiffies;
 }
 
-static int mif6_add(struct net *net, struct mr6_table *mrt,
+static int mif6_add(struct net *net, struct mr_table *mrt,
                    struct mif6ctl *vifc, int mrtsock)
 {
        int vifi = vifc->mif6c_mifi;
-       struct mif_device *v = &mrt->vif6_table[vifi];
+       struct vif_device *v = &mrt->vif_table[vifi];
        struct net_device *dev;
        struct inet6_dev *in6_dev;
        int err;
 
        /* Is vif busy ? */
-       if (MIF_EXISTS(mrt, vifi))
+       if (VIF_EXISTS(mrt, vifi))
                return -EADDRINUSE;
 
        switch (vifc->mif6c_flags) {
@@ -980,21 +872,10 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
                                             dev->ifindex, &in6_dev->cnf);
        }
 
-       /*
-        *      Fill in the VIF structures
-        */
-       v->rate_limit = vifc->vifc_rate_limit;
-       v->flags = vifc->mif6c_flags;
-       if (!mrtsock)
-               v->flags |= VIFF_STATIC;
-       v->threshold = vifc->vifc_threshold;
-       v->bytes_in = 0;
-       v->bytes_out = 0;
-       v->pkt_in = 0;
-       v->pkt_out = 0;
-       v->link = dev->ifindex;
-       if (v->flags & MIFF_REGISTER)
-               v->link = dev_get_iflink(dev);
+       /* Fill in the VIF structures */
+       vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
+                       vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
+                       MIFF_REGISTER);
 
        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
@@ -1009,75 +890,56 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
        return 0;
 }
 
-static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
                                           const struct in6_addr *origin,
                                           const struct in6_addr *mcastgrp)
 {
-       int line = MFC6_HASH(mcastgrp, origin);
-       struct mfc6_cache *c;
-
-       list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
-               if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
-                   ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
-                       return c;
-       }
-       return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
-                                                     mifi_t mifi)
-{
-       int line = MFC6_HASH(&in6addr_any, &in6addr_any);
-       struct mfc6_cache *c;
-
-       list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
-               if (ipv6_addr_any(&c->mf6c_origin) &&
-                   ipv6_addr_any(&c->mf6c_mcastgrp) &&
-                   (c->mfc_un.res.ttls[mifi] < 255))
-                       return c;
+       struct mfc6_cache_cmp_arg arg = {
+               .mf6c_origin = *origin,
+               .mf6c_mcastgrp = *mcastgrp,
+       };
 
-       return NULL;
+       return mr_mfc_find(mrt, &arg);
 }
 
 /* Look for a (*,G) entry */
-static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
                                               struct in6_addr *mcastgrp,
                                               mifi_t mifi)
 {
-       int line = MFC6_HASH(mcastgrp, &in6addr_any);
-       struct mfc6_cache *c, *proxy;
+       struct mfc6_cache_cmp_arg arg = {
+               .mf6c_origin = in6addr_any,
+               .mf6c_mcastgrp = *mcastgrp,
+       };
 
        if (ipv6_addr_any(mcastgrp))
-               goto skip;
-
-       list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
-               if (ipv6_addr_any(&c->mf6c_origin) &&
-                   ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
-                       if (c->mfc_un.res.ttls[mifi] < 255)
-                               return c;
-
-                       /* It's ok if the mifi is part of the static tree */
-                       proxy = ip6mr_cache_find_any_parent(mrt,
-                                                           c->mf6c_parent);
-                       if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
-                               return c;
-               }
+               return mr_mfc_find_any_parent(mrt, mifi);
+       return mr_mfc_find_any(mrt, mifi, &arg);
+}
 
-skip:
-       return ip6mr_cache_find_any_parent(mrt, mifi);
+/* Look for a (S,G,iif) entry if parent != -1 */
+static struct mfc6_cache *
+ip6mr_cache_find_parent(struct mr_table *mrt,
+                       const struct in6_addr *origin,
+                       const struct in6_addr *mcastgrp,
+                       int parent)
+{
+       struct mfc6_cache_cmp_arg arg = {
+               .mf6c_origin = *origin,
+               .mf6c_mcastgrp = *mcastgrp,
+       };
+
+       return mr_mfc_find_parent(mrt, &arg, parent);
 }
 
-/*
- *     Allocate a multicast cache entry
- */
+/* Allocate a multicast cache entry */
 static struct mfc6_cache *ip6mr_cache_alloc(void)
 {
        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
        if (!c)
                return NULL;
-       c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
-       c->mfc_un.res.minvif = MAXMIFS;
+       c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+       c->_c.mfc_un.res.minvif = MAXMIFS;
        return c;
 }
 
@@ -1086,8 +948,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
        if (!c)
                return NULL;
-       skb_queue_head_init(&c->mfc_un.unres.unresolved);
-       c->mfc_un.unres.expires = jiffies + 10 * HZ;
+       skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+       c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
        return c;
 }
 
@@ -1095,7 +957,7 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
  *     A cache entry has gone into a resolved state from queued
  */
 
-static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
+static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
                                struct mfc6_cache *uc, struct mfc6_cache *c)
 {
        struct sk_buff *skb;
@@ -1104,12 +966,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
         *      Play the pending entries through our router
         */
 
-       while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+       while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
                if (ipv6_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = skb_pull(skb,
                                                        sizeof(struct ipv6hdr));
 
-                       if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+                       if (mr_fill_mroute(mrt, skb, &c->_c,
+                                          nlmsg_data(nlh)) > 0) {
                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
                        } else {
                                nlh->nlmsg_type = NLMSG_ERROR;
@@ -1129,9 +992,10 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
  *     Called under mrt_lock.
  */
 
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
                              mifi_t mifi, int assert)
 {
+       struct sock *mroute6_sk;
        struct sk_buff *skb;
        struct mrt6msg *msg;
        int ret;
@@ -1201,17 +1065,19 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
        skb->ip_summed = CHECKSUM_UNNECESSARY;
        }
 
-       if (!mrt->mroute6_sk) {
+       rcu_read_lock();
+       mroute6_sk = rcu_dereference(mrt->mroute_sk);
+       if (!mroute6_sk) {
+               rcu_read_unlock();
                kfree_skb(skb);
                return -EINVAL;
        }
 
        mrt6msg_netlink_event(mrt, skb);
 
-       /*
-        *      Deliver to user space multicast routing algorithms
-        */
-       ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
+       /* Deliver to user space multicast routing algorithms */
+       ret = sock_queue_rcv_skb(mroute6_sk, skb);
+       rcu_read_unlock();
        if (ret < 0) {
                net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
                kfree_skb(skb);
@@ -1220,19 +1086,16 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
        return ret;
 }
 
-/*
- *     Queue a packet for resolution. It gets locked cache entry!
- */
-
-static int
-ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
+/* Queue a packet for resolution. It gets locked cache entry! */
+static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
+                                 struct sk_buff *skb)
 {
+       struct mfc6_cache *c;
        bool found = false;
        int err;
-       struct mfc6_cache *c;
 
        spin_lock_bh(&mfc_unres_lock);
-       list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
+       list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
                        found = true;
@@ -1253,10 +1116,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
                        return -ENOBUFS;
                }
 
-               /*
-                *      Fill in the new cache entry
-                */
-               c->mf6c_parent = -1;
+               /* Fill in the new cache entry */
+               c->_c.mfc_parent = -1;
                c->mf6c_origin = ipv6_hdr(skb)->saddr;
                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
 
@@ -1276,20 +1137,18 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
                }
 
                atomic_inc(&mrt->cache_resolve_queue_len);
-               list_add(&c->list, &mrt->mfc6_unres_queue);
+               list_add(&c->_c.list, &mrt->mfc_unres_queue);
                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
 
                ipmr_do_expire_process(mrt);
        }
 
-       /*
-        *      See if we can append the packet
-        */
-       if (c->mfc_un.unres.unresolved.qlen > 3) {
+       /* See if we can append the packet */
+       if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
-               skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+               skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
                err = 0;
        }
 
@@ -1301,29 +1160,24 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
  *     MFC6 cache manipulation by user space
  */
 
-static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
+static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
                            int parent)
 {
-       int line;
-       struct mfc6_cache *c, *next;
-
-       line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+       struct mfc6_cache *c;
 
-       list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
-               if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
-                   ipv6_addr_equal(&c->mf6c_mcastgrp,
-                                   &mfc->mf6cc_mcastgrp.sin6_addr) &&
-                   (parent == -1 || parent == c->mf6c_parent)) {
-                       write_lock_bh(&mrt_lock);
-                       list_del(&c->list);
-                       write_unlock_bh(&mrt_lock);
+       /* The entries are added/deleted only under RTNL */
+       rcu_read_lock();
+       c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+                                   &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+       rcu_read_unlock();
+       if (!c)
+               return -ENOENT;
+       rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
+       list_del_rcu(&c->_c.list);
 
-                       mr6_netlink_event(mrt, c, RTM_DELROUTE);
-                       ip6mr_cache_free(c);
-                       return 0;
-               }
-       }
-       return -ENOENT;
+       mr6_netlink_event(mrt, c, RTM_DELROUTE);
+       ip6mr_cache_free(c);
+       return 0;
 }
 
 static int ip6mr_device_event(struct notifier_block *this,
@@ -1331,15 +1185,15 @@ static int ip6mr_device_event(struct notifier_block *this,
 {
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct net *net = dev_net(dev);
-       struct mr6_table *mrt;
-       struct mif_device *v;
+       struct mr_table *mrt;
+       struct vif_device *v;
        int ct;
 
        if (event != NETDEV_UNREGISTER)
                return NOTIFY_DONE;
 
        ip6mr_for_each_table(mrt, net) {
-               v = &mrt->vif6_table[0];
+               v = &mrt->vif_table[0];
                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
                        if (v->dev == dev)
                                mif6_delete(mrt, ct, 1, NULL);
@@ -1453,14 +1307,14 @@ void ip6_mr_cleanup(void)
        kmem_cache_destroy(mrt_cachep);
 }
 
-static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
+static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
                         struct mf6cctl *mfc, int mrtsock, int parent)
 {
-       bool found = false;
-       int line;
-       struct mfc6_cache *uc, *c;
        unsigned char ttls[MAXMIFS];
-       int i;
+       struct mfc6_cache *uc, *c;
+       struct mr_mfc *_uc;
+       bool found;
+       int i, err;
 
        if (mfc->mf6cc_parent >= MAXMIFS)
                return -ENFILE;
@@ -1469,27 +1323,19 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
        for (i = 0; i < MAXMIFS; i++) {
                if (IF_ISSET(i, &mfc->mf6cc_ifset))
                        ttls[i] = 1;
-
-       }
-
-       line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
-
-       list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
-               if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
-                   ipv6_addr_equal(&c->mf6c_mcastgrp,
-                                   &mfc->mf6cc_mcastgrp.sin6_addr) &&
-                   (parent == -1 || parent == mfc->mf6cc_parent)) {
-                       found = true;
-                       break;
-               }
        }
 
-       if (found) {
+       /* The entries are added/deleted only under RTNL */
+       rcu_read_lock();
+       c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+                                   &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+       rcu_read_unlock();
+       if (c) {
                write_lock_bh(&mrt_lock);
-               c->mf6c_parent = mfc->mf6cc_parent;
-               ip6mr_update_thresholds(mrt, c, ttls);
+               c->_c.mfc_parent = mfc->mf6cc_parent;
+               ip6mr_update_thresholds(mrt, &c->_c, ttls);
                if (!mrtsock)
-                       c->mfc_flags |= MFC_STATIC;
+                       c->_c.mfc_flags |= MFC_STATIC;
                write_unlock_bh(&mrt_lock);
                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
                return 0;
@@ -1505,31 +1351,36 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
 
        c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
        c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
-       c->mf6c_parent = mfc->mf6cc_parent;
-       ip6mr_update_thresholds(mrt, c, ttls);
+       c->_c.mfc_parent = mfc->mf6cc_parent;
+       ip6mr_update_thresholds(mrt, &c->_c, ttls);
        if (!mrtsock)
-               c->mfc_flags |= MFC_STATIC;
+               c->_c.mfc_flags |= MFC_STATIC;
 
-       write_lock_bh(&mrt_lock);
-       list_add(&c->list, &mrt->mfc6_cache_array[line]);
-       write_unlock_bh(&mrt_lock);
+       err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
+                                 ip6mr_rht_params);
+       if (err) {
+               pr_err("ip6mr: rhtable insert error %d\n", err);
+               ip6mr_cache_free(c);
+               return err;
+       }
+       list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
 
-       /*
-        *      Check to see if we resolved a queued list. If so we
-        *      need to send on the frames and tidy up.
+       /* Check to see if we resolved a queued list. If so we
+        * need to send on the frames and tidy up.
         */
        found = false;
        spin_lock_bh(&mfc_unres_lock);
-       list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
+       list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+               uc = (struct mfc6_cache *)_uc;
                if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
                    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
-                       list_del(&uc->list);
+                       list_del(&_uc->list);
                        atomic_dec(&mrt->cache_resolve_queue_len);
                        found = true;
                        break;
                }
        }
-       if (list_empty(&mrt->mfc6_unres_queue))
+       if (list_empty(&mrt->mfc_unres_queue))
                del_timer(&mrt->ipmr_expire_timer);
        spin_unlock_bh(&mfc_unres_lock);
 
@@ -1545,61 +1396,55 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
  *     Close the multicast socket, and clear the vif tables etc
  */
 
-static void mroute_clean_tables(struct mr6_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
 {
-       int i;
+       struct mr_mfc *c, *tmp;
        LIST_HEAD(list);
-       struct mfc6_cache *c, *next;
+       int i;
 
-       /*
-        *      Shut down all active vif entries
-        */
+       /* Shut down all active vif entries */
        for (i = 0; i < mrt->maxvif; i++) {
-               if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+               if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
                        continue;
                mif6_delete(mrt, i, 0, &list);
        }
        unregister_netdevice_many(&list);
 
-       /*
-        *      Wipe the cache
-        */
-       for (i = 0; i < MFC6_LINES; i++) {
-               list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
-                       if (!all && (c->mfc_flags & MFC_STATIC))
-                               continue;
-                       write_lock_bh(&mrt_lock);
-                       list_del(&c->list);
-                       write_unlock_bh(&mrt_lock);
-
-                       mr6_netlink_event(mrt, c, RTM_DELROUTE);
-                       ip6mr_cache_free(c);
-               }
+       /* Wipe the cache */
+       list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+               if (!all && (c->mfc_flags & MFC_STATIC))
+                       continue;
+               rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+               list_del_rcu(&c->list);
+               mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+               ip6mr_cache_free((struct mfc6_cache *)c);
        }
 
        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
                spin_lock_bh(&mfc_unres_lock);
-               list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+               list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
                        list_del(&c->list);
-                       mr6_netlink_event(mrt, c, RTM_DELROUTE);
-                       ip6mr_destroy_unres(mrt, c);
+                       mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+                                         RTM_DELROUTE);
+                       ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
                }
                spin_unlock_bh(&mfc_unres_lock);
        }
 }
 
-static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
+static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
 {
        int err = 0;
        struct net *net = sock_net(sk);
 
        rtnl_lock();
        write_lock_bh(&mrt_lock);
-       if (likely(mrt->mroute6_sk == NULL)) {
-               mrt->mroute6_sk = sk;
-               net->ipv6.devconf_all->mc_forwarding++;
-       } else {
+       if (rtnl_dereference(mrt->mroute_sk)) {
                err = -EADDRINUSE;
+       } else {
+               rcu_assign_pointer(mrt->mroute_sk, sk);
+               sock_set_flag(sk, SOCK_RCU_FREE);
+               net->ipv6.devconf_all->mc_forwarding++;
        }
        write_unlock_bh(&mrt_lock);
 
@@ -1617,7 +1462,7 @@ int ip6mr_sk_done(struct sock *sk)
 {
        int err = -EACCES;
        struct net *net = sock_net(sk);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        if (sk->sk_type != SOCK_RAW ||
            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1625,9 +1470,13 @@ int ip6mr_sk_done(struct sock *sk)
 
        rtnl_lock();
        ip6mr_for_each_table(mrt, net) {
-               if (sk == mrt->mroute6_sk) {
+               if (sk == rtnl_dereference(mrt->mroute_sk)) {
                        write_lock_bh(&mrt_lock);
-                       mrt->mroute6_sk = NULL;
+                       RCU_INIT_POINTER(mrt->mroute_sk, NULL);
+                       /* Note that mroute_sk had SOCK_RCU_FREE set,
+                        * so the RCU grace period before sk freeing
+                        * is guaranteed by sk_destruct()
+                        */
                        net->ipv6.devconf_all->mc_forwarding--;
                        write_unlock_bh(&mrt_lock);
                        inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -1645,9 +1494,9 @@ int ip6mr_sk_done(struct sock *sk)
        return err;
 }
 
-struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
 {
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        struct flowi6 fl6 = {
                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
                .flowi6_oif     = skb->dev->ifindex,
@@ -1657,8 +1506,9 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
                return NULL;
 
-       return mrt->mroute6_sk;
+       return rcu_access_pointer(mrt->mroute_sk);
 }
+EXPORT_SYMBOL(mroute6_is_socket);
 
 /*
  *     Socket options and virtual interface manipulation. The whole
@@ -1674,7 +1524,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
        struct mf6cctl mfc;
        mifi_t mifi;
        struct net *net = sock_net(sk);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        if (sk->sk_type != SOCK_RAW ||
            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1685,7 +1535,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
                return -ENOENT;
 
        if (optname != MRT6_INIT) {
-               if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+               if (sk != rcu_access_pointer(mrt->mroute_sk) &&
+                   !ns_capable(net->user_ns, CAP_NET_ADMIN))
                        return -EACCES;
        }
 
@@ -1707,7 +1558,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
                if (vif.mif6c_mifi >= MAXMIFS)
                        return -ENFILE;
                rtnl_lock();
-               ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
+               ret = mif6_add(net, mrt, &vif,
+                              sk == rtnl_dereference(mrt->mroute_sk));
                rtnl_unlock();
                return ret;
 
@@ -1742,7 +1594,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
                        ret = ip6mr_mfc_delete(mrt, &mfc, parent);
                else
                        ret = ip6mr_mfc_add(net, mrt, &mfc,
-                                           sk == mrt->mroute6_sk, parent);
+                                           sk ==
+                                           rtnl_dereference(mrt->mroute_sk),
+                                           parent);
                rtnl_unlock();
                return ret;
 
@@ -1794,7 +1648,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
                /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
                if (v != RT_TABLE_DEFAULT && v >= 100000000)
                        return -EINVAL;
-               if (sk == mrt->mroute6_sk)
+               if (sk == rcu_access_pointer(mrt->mroute_sk))
                        return -EBUSY;
 
                rtnl_lock();
@@ -1825,7 +1679,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
        int olr;
        int val;
        struct net *net = sock_net(sk);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        if (sk->sk_type != SOCK_RAW ||
            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1873,10 +1727,10 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 {
        struct sioc_sg_req6 sr;
        struct sioc_mif_req6 vr;
-       struct mif_device *vif;
+       struct vif_device *vif;
        struct mfc6_cache *c;
        struct net *net = sock_net(sk);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
        if (!mrt)
@@ -1889,8 +1743,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
                if (vr.mifi >= mrt->maxvif)
                        return -EINVAL;
                read_lock(&mrt_lock);
-               vif = &mrt->vif6_table[vr.mifi];
-               if (MIF_EXISTS(mrt, vr.mifi)) {
+               vif = &mrt->vif_table[vr.mifi];
+               if (VIF_EXISTS(mrt, vr.mifi)) {
                        vr.icount = vif->pkt_in;
                        vr.ocount = vif->pkt_out;
                        vr.ibytes = vif->bytes_in;
@@ -1907,19 +1761,19 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
                if (copy_from_user(&sr, arg, sizeof(sr)))
                        return -EFAULT;
 
-               read_lock(&mrt_lock);
+               rcu_read_lock();
                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
                if (c) {
-                       sr.pktcnt = c->mfc_un.res.pkt;
-                       sr.bytecnt = c->mfc_un.res.bytes;
-                       sr.wrong_if = c->mfc_un.res.wrong_if;
-                       read_unlock(&mrt_lock);
+                       sr.pktcnt = c->_c.mfc_un.res.pkt;
+                       sr.bytecnt = c->_c.mfc_un.res.bytes;
+                       sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+                       rcu_read_unlock();
 
                        if (copy_to_user(arg, &sr, sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
-               read_unlock(&mrt_lock);
+               rcu_read_unlock();
                return -EADDRNOTAVAIL;
        default:
                return -ENOIOCTLCMD;
@@ -1947,10 +1801,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
 {
        struct compat_sioc_sg_req6 sr;
        struct compat_sioc_mif_req6 vr;
-       struct mif_device *vif;
+       struct vif_device *vif;
        struct mfc6_cache *c;
        struct net *net = sock_net(sk);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
 
        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
        if (!mrt)
@@ -1963,8 +1817,8 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
                if (vr.mifi >= mrt->maxvif)
                        return -EINVAL;
                read_lock(&mrt_lock);
-               vif = &mrt->vif6_table[vr.mifi];
-               if (MIF_EXISTS(mrt, vr.mifi)) {
+               vif = &mrt->vif_table[vr.mifi];
+               if (VIF_EXISTS(mrt, vr.mifi)) {
                        vr.icount = vif->pkt_in;
                        vr.ocount = vif->pkt_out;
                        vr.ibytes = vif->bytes_in;
@@ -1981,19 +1835,19 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
                if (copy_from_user(&sr, arg, sizeof(sr)))
                        return -EFAULT;
 
-               read_lock(&mrt_lock);
+               rcu_read_lock();
                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
                if (c) {
-                       sr.pktcnt = c->mfc_un.res.pkt;
-                       sr.bytecnt = c->mfc_un.res.bytes;
-                       sr.wrong_if = c->mfc_un.res.wrong_if;
-                       read_unlock(&mrt_lock);
+                       sr.pktcnt = c->_c.mfc_un.res.pkt;
+                       sr.bytecnt = c->_c.mfc_un.res.bytes;
+                       sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+                       rcu_read_unlock();
 
                        if (copy_to_user(arg, &sr, sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
-               read_unlock(&mrt_lock);
+               rcu_read_unlock();
                return -EADDRNOTAVAIL;
        default:
                return -ENOIOCTLCMD;
@@ -2014,11 +1868,11 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
  *     Processing handlers for ip6mr_forward
  */
 
-static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
+static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
                          struct sk_buff *skb, struct mfc6_cache *c, int vifi)
 {
        struct ipv6hdr *ipv6h;
-       struct mif_device *vif = &mrt->vif6_table[vifi];
+       struct vif_device *vif = &mrt->vif_table[vifi];
        struct net_device *dev;
        struct dst_entry *dst;
        struct flowi6 fl6;
@@ -2088,46 +1942,50 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
        return 0;
 }
 
-static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
+static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
 {
        int ct;
 
        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
-               if (mrt->vif6_table[ct].dev == dev)
+               if (mrt->vif_table[ct].dev == dev)
                        break;
        }
        return ct;
 }
 
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
-                          struct sk_buff *skb, struct mfc6_cache *cache)
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
+                          struct sk_buff *skb, struct mfc6_cache *c)
 {
        int psend = -1;
        int vif, ct;
        int true_vifi = ip6mr_find_vif(mrt, skb->dev);
 
-       vif = cache->mf6c_parent;
-       cache->mfc_un.res.pkt++;
-       cache->mfc_un.res.bytes += skb->len;
-       cache->mfc_un.res.lastuse = jiffies;
+       vif = c->_c.mfc_parent;
+       c->_c.mfc_un.res.pkt++;
+       c->_c.mfc_un.res.bytes += skb->len;
+       c->_c.mfc_un.res.lastuse = jiffies;
 
-       if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
+       if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
                struct mfc6_cache *cache_proxy;
 
                /* For an (*,G) entry, we only check that the incoming
                 * interface is part of the static tree.
                 */
-               cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
+               rcu_read_lock();
+               cache_proxy = mr_mfc_find_any_parent(mrt, vif);
                if (cache_proxy &&
-                   cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+                   cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
+                       rcu_read_unlock();
                        goto forward;
+               }
+               rcu_read_unlock();
        }
 
        /*
         * Wrong interface: drop packet and (maybe) send PIM assert.
         */
-       if (mrt->vif6_table[vif].dev != skb->dev) {
-               cache->mfc_un.res.wrong_if++;
+       if (mrt->vif_table[vif].dev != skb->dev) {
+               c->_c.mfc_un.res.wrong_if++;
 
                if (true_vifi >= 0 && mrt->mroute_do_assert &&
                    /* pimsm uses asserts, when switching from RPT to SPT,
@@ -2136,52 +1994,55 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
                       large chunk of pimd to kernel. Ough... --ANK
                     */
                    (mrt->mroute_do_pim ||
-                    cache->mfc_un.res.ttls[true_vifi] < 255) &&
+                    c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
                    time_after(jiffies,
-                              cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
-                       cache->mfc_un.res.last_assert = jiffies;
+                              c->_c.mfc_un.res.last_assert +
+                              MFC_ASSERT_THRESH)) {
+                       c->_c.mfc_un.res.last_assert = jiffies;
                        ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
                }
                goto dont_forward;
        }
 
 forward:
-       mrt->vif6_table[vif].pkt_in++;
-       mrt->vif6_table[vif].bytes_in += skb->len;
+       mrt->vif_table[vif].pkt_in++;
+       mrt->vif_table[vif].bytes_in += skb->len;
 
        /*
         *      Forward the frame
         */
-       if (ipv6_addr_any(&cache->mf6c_origin) &&
-           ipv6_addr_any(&cache->mf6c_mcastgrp)) {
+       if (ipv6_addr_any(&c->mf6c_origin) &&
+           ipv6_addr_any(&c->mf6c_mcastgrp)) {
                if (true_vifi >= 0 &&
-                   true_vifi != cache->mf6c_parent &&
+                   true_vifi != c->_c.mfc_parent &&
                    ipv6_hdr(skb)->hop_limit >
-                               cache->mfc_un.res.ttls[cache->mf6c_parent]) {
+                               c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
                        /* It's an (*,*) entry and the packet is not coming from
                         * the upstream: forward the packet to the upstream
                         * only.
                         */
-                       psend = cache->mf6c_parent;
+                       psend = c->_c.mfc_parent;
                        goto last_forward;
                }
                goto dont_forward;
        }
-       for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
+       for (ct = c->_c.mfc_un.res.maxvif - 1;
+            ct >= c->_c.mfc_un.res.minvif; ct--) {
                /* For (*,G) entry, don't forward to the incoming interface */
-               if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
-                   ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+               if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
+                   ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
-                                       ip6mr_forward2(net, mrt, skb2, cache, psend);
+                                       ip6mr_forward2(net, mrt, skb2,
+                                                      c, psend);
                        }
                        psend = ct;
                }
        }
 last_forward:
        if (psend != -1) {
-               ip6mr_forward2(net, mrt, skb, cache, psend);
+               ip6mr_forward2(net, mrt, skb, c, psend);
                return;
        }
 
@@ -2198,7 +2059,7 @@ int ip6_mr_input(struct sk_buff *skb)
 {
        struct mfc6_cache *cache;
        struct net *net = dev_net(skb->dev);
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        struct flowi6 fl6 = {
                .flowi6_iif     = skb->dev->ifindex,
                .flowi6_mark    = skb->mark,
@@ -2248,66 +2109,11 @@ int ip6_mr_input(struct sk_buff *skb)
        return 0;
 }
 
-
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
-                              struct mfc6_cache *c, struct rtmsg *rtm)
-{
-       struct rta_mfc_stats mfcs;
-       struct nlattr *mp_attr;
-       struct rtnexthop *nhp;
-       unsigned long lastuse;
-       int ct;
-
-       /* If cache is unresolved, don't try to parse IIF and OIF */
-       if (c->mf6c_parent >= MAXMIFS) {
-               rtm->rtm_flags |= RTNH_F_UNRESOLVED;
-               return -ENOENT;
-       }
-
-       if (MIF_EXISTS(mrt, c->mf6c_parent) &&
-           nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
-               return -EMSGSIZE;
-       mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
-       if (!mp_attr)
-               return -EMSGSIZE;
-
-       for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
-               if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
-                       nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
-                       if (!nhp) {
-                               nla_nest_cancel(skb, mp_attr);
-                               return -EMSGSIZE;
-                       }
-
-                       nhp->rtnh_flags = 0;
-                       nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
-                       nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
-                       nhp->rtnh_len = sizeof(*nhp);
-               }
-       }
-
-       nla_nest_end(skb, mp_attr);
-
-       lastuse = READ_ONCE(c->mfc_un.res.lastuse);
-       lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
-       mfcs.mfcs_packets = c->mfc_un.res.pkt;
-       mfcs.mfcs_bytes = c->mfc_un.res.bytes;
-       mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
-       if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
-           nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
-                             RTA_PAD))
-               return -EMSGSIZE;
-
-       rtm->rtm_type = RTN_MULTICAST;
-       return 1;
-}
-
 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
                    u32 portid)
 {
        int err;
-       struct mr6_table *mrt;
+       struct mr_table *mrt;
        struct mfc6_cache *cache;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 
@@ -2368,15 +2174,12 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
                return err;
        }
 
-       if (rtm->rtm_flags & RTM_F_NOTIFY)
-               cache->mfc_flags |= MFC_NOTIFY;
-
-       err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
+       err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
        read_unlock(&mrt_lock);
        return err;
 }
 
-static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
                             u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
                             int flags)
 {
@@ -2398,7 +2201,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
                goto nla_put_failure;
        rtm->rtm_type = RTN_MULTICAST;
        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
-       if (c->mfc_flags & MFC_STATIC)
+       if (c->_c.mfc_flags & MFC_STATIC)
                rtm->rtm_protocol = RTPROT_STATIC;
        else
                rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2407,7 +2210,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
        if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
            nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
                goto nla_put_failure;
-       err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
+       err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
        /* do not break the dump if cache is unresolved */
        if (err < 0 && err != -ENOENT)
                goto nla_put_failure;
@@ -2420,6 +2223,14 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
        return -EMSGSIZE;
 }
 
+static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+                             u32 portid, u32 seq, struct mr_mfc *c,
+                             int cmd, int flags)
+{
+       return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
+                                cmd, flags);
+}
+
 static int mr6_msgsize(bool unresolved, int maxvif)
 {
        size_t len =
@@ -2441,14 +2252,14 @@ static int mr6_msgsize(bool unresolved, int maxvif)
        return len;
 }
 
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
                              int cmd)
 {
        struct net *net = read_pnet(&mrt->net);
        struct sk_buff *skb;
        int err = -ENOBUFS;
 
-       skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
+       skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
                        GFP_ATOMIC);
        if (!skb)
                goto errout;
@@ -2483,7 +2294,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
        return len;
 }
 
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
 {
        struct net *net = read_pnet(&mrt->net);
        struct nlmsghdr *nlh;
@@ -2533,65 +2344,6 @@ static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
 
 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 {
-       struct net *net = sock_net(skb->sk);
-       struct mr6_table *mrt;
-       struct mfc6_cache *mfc;
-       unsigned int t = 0, s_t;
-       unsigned int h = 0, s_h;
-       unsigned int e = 0, s_e;
-
-       s_t = cb->args[0];
-       s_h = cb->args[1];
-       s_e = cb->args[2];
-
-       read_lock(&mrt_lock);
-       ip6mr_for_each_table(mrt, net) {
-               if (t < s_t)
-                       goto next_table;
-               if (t > s_t)
-                       s_h = 0;
-               for (h = s_h; h < MFC6_LINES; h++) {
-                       list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
-                               if (e < s_e)
-                                       goto next_entry;
-                               if (ip6mr_fill_mroute(mrt, skb,
-                                                     NETLINK_CB(cb->skb).portid,
-                                                     cb->nlh->nlmsg_seq,
-                                                     mfc, RTM_NEWROUTE,
-                                                     NLM_F_MULTI) < 0)
-                                       goto done;
-next_entry:
-                               e++;
-                       }
-                       e = s_e = 0;
-               }
-               spin_lock_bh(&mfc_unres_lock);
-               list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
-                       if (e < s_e)
-                               goto next_entry2;
-                       if (ip6mr_fill_mroute(mrt, skb,
-                                             NETLINK_CB(cb->skb).portid,
-                                             cb->nlh->nlmsg_seq,
-                                             mfc, RTM_NEWROUTE,
-                                             NLM_F_MULTI) < 0) {
-                               spin_unlock_bh(&mfc_unres_lock);
-                               goto done;
-                       }
-next_entry2:
-                       e++;
-               }
-               spin_unlock_bh(&mfc_unres_lock);
-               e = s_e = 0;
-               s_h = 0;
-next_table:
-               t++;
-       }
-done:
-       read_unlock(&mrt_lock);
-
-       cb->args[2] = e;
-       cb->args[1] = h;
-       cb->args[0] = t;
-
-       return skb->len;
+       return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
+                               _ip6mr_fill_mroute, &mfc_unres_lock);
 }
index 24535169663dc501700c95e354761dd824e5a962..4d780c7f013060732dda2db760d7ba0474c812e3 100644 (file)
@@ -1415,4 +1415,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
 }
 EXPORT_SYMBOL(compat_ipv6_getsockopt);
 #endif
-
index d9bb933dd5c422fd8cba974e3c4e6fe961999d7d..d1a0cefac27301896bda5f6d1006c4c4969fd044 100644 (file)
@@ -165,7 +165,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 
        if (ifindex == 0) {
                struct rt6_info *rt;
-               rt = rt6_lookup(net, addr, NULL, 0, 0);
+               rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
                if (rt) {
                        dev = rt->dst.dev;
                        ip6_rt_put(rt);
@@ -254,7 +254,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
        struct inet6_dev *idev = NULL;
 
        if (ifindex == 0) {
-               struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
+               struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0);
 
                if (rt) {
                        dev = rt->dst.dev;
index 0a19ce3a6f7ff1664b41c225f9117382dbf06815..d1d0b2fa7a07e9432025c3026cf5651a7237b566 100644 (file)
@@ -527,7 +527,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
        }
 
        if (!dev->addr_len)
-               inc_opt = 0;
+               inc_opt = false;
        if (inc_opt)
                optlen += ndisc_opt_addr_space(dev,
                                               NDISC_NEIGHBOUR_ADVERTISEMENT);
@@ -707,7 +707,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
        int probes = atomic_read(&neigh->probes);
 
        if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
-                                          dev, 1,
+                                          dev, false, 1,
                                           IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
                saddr = &ipv6_hdr(skb)->saddr;
        probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
@@ -1554,7 +1554,8 @@ static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb,
        *(opt++) = (rd_len >> 3);
        opt += 6;
 
-       memcpy(opt, ipv6_hdr(orig_skb), rd_len - 8);
+       skb_copy_bits(orig_skb, skb_network_offset(orig_skb), opt,
+                     rd_len - 8);
 }
 
 void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
index d95ceca7ff8f648ff301d91a2e3eb60fc2050f1c..531d6957af36c4af48176f9360e9d95f78a45d55 100644 (file)
 int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
 {
        const struct ipv6hdr *iph = ipv6_hdr(skb);
+       struct sock *sk = sk_to_full_sk(skb->sk);
        unsigned int hh_len;
        struct dst_entry *dst;
        struct flowi6 fl6 = {
-               .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
+               .flowi6_oif = sk ? sk->sk_bound_dev_if : 0,
                .flowi6_mark = skb->mark,
-               .flowi6_uid = sock_net_uid(net, skb->sk),
+               .flowi6_uid = sock_net_uid(net, sk),
                .daddr = iph->daddr,
                .saddr = iph->saddr,
        };
        int err;
 
-       dst = ip6_route_output(net, skb->sk, &fl6);
+       dst = ip6_route_output(net, sk, &fl6);
        err = dst->error;
        if (err) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
@@ -50,7 +51,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
        if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
            xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
                skb_dst_set(skb, NULL);
-               dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0);
+               dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
                if (IS_ERR(dst))
                        return PTR_ERR(dst);
                skb_dst_set(skb, dst);
index 62358b93bbac5250676a067464c11e4e3d649faa..4de8ac1e5af4a6c4137f04b0757015aa2c10104b 100644 (file)
@@ -1928,6 +1928,7 @@ static void __net_exit ip6_tables_net_exit(struct net *net)
 static struct pernet_operations ip6_tables_net_ops = {
        .init = ip6_tables_net_init,
        .exit = ip6_tables_net_exit,
+       .async = true,
 };
 
 static int __init ip6_tables_init(void)
index 94deb69bbbdaaa34ca14dcaab943233d0a58a8a6..d12f511929f53ecf0a50e831d56427e5a7784ff5 100644 (file)
@@ -48,12 +48,8 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
        }
 
        fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
-       if ((flags & XT_RPFILTER_LOOSE) == 0) {
-               fl6.flowi6_oif = dev->ifindex;
-               lookup_flags |= RT6_LOOKUP_F_IFACE;
-       }
 
-       rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags);
+       rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
        if (rt->dst.error)
                goto out;
 
index 1343077dde938f29cb6262937c8587d2ed640b69..06561c84c0bc0012f669743e01352a745c3c87a6 100644 (file)
@@ -87,6 +87,7 @@ static void __net_exit ip6table_filter_net_exit(struct net *net)
 static struct pernet_operations ip6table_filter_net_ops = {
        .init = ip6table_filter_net_init,
        .exit = ip6table_filter_net_exit,
+       .async = true,
 };
 
 static int __init ip6table_filter_init(void)
index b0524b18c4fb3b64f941ea2531c3e0ccba800ba7..a11e25936b451bfbed1b0daf8d9020a95e7f4fcd 100644 (file)
@@ -107,6 +107,7 @@ static void __net_exit ip6table_mangle_net_exit(struct net *net)
 
 static struct pernet_operations ip6table_mangle_net_ops = {
        .exit = ip6table_mangle_net_exit,
+       .async = true,
 };
 
 static int __init ip6table_mangle_init(void)
index 47306e45a80abf5225aab9c1209258302c63c161..4475fd300bb60f24608beda688c53fcc5e010c0f 100644 (file)
@@ -131,6 +131,7 @@ static void __net_exit ip6table_nat_net_exit(struct net *net)
 
 static struct pernet_operations ip6table_nat_net_ops = {
        .exit   = ip6table_nat_net_exit,
+       .async  = true,
 };
 
 static int __init ip6table_nat_init(void)
index 710fa0806c37cddffae7cc56692a24ad9aa9d504..a88f3b1995b186ad90687d793c7be7c91dabd2dc 100644 (file)
@@ -75,6 +75,7 @@ static void __net_exit ip6table_raw_net_exit(struct net *net)
 
 static struct pernet_operations ip6table_raw_net_ops = {
        .exit = ip6table_raw_net_exit,
+       .async = true,
 };
 
 static int __init ip6table_raw_init(void)
index cf26ccb04056e1346f40a1d34ff44e2b8eb9f518..320048c008dc40d9aa0791f042e8335ec550d8ed 100644 (file)
@@ -74,6 +74,7 @@ static void __net_exit ip6table_security_net_exit(struct net *net)
 
 static struct pernet_operations ip6table_security_net_ops = {
        .exit = ip6table_security_net_exit,
+       .async = true,
 };
 
 static int __init ip6table_security_init(void)
index 663827ee3cf8e004e9acc70a2c3d5ccbb8f643cb..ba54bb3bd1e4462632f7542df707477e7c40c9b7 100644 (file)
@@ -401,6 +401,7 @@ static struct pernet_operations ipv6_net_ops = {
        .exit = ipv6_net_exit,
        .id = &conntrack6_net_id,
        .size = sizeof(struct conntrack6_net),
+       .async = true,
 };
 
 static int __init nf_conntrack_l3proto_ipv6_init(void)
index b84ce3e6d728a5b8af65b91faf42ec640ff03910..34136fe80ed5fdc1f1eb825d790449fcb588fb31 100644 (file)
@@ -646,6 +646,7 @@ static void nf_ct_net_exit(struct net *net)
 static struct pernet_operations nf_ct_net_ops = {
        .init = nf_ct_net_init,
        .exit = nf_ct_net_exit,
+       .async = true,
 };
 
 int nf_ct_frag6_init(void)
index c87b48359e8f482d6e8deb91a19502b484f718ed..32f98bc06900de067837c2d982feac318cd9f443 100644 (file)
@@ -103,6 +103,7 @@ static void __net_exit defrag6_net_exit(struct net *net)
 
 static struct pernet_operations defrag6_net_ops = {
        .exit = defrag6_net_exit,
+       .async = true,
 };
 
 static int __init nf_defrag_init(void)
index d346705d6ee6bfe87292bc77d7eb1614d214d2c9..207cb35569b1c3c382560088aeb6ecdd75a236b2 100644 (file)
@@ -178,7 +178,7 @@ static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
        if (skb->len <= mtu)
                return false;
 
-       if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+       if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
                return false;
 
        return true;
index b397a8fe88b9391e462146391901a360969547c0..0220e584589c2ac1cae67868dfdccc1ebbed014f 100644 (file)
@@ -390,6 +390,7 @@ static void __net_exit nf_log_ipv6_net_exit(struct net *net)
 static struct pernet_operations nf_log_ipv6_net_ops = {
        .init = nf_log_ipv6_net_init,
        .exit = nf_log_ipv6_net_exit,
+       .async = true,
 };
 
 static int __init nf_log_ipv6_init(void)
index bed57ee65f7b10c5fb4ae9fc6f086d7bbfffa7af..6b7f075f811f2b317f09f11998a80d87d0616e15 100644 (file)
@@ -99,6 +99,10 @@ static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
            !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
                                target, maniptype))
                return false;
+
+       /* must reload, offset might have changed */
+       ipv6h = (void *)skb->data + iphdroff;
+
 manip_addr:
        if (maniptype == NF_NAT_MANIP_SRC)
                ipv6h->saddr = target->src.u3.in6;
index cc5174c7254c5697fc5bcd66ffeecb00e408e28e..36be3cf0adefb3c516b3a616b9425ab9fc61b2e7 100644 (file)
@@ -180,8 +180,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
        }
 
        *dest = 0;
- again:
-       rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags);
+       rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb,
+                                     lookup_flags);
        if (rt->dst.error)
                goto put_rt_err;
 
@@ -189,15 +189,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
        if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
                goto put_rt_err;
 
-       if (oif && oif != rt->rt6i_idev->dev) {
-               /* multipath route? Try again with F_IFACE */
-               if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) {
-                       lookup_flags |= RT6_LOOKUP_F_IFACE;
-                       fl6.flowi6_oif = oif->ifindex;
-                       ip6_rt_put(rt);
-                       goto again;
-               }
-       }
+       if (oif && oif != rt->rt6i_idev->dev)
+               goto put_rt_err;
 
        switch (priv->result) {
        case NFT_FIB_RESULT_OIF:
index b8858c546f41b40598e73e04b5eda7e7e79e5670..1678cf03768859c358ce78036d5cde3a44ed5828 100644 (file)
@@ -355,4 +355,3 @@ void ipv6_misc_proc_exit(void)
 {
        unregister_pernet_subsys(&ipv6_proc_ops);
 }
-
index aa709b644945bc0a27f3994efeca77c3d06897ee..a2ed9fdd58d479b71d0fb1a1516ecc0048f8cb3a 100644 (file)
@@ -128,7 +128,7 @@ struct uncached_list {
 
 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
 
-static void rt6_uncached_list_add(struct rt6_info *rt)
+void rt6_uncached_list_add(struct rt6_info *rt)
 {
        struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
 
@@ -139,7 +139,7 @@ static void rt6_uncached_list_add(struct rt6_info *rt)
        spin_unlock_bh(&ul->lock);
 }
 
-static void rt6_uncached_list_del(struct rt6_info *rt)
+void rt6_uncached_list_del(struct rt6_info *rt)
 {
        if (!list_empty(&rt->rt6i_uncached)) {
                struct uncached_list *ul = rt->rt6i_uncached_list;
@@ -450,8 +450,10 @@ static bool rt6_check_expired(const struct rt6_info *rt)
        return false;
 }
 
-static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+static struct rt6_info *rt6_multipath_select(const struct net *net,
+                                            struct rt6_info *match,
                                             struct flowi6 *fl6, int oif,
+                                            const struct sk_buff *skb,
                                             int strict)
 {
        struct rt6_info *sibling, *next_sibling;
@@ -460,7 +462,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
         * case it will always be non-zero. Otherwise now is the time to do it.
         */
        if (!fl6->mp_hash)
-               fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+               fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
 
        if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
                return match;
@@ -914,7 +916,9 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
 
 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
                                             struct fib6_table *table,
-                                            struct flowi6 *fl6, int flags)
+                                            struct flowi6 *fl6,
+                                            const struct sk_buff *skb,
+                                            int flags)
 {
        struct rt6_info *rt, *rt_cache;
        struct fib6_node *fn;
@@ -929,8 +933,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
                rt = rt6_device_match(net, rt, &fl6->saddr,
                                      fl6->flowi6_oif, flags);
                if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
-                       rt = rt6_multipath_select(rt, fl6,
-                                                 fl6->flowi6_oif, flags);
+                       rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
+                                                 skb, flags);
        }
        if (rt == net->ipv6.ip6_null_entry) {
                fn = fib6_backtrack(fn, &fl6->saddr);
@@ -954,14 +958,15 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 }
 
 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
-                                   int flags)
+                                  const struct sk_buff *skb, int flags)
 {
-       return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
+       return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
 }
 EXPORT_SYMBOL_GPL(ip6_route_lookup);
 
 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
-                           const struct in6_addr *saddr, int oif, int strict)
+                           const struct in6_addr *saddr, int oif,
+                           const struct sk_buff *skb, int strict)
 {
        struct flowi6 fl6 = {
                .flowi6_oif = oif,
@@ -975,7 +980,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
                flags |= RT6_LOOKUP_F_HAS_SADDR;
        }
 
-       dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
+       dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
        if (dst->error == 0)
                return (struct rt6_info *) dst;
 
@@ -1509,7 +1514,30 @@ static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
        }
 }
 
-static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
+static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
+                                        struct rt6_info *rt, int mtu)
+{
+       /* If the new MTU is lower than the route PMTU, this new MTU will be the
+        * lowest MTU in the path: always allow updating the route PMTU to
+        * reflect PMTU decreases.
+        *
+        * If the new MTU is higher, and the route PMTU is equal to the local
+        * MTU, this means the old MTU is the lowest in the path, so allow
+        * updating it: if other nodes now have lower MTUs, PMTU discovery will
+        * handle this.
+        */
+
+       if (dst_mtu(&rt->dst) >= mtu)
+               return true;
+
+       if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
+               return true;
+
+       return false;
+}
+
+static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
+                                      struct rt6_info *rt, int mtu)
 {
        struct rt6_exception_bucket *bucket;
        struct rt6_exception *rt6_ex;
@@ -1518,20 +1546,22 @@ static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
        bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
                                        lockdep_is_held(&rt6_exception_lock));
 
-       if (bucket) {
-               for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
-                       hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
-                               struct rt6_info *entry = rt6_ex->rt6i;
-                               /* For RTF_CACHE with rt6i_pmtu == 0
-                                * (i.e. a redirected route),
-                                * the metrics of its rt->dst.from has already
-                                * been updated.
-                                */
-                               if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
-                                       entry->rt6i_pmtu = mtu;
-                       }
-                       bucket++;
+       if (!bucket)
+               return;
+
+       for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+               hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+                       struct rt6_info *entry = rt6_ex->rt6i;
+
+                       /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
+                        * route), the metrics of its rt->dst.from have already
+                        * been updated.
+                        */
+                       if (entry->rt6i_pmtu &&
+                           rt6_mtu_change_route_allowed(idev, entry, mtu))
+                               entry->rt6i_pmtu = mtu;
                }
+               bucket++;
        }
 }
 
@@ -1647,7 +1677,8 @@ void rt6_age_exceptions(struct rt6_info *rt,
 }
 
 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
-                              int oif, struct flowi6 *fl6, int flags)
+                              int oif, struct flowi6 *fl6,
+                              const struct sk_buff *skb, int flags)
 {
        struct fib6_node *fn, *saved_fn;
        struct rt6_info *rt, *rt_cache;
@@ -1669,7 +1700,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 redo_rt6_select:
        rt = rt6_select(net, fn, oif, strict);
        if (rt->rt6i_nsiblings)
-               rt = rt6_multipath_select(rt, fl6, oif, strict);
+               rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
        if (rt == net->ipv6.ip6_null_entry) {
                fn = fib6_backtrack(fn, &fl6->saddr);
                if (fn)
@@ -1768,28 +1799,35 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 }
 EXPORT_SYMBOL_GPL(ip6_pol_route);
 
-static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
-                                           struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_input(struct net *net,
+                                           struct fib6_table *table,
+                                           struct flowi6 *fl6,
+                                           const struct sk_buff *skb,
+                                           int flags)
 {
-       return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
+       return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
 }
 
 struct dst_entry *ip6_route_input_lookup(struct net *net,
                                         struct net_device *dev,
-                                        struct flowi6 *fl6, int flags)
+                                        struct flowi6 *fl6,
+                                        const struct sk_buff *skb,
+                                        int flags)
 {
        if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
                flags |= RT6_LOOKUP_F_IFACE;
 
-       return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
+       return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
 }
 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
 
 static void ip6_multipath_l3_keys(const struct sk_buff *skb,
-                                 struct flow_keys *keys)
+                                 struct flow_keys *keys,
+                                 struct flow_keys *flkeys)
 {
        const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
        const struct ipv6hdr *key_iph = outer_iph;
+       struct flow_keys *_flkeys = flkeys;
        const struct ipv6hdr *inner_iph;
        const struct icmp6hdr *icmph;
        struct ipv6hdr _inner_iph;
@@ -1811,26 +1849,76 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
                goto out;
 
        key_iph = inner_iph;
+       _flkeys = NULL;
 out:
-       memset(keys, 0, sizeof(*keys));
-       keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
-       keys->addrs.v6addrs.src = key_iph->saddr;
-       keys->addrs.v6addrs.dst = key_iph->daddr;
-       keys->tags.flow_label = ip6_flowinfo(key_iph);
-       keys->basic.ip_proto = key_iph->nexthdr;
+       if (_flkeys) {
+               keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
+               keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
+               keys->tags.flow_label = _flkeys->tags.flow_label;
+               keys->basic.ip_proto = _flkeys->basic.ip_proto;
+       } else {
+               keys->addrs.v6addrs.src = key_iph->saddr;
+               keys->addrs.v6addrs.dst = key_iph->daddr;
+               keys->tags.flow_label = ip6_flowinfo(key_iph);
+               keys->basic.ip_proto = key_iph->nexthdr;
+       }
 }
 
 /* if skb is set it will be used and fl6 can be NULL */
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+                      const struct sk_buff *skb, struct flow_keys *flkeys)
 {
        struct flow_keys hash_keys;
+       u32 mhash;
 
-       if (skb) {
-               ip6_multipath_l3_keys(skb, &hash_keys);
-               return flow_hash_from_keys(&hash_keys) >> 1;
+       switch (ip6_multipath_hash_policy(net)) {
+       case 0:
+               memset(&hash_keys, 0, sizeof(hash_keys));
+               hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+               if (skb) {
+                       ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
+               } else {
+                       hash_keys.addrs.v6addrs.src = fl6->saddr;
+                       hash_keys.addrs.v6addrs.dst = fl6->daddr;
+                       hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
+                       hash_keys.basic.ip_proto = fl6->flowi6_proto;
+               }
+               break;
+       case 1:
+               if (skb) {
+                       unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+                       struct flow_keys keys;
+
+                       /* short-circuit if we already have L4 hash present */
+                       if (skb->l4_hash)
+                               return skb_get_hash_raw(skb) >> 1;
+
+                       memset(&hash_keys, 0, sizeof(hash_keys));
+
+                        if (!flkeys) {
+                               skb_flow_dissect_flow_keys(skb, &keys, flag);
+                               flkeys = &keys;
+                       }
+                       hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+                       hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
+                       hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
+                       hash_keys.ports.src = flkeys->ports.src;
+                       hash_keys.ports.dst = flkeys->ports.dst;
+                       hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
+               } else {
+                       memset(&hash_keys, 0, sizeof(hash_keys));
+                       hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+                       hash_keys.addrs.v6addrs.src = fl6->saddr;
+                       hash_keys.addrs.v6addrs.dst = fl6->daddr;
+                       hash_keys.ports.src = fl6->fl6_sport;
+                       hash_keys.ports.dst = fl6->fl6_dport;
+                       hash_keys.basic.ip_proto = fl6->flowi6_proto;
+               }
+               break;
        }
+       mhash = flow_hash_from_keys(&hash_keys);
 
-       return get_hash_from_flowi6(fl6) >> 1;
+       return mhash >> 1;
 }
 
 void ip6_route_input(struct sk_buff *skb)
@@ -1847,20 +1935,29 @@ void ip6_route_input(struct sk_buff *skb)
                .flowi6_mark = skb->mark,
                .flowi6_proto = iph->nexthdr,
        };
+       struct flow_keys *flkeys = NULL, _flkeys;
 
        tun_info = skb_tunnel_info(skb);
        if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
                fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+
+       if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
+               flkeys = &_flkeys;
+
        if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
-               fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+               fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
        skb_dst_drop(skb);
-       skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
+       skb_dst_set(skb,
+                   ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
 }
 
-static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
-                                            struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_output(struct net *net,
+                                            struct fib6_table *table,
+                                            struct flowi6 *fl6,
+                                            const struct sk_buff *skb,
+                                            int flags)
 {
-       return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
+       return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
 }
 
 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
@@ -1888,7 +1985,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
        else if (sk)
                flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 
-       return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
+       return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
 }
 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
 
@@ -2137,6 +2234,7 @@ struct ip6rd_flowi {
 static struct rt6_info *__ip6_route_redirect(struct net *net,
                                             struct fib6_table *table,
                                             struct flowi6 *fl6,
+                                            const struct sk_buff *skb,
                                             int flags)
 {
        struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
@@ -2210,8 +2308,9 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 };
 
 static struct dst_entry *ip6_route_redirect(struct net *net,
-                                       const struct flowi6 *fl6,
-                                       const struct in6_addr *gateway)
+                                           const struct flowi6 *fl6,
+                                           const struct sk_buff *skb,
+                                           const struct in6_addr *gateway)
 {
        int flags = RT6_LOOKUP_F_HAS_SADDR;
        struct ip6rd_flowi rdfl;
@@ -2219,7 +2318,7 @@ static struct dst_entry *ip6_route_redirect(struct net *net,
        rdfl.fl6 = *fl6;
        rdfl.gateway = *gateway;
 
-       return fib6_rule_lookup(net, &rdfl.fl6,
+       return fib6_rule_lookup(net, &rdfl.fl6, skb,
                                flags, __ip6_route_redirect);
 }
 
@@ -2239,7 +2338,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
        fl6.flowlabel = ip6_flowinfo(iph);
        fl6.flowi6_uid = uid;
 
-       dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
+       dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
        rt6_do_redirect(dst, NULL, skb);
        dst_release(dst);
 }
@@ -2261,7 +2360,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
        fl6.saddr = iph->daddr;
        fl6.flowi6_uid = sock_net_uid(net, NULL);
 
-       dst = ip6_route_redirect(net, &fl6, &iph->saddr);
+       dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
        rt6_do_redirect(dst, NULL, skb);
        dst_release(dst);
 }
@@ -2463,7 +2562,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
                flags |= RT6_LOOKUP_F_HAS_SADDR;
 
        flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
-       rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
+       rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
 
        /* if table lookup failed, fall back to full lookup */
        if (rt == net->ipv6.ip6_null_entry) {
@@ -2476,7 +2575,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
 
 static int ip6_route_check_nh_onlink(struct net *net,
                                     struct fib6_config *cfg,
-                                    struct net_device *dev,
+                                    const struct net_device *dev,
                                     struct netlink_ext_ack *extack)
 {
        u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
@@ -2526,7 +2625,7 @@ static int ip6_route_check_nh(struct net *net,
        }
 
        if (!grt)
-               grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+               grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
 
        if (!grt)
                goto out;
@@ -2552,6 +2651,79 @@ static int ip6_route_check_nh(struct net *net,
        return err;
 }
 
+static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
+                          struct net_device **_dev, struct inet6_dev **idev,
+                          struct netlink_ext_ack *extack)
+{
+       const struct in6_addr *gw_addr = &cfg->fc_gateway;
+       int gwa_type = ipv6_addr_type(gw_addr);
+       bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
+       const struct net_device *dev = *_dev;
+       bool need_addr_check = !dev;
+       int err = -EINVAL;
+
+       /* if gw_addr is local we will fail to detect this in case
+        * address is still TENTATIVE (DAD in progress). rt6_lookup()
+        * will return already-added prefix route via interface that
+        * prefix route was assigned to, which might be non-loopback.
+        */
+       if (dev &&
+           ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+               NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
+               goto out;
+       }
+
+       if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
+               /* IPv6 strictly inhibits using not link-local
+                * addresses as nexthop address.
+                * Otherwise, router will not able to send redirects.
+                * It is very good, but in some (rare!) circumstances
+                * (SIT, PtP, NBMA NOARP links) it is handy to allow
+                * some exceptions. --ANK
+                * We allow IPv4-mapped nexthops to support RFC4798-type
+                * addressing
+                */
+               if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
+                       NL_SET_ERR_MSG(extack, "Invalid gateway address");
+                       goto out;
+               }
+
+               if (cfg->fc_flags & RTNH_F_ONLINK)
+                       err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
+               else
+                       err = ip6_route_check_nh(net, cfg, _dev, idev);
+
+               if (err)
+                       goto out;
+       }
+
+       /* reload in case device was changed */
+       dev = *_dev;
+
+       err = -EINVAL;
+       if (!dev) {
+               NL_SET_ERR_MSG(extack, "Egress device not specified");
+               goto out;
+       } else if (dev->flags & IFF_LOOPBACK) {
+               NL_SET_ERR_MSG(extack,
+                              "Egress device can not be loopback device for this route");
+               goto out;
+       }
+
+       /* if we did not check gw_addr above, do so now that the
+        * egress device has been resolved.
+        */
+       if (need_addr_check &&
+           ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+               NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
+               goto out;
+       }
+
+       err = 0;
+out:
+       return err;
+}
+
 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
                                              struct netlink_ext_ack *extack)
 {
@@ -2734,61 +2906,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
        }
 
        if (cfg->fc_flags & RTF_GATEWAY) {
-               const struct in6_addr *gw_addr;
-               int gwa_type;
-
-               gw_addr = &cfg->fc_gateway;
-               gwa_type = ipv6_addr_type(gw_addr);
-
-               /* if gw_addr is local we will fail to detect this in case
-                * address is still TENTATIVE (DAD in progress). rt6_lookup()
-                * will return already-added prefix route via interface that
-                * prefix route was assigned to, which might be non-loopback.
-                */
-               err = -EINVAL;
-               if (ipv6_chk_addr_and_flags(net, gw_addr,
-                                           gwa_type & IPV6_ADDR_LINKLOCAL ?
-                                           dev : NULL, 0, 0)) {
-                       NL_SET_ERR_MSG(extack, "Invalid gateway address");
+               err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
+               if (err)
                        goto out;
-               }
-               rt->rt6i_gateway = *gw_addr;
-
-               if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
-                       /* IPv6 strictly inhibits using not link-local
-                          addresses as nexthop address.
-                          Otherwise, router will not able to send redirects.
-                          It is very good, but in some (rare!) circumstances
-                          (SIT, PtP, NBMA NOARP links) it is handy to allow
-                          some exceptions. --ANK
-                          We allow IPv4-mapped nexthops to support RFC4798-type
-                          addressing
-                        */
-                       if (!(gwa_type & (IPV6_ADDR_UNICAST |
-                                         IPV6_ADDR_MAPPED))) {
-                               NL_SET_ERR_MSG(extack,
-                                              "Invalid gateway address");
-                               goto out;
-                       }
 
-                       if (cfg->fc_flags & RTNH_F_ONLINK) {
-                               err = ip6_route_check_nh_onlink(net, cfg, dev,
-                                                               extack);
-                       } else {
-                               err = ip6_route_check_nh(net, cfg, &dev, &idev);
-                       }
-                       if (err)
-                               goto out;
-               }
-               err = -EINVAL;
-               if (!dev) {
-                       NL_SET_ERR_MSG(extack, "Egress device not specified");
-                       goto out;
-               } else if (dev->flags & IFF_LOOPBACK) {
-                       NL_SET_ERR_MSG(extack,
-                                      "Egress device can not be loopback device for this route");
-                       goto out;
-               }
+               rt->rt6i_gateway = cfg->fc_gateway;
        }
 
        err = -ENODEV;
@@ -3802,25 +3924,13 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
           Since RFC 1981 doesn't include administrative MTU increase
           update PMTU increase is a MUST. (i.e. jumbo frame)
         */
-       /*
-          If new MTU is less than route PMTU, this new MTU will be the
-          lowest MTU in the path, update the route PMTU to reflect PMTU
-          decreases; if new MTU is greater than route PMTU, and the
-          old MTU is the lowest MTU in the path, update the route PMTU
-          to reflect the increase. In this case if the other nodes' MTU
-          also have the lowest MTU, TOO BIG MESSAGE will be lead to
-          PMTU discovery.
-        */
        if (rt->dst.dev == arg->dev &&
-           dst_metric_raw(&rt->dst, RTAX_MTU) &&
            !dst_metric_locked(&rt->dst, RTAX_MTU)) {
                spin_lock_bh(&rt6_exception_lock);
-               if (dst_mtu(&rt->dst) >= arg->mtu ||
-                   (dst_mtu(&rt->dst) < arg->mtu &&
-                    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
+               if (dst_metric_raw(&rt->dst, RTAX_MTU) &&
+                   rt6_mtu_change_route_allowed(idev, rt, arg->mtu))
                        dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
-               }
-               rt6_exceptions_update_pmtu(rt, arg->mtu);
+               rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
                spin_unlock_bh(&rt6_exception_lock);
        }
        return 0;
@@ -4092,6 +4202,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
                                r_cfg.fc_encap_type = nla_get_u16(nla);
                }
 
+               r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
                rt = ip6_route_info_create(&r_cfg, extack);
                if (IS_ERR(rt)) {
                        err = PTR_ERR(rt);
@@ -4591,7 +4702,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                if (!ipv6_addr_any(&fl6.saddr))
                        flags |= RT6_LOOKUP_F_HAS_SADDR;
 
-               dst = ip6_route_input_lookup(net, dev, &fl6, flags);
+               dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
 
                rcu_read_unlock();
        } else {
index bd6cc688bd199ae98cc1be8d0851b08cb6709486..7a78dcfda68a17e10e5e951db21d8113c7f65301 100644 (file)
@@ -93,7 +93,8 @@ static void set_tun_src(struct net *net, struct net_device *dev,
 /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
 int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
 {
-       struct net *net = dev_net(skb_dst(skb)->dev);
+       struct dst_entry *dst = skb_dst(skb);
+       struct net *net = dev_net(dst->dev);
        struct ipv6hdr *hdr, *inner_hdr;
        struct ipv6_sr_hdr *isrh;
        int hdrlen, tot_len, err;
@@ -134,7 +135,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
        isrh->nexthdr = proto;
 
        hdr->daddr = isrh->segments[isrh->first_segment];
-       set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr);
+       set_tun_src(net, ip6_dst_idev(dst)->dev, &hdr->daddr, &hdr->saddr);
 
 #ifdef CONFIG_IPV6_SEG6_HMAC
        if (sr_has_hmac(isrh)) {
@@ -418,7 +419,7 @@ static int seg6_build_state(struct nlattr *nla,
 
        slwt = seg6_lwt_lwtunnel(newts);
 
-       err = dst_cache_init(&slwt->cache, GFP_KERNEL);
+       err = dst_cache_init(&slwt->cache, GFP_ATOMIC);
        if (err) {
                kfree(newts);
                return err;
index ba3767ef5e93a7c3eed2263734ef7ef58c20d8cb..45722327375afb441dc8f7a806b42fec808aa3aa 100644 (file)
@@ -161,7 +161,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
                fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
 
        if (!tbl_id) {
-               dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
+               dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
        } else {
                struct fib6_table *table;
 
@@ -169,7 +169,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
                if (!table)
                        goto out;
 
-               rt = ip6_pol_route(net, table, 0, &fl6, flags);
+               rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
                dst = &rt->dst;
        }
 
index 3a1775a62973b7e0b1eeb70ba364eb1b068fdde5..8a4f8fddd8121a31416103551f915ad610ac6f94 100644 (file)
@@ -182,7 +182,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
 #ifdef CONFIG_IPV6_SIT_6RD
        struct ip_tunnel *t = netdev_priv(dev);
 
-       if (dev == sitn->fb_tunnel_dev) {
+       if (dev == sitn->fb_tunnel_dev || !sitn->fb_tunnel_dev) {
                ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
                t->ip6rd.relay_prefix = 0;
                t->ip6rd.prefixlen = 16;
@@ -1578,6 +1578,13 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
        if (err < 0)
                return err;
 
+       if (tb[IFLA_MTU]) {
+               u32 mtu = nla_get_u32(tb[IFLA_MTU]);
+
+               if (mtu >= IPV6_MIN_MTU && mtu <= 0xFFF8 - dev->hard_header_len)
+                       dev->mtu = mtu;
+       }
+
 #ifdef CONFIG_IPV6_SIT_6RD
        if (ipip6_netlink_6rd_parms(data, &ip6rd))
                err = ipip6_tunnel_update_6rd(nt, &ip6rd);
@@ -1828,6 +1835,9 @@ static int __net_init sit_init_net(struct net *net)
        sitn->tunnels[2] = sitn->tunnels_r;
        sitn->tunnels[3] = sitn->tunnels_r_l;
 
+       if (!net_has_fallback_tunnels(net))
+               return 0;
+
        sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
                                           NET_NAME_UNKNOWN,
                                           ipip6_tunnel_setup);
@@ -1878,6 +1888,7 @@ static struct pernet_operations sit_net_ops = {
        .exit_batch = sit_exit_batch_net,
        .id   = &sit_net_id,
        .size = sizeof(struct sit_net),
+       .async = true,
 };
 
 static void __exit sit_cleanup(void)
index 262f791f1b9b9151abe1eced80ea0467aa5a9dc9..966c42af92f471a17f901a8529def43524a28965 100644 (file)
 #include <net/ipv6.h>
 #include <net/addrconf.h>
 #include <net/inet_frag.h>
+#include <net/netevent.h>
 #ifdef CONFIG_NETLABEL
 #include <net/calipso.h>
 #endif
 
+static int zero;
 static int one = 1;
 static int auto_flowlabels_min;
 static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
 
+static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
+                                         void __user *buffer, size_t *lenp,
+                                         loff_t *ppos)
+{
+       struct net *net;
+       int ret;
+
+       net = container_of(table->data, struct net,
+                          ipv6.sysctl.multipath_hash_policy);
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+       if (write && ret == 0)
+               call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
+
+       return ret;
+}
 
 static struct ctl_table ipv6_table_template[] = {
        {
@@ -126,6 +143,15 @@ static struct ctl_table ipv6_table_template[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec
        },
+       {
+               .procname       = "fib_multipath_hash_policy",
+               .data           = &init_net.ipv6.sysctl.multipath_hash_policy,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_rt6_multipath_hash_policy,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
        { }
 };
 
@@ -190,6 +216,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
        ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
        ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
        ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
+       ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy,
 
        ipv6_route_table = ipv6_route_sysctl_init(net);
        if (!ipv6_route_table)
index 52e3ea0e6f507702251b3dade8ae43181331d975..ad30f5e319699d0d79c976d8a7544d5d4a85e2cd 100644 (file)
@@ -1509,34 +1509,34 @@ void udp6_proc_exit(struct net *net)
 /* ------------------------------------------------------------------------ */
 
 struct proto udpv6_prot = {
-       .name              = "UDPv6",
-       .owner             = THIS_MODULE,
-       .close             = udp_lib_close,
-       .connect           = ip6_datagram_connect,
-       .disconnect        = udp_disconnect,
-       .ioctl             = udp_ioctl,
-       .init              = udp_init_sock,
-       .destroy           = udpv6_destroy_sock,
-       .setsockopt        = udpv6_setsockopt,
-       .getsockopt        = udpv6_getsockopt,
-       .sendmsg           = udpv6_sendmsg,
-       .recvmsg           = udpv6_recvmsg,
-       .release_cb        = ip6_datagram_release_cb,
-       .hash              = udp_lib_hash,
-       .unhash            = udp_lib_unhash,
-       .rehash            = udp_v6_rehash,
-       .get_port          = udp_v6_get_port,
-       .memory_allocated  = &udp_memory_allocated,
-       .sysctl_mem        = sysctl_udp_mem,
-       .sysctl_wmem       = &sysctl_udp_wmem_min,
-       .sysctl_rmem       = &sysctl_udp_rmem_min,
-       .obj_size          = sizeof(struct udp6_sock),
-       .h.udp_table       = &udp_table,
+       .name                   = "UDPv6",
+       .owner                  = THIS_MODULE,
+       .close                  = udp_lib_close,
+       .connect                = ip6_datagram_connect,
+       .disconnect             = udp_disconnect,
+       .ioctl                  = udp_ioctl,
+       .init                   = udp_init_sock,
+       .destroy                = udpv6_destroy_sock,
+       .setsockopt             = udpv6_setsockopt,
+       .getsockopt             = udpv6_getsockopt,
+       .sendmsg                = udpv6_sendmsg,
+       .recvmsg                = udpv6_recvmsg,
+       .release_cb             = ip6_datagram_release_cb,
+       .hash                   = udp_lib_hash,
+       .unhash                 = udp_lib_unhash,
+       .rehash                 = udp_v6_rehash,
+       .get_port               = udp_v6_get_port,
+       .memory_allocated       = &udp_memory_allocated,
+       .sysctl_mem             = sysctl_udp_mem,
+       .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+       .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+       .obj_size               = sizeof(struct udp6_sock),
+       .h.udp_table            = &udp_table,
 #ifdef CONFIG_COMPAT
-       .compat_setsockopt = compat_udpv6_setsockopt,
-       .compat_getsockopt = compat_udpv6_getsockopt,
+       .compat_setsockopt      = compat_udpv6_setsockopt,
+       .compat_getsockopt      = compat_udpv6_getsockopt,
 #endif
-       .diag_destroy      = udp_abort,
+       .diag_destroy           = udp_abort,
 };
 
 static struct inet_protosw udpv6_protosw = {
index bb935a3b7feadafa883a329020d0f90b9c2ee615..de1b0b8c53b0ba26836d40a53dfa6fe077c7ebef 100644 (file)
@@ -92,7 +92,8 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 
        skb_reset_network_header(skb);
        skb_mac_header_rebuild(skb);
-       eth_hdr(skb)->h_proto = skb->protocol;
+       if (skb->mac_len)
+               eth_hdr(skb)->h_proto = skb->protocol;
 
        err = 0;
 
index 8ae87d4ec5ff607d431513bb2ef42d5c2c93450a..5959ce9620eb92ece2830d6a59ed21d562a3a1cf 100644 (file)
@@ -82,7 +82,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
 
        if ((!skb_is_gso(skb) && skb->len > mtu) ||
            (skb_is_gso(skb) &&
-            skb_gso_network_seglen(skb) > ip6_skb_dst_mtu(skb))) {
+            !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) {
                skb->dev = dst->dev;
                skb->protocol = htons(ETH_P_IPV6);
 
index 88cd0c90fa81acc626fb6a95a020d06d55bf2656..cbb270bd81b078b5cca3010458ff82778258275f 100644 (file)
@@ -113,6 +113,9 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
        xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
        xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
        xdst->u.rt6.rt6i_src = rt->rt6i_src;
+       INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached);
+       rt6_uncached_list_add(&xdst->u.rt6);
+       atomic_inc(&dev_net(dev)->ipv6.rt6_stats->fib_rt_uncache);
 
        return 0;
 }
@@ -244,6 +247,8 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
        if (likely(xdst->u.rt6.rt6i_idev))
                in6_dev_put(xdst->u.rt6.rt6i_idev);
        dst_destroy_metrics_generic(dst);
+       if (xdst->u.rt6.rt6i_uncached_list)
+               rt6_uncached_list_del(&xdst->u.rt6);
        xfrm_dst_destroy(xdst);
 }
 
index b15075a5c227d29db0171a194f7cd83732e78069..16f434791763f01de79cc0bf1d74715384034392 100644 (file)
@@ -196,4 +196,3 @@ void xfrm6_state_fini(void)
 {
        xfrm_state_unregister_afinfo(&xfrm6_state_afinfo);
 }
-
index f85f0d7480acf48074a7d53557c3c50ca59973cf..a9673619e0e9944073afeb2dd29fdf9a215bf508 100644 (file)
@@ -353,6 +353,7 @@ static struct pernet_operations xfrm6_tunnel_net_ops = {
        .exit   = xfrm6_tunnel_net_exit,
        .id     = &xfrm6_tunnel_net_id,
        .size   = sizeof(struct xfrm6_tunnel_net),
+       .async  = true,
 };
 
 static int __init xfrm6_tunnel_init(void)
index 81ce15ffb8783d787fbf3db60bf676136669e5ae..893a022f962081416fa1b9e5f96416a8c2e92e5c 100644 (file)
@@ -2432,9 +2432,11 @@ static int afiucv_iucv_init(void)
        af_iucv_dev->driver = &af_iucv_driver;
        err = device_register(af_iucv_dev);
        if (err)
-               goto out_driver;
+               goto out_iucv_dev;
        return 0;
 
+out_iucv_dev:
+       put_device(af_iucv_dev);
 out_driver:
        driver_unregister(&af_iucv_driver);
 out_iucv:
index 9d5649e4e8b7cc3b6e54e745f0cf9d98bf097929..2c1c8b3e44522741d53c1dea9b4459945787f1b4 100644 (file)
@@ -433,6 +433,7 @@ static void kcm_proc_exit_net(struct net *net)
 static struct pernet_operations kcm_net_ops = {
        .init = kcm_proc_init_net,
        .exit = kcm_proc_exit_net,
+       .async = true,
 };
 
 int __init kcm_proc_init(void)
index 435594648dac03dcc29ba47ca1da29e4a5684142..516cfad71b85858aeaa395a5e04c579d0bd4f856 100644 (file)
@@ -1381,24 +1381,32 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
                .parse_msg = kcm_parse_func_strparser,
                .read_sock_done = kcm_read_sock_done,
        };
-       int err;
+       int err = 0;
 
        csk = csock->sk;
        if (!csk)
                return -EINVAL;
 
+       lock_sock(csk);
+
        /* Only allow TCP sockets to be attached for now */
        if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) ||
-           csk->sk_protocol != IPPROTO_TCP)
-               return -EOPNOTSUPP;
+           csk->sk_protocol != IPPROTO_TCP) {
+               err = -EOPNOTSUPP;
+               goto out;
+       }
 
        /* Don't allow listeners or closed sockets */
-       if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE)
-               return -EOPNOTSUPP;
+       if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) {
+               err = -EOPNOTSUPP;
+               goto out;
+       }
 
        psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
-       if (!psock)
-               return -ENOMEM;
+       if (!psock) {
+               err = -ENOMEM;
+               goto out;
+       }
 
        psock->mux = mux;
        psock->sk = csk;
@@ -1407,7 +1415,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
        err = strp_init(&psock->strp, csk, &cb);
        if (err) {
                kmem_cache_free(kcm_psockp, psock);
-               return err;
+               goto out;
        }
 
        write_lock_bh(&csk->sk_callback_lock);
@@ -1420,7 +1428,8 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
                strp_stop(&psock->strp);
                strp_done(&psock->strp);
                kmem_cache_free(kcm_psockp, psock);
-               return -EALREADY;
+               err = -EALREADY;
+               goto out;
        }
 
        psock->save_data_ready = csk->sk_data_ready;
@@ -1456,7 +1465,10 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
        /* Schedule RX work in case there are already bytes queued */
        strp_check_rcv(&psock->strp);
 
-       return 0;
+out:
+       release_sock(csk);
+
+       return err;
 }
 
 static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info)
@@ -1508,6 +1520,7 @@ static void kcm_unattach(struct kcm_psock *psock)
 
        if (WARN_ON(psock->rx_kcm)) {
                write_unlock_bh(&csk->sk_callback_lock);
+               release_sock(csk);
                return;
        }
 
@@ -2015,6 +2028,7 @@ static struct pernet_operations kcm_net_ops = {
        .exit = kcm_exit_net,
        .id   = &kcm_net_id,
        .size = sizeof(struct kcm_net),
+       .async = true,
 };
 
 static int __init kcm_init(void)
index 7e2e7188e7f4a28aa45c26848364ab0c297161a2..3ac08ab26207d09a41a17da44de8a8299572be45 100644 (file)
@@ -3863,6 +3863,7 @@ static struct pernet_operations pfkey_net_ops = {
        .exit = pfkey_net_exit,
        .id   = &pfkey_net_id,
        .size = sizeof(struct netns_pfkey),
+       .async = true,
 };
 
 static void __exit ipsec_pfkey_exit(void)
index 194a7483bb930edbeb4a8071d6a7dcc95cd0a0c8..b86868da50d414489fe3efb9d64d842f57aa8869 100644 (file)
@@ -111,6 +111,13 @@ struct l2tp_net {
        spinlock_t l2tp_session_hlist_lock;
 };
 
+#if IS_ENABLED(CONFIG_IPV6)
+static bool l2tp_sk_is_v6(struct sock *sk)
+{
+       return sk->sk_family == PF_INET6 &&
+              !ipv6_addr_v4mapped(&sk->sk_v6_daddr);
+}
+#endif
 
 static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
 {
@@ -136,51 +143,6 @@ l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id)
 
 }
 
-/* Lookup the tunnel socket, possibly involving the fs code if the socket is
- * owned by userspace.  A struct sock returned from this function must be
- * released using l2tp_tunnel_sock_put once you're done with it.
- */
-static struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel)
-{
-       int err = 0;
-       struct socket *sock = NULL;
-       struct sock *sk = NULL;
-
-       if (!tunnel)
-               goto out;
-
-       if (tunnel->fd >= 0) {
-               /* Socket is owned by userspace, who might be in the process
-                * of closing it.  Look the socket up using the fd to ensure
-                * consistency.
-                */
-               sock = sockfd_lookup(tunnel->fd, &err);
-               if (sock)
-                       sk = sock->sk;
-       } else {
-               /* Socket is owned by kernelspace */
-               sk = tunnel->sock;
-               sock_hold(sk);
-       }
-
-out:
-       return sk;
-}
-
-/* Drop a reference to a tunnel socket obtained via. l2tp_tunnel_sock_put */
-static void l2tp_tunnel_sock_put(struct sock *sk)
-{
-       struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
-       if (tunnel) {
-               if (tunnel->fd >= 0) {
-                       /* Socket is owned by userspace */
-                       sockfd_put(sk->sk_socket);
-               }
-               sock_put(sk);
-       }
-       sock_put(sk);
-}
-
 /* Session hash list.
  * The session_id SHOULD be random according to RFC2661, but several
  * L2TP implementations (Cisco and Microsoft) use incrementing
@@ -193,6 +155,13 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
        return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)];
 }
 
+void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
+{
+       sock_put(tunnel->sock);
+       /* the tunnel is freed in the socket destructor */
+}
+EXPORT_SYMBOL(l2tp_tunnel_free);
+
 /* Lookup a tunnel. A new reference is held on the returned tunnel. */
 struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
 {
@@ -345,13 +314,11 @@ int l2tp_session_register(struct l2tp_session *session,
                        }
 
                l2tp_tunnel_inc_refcount(tunnel);
-               sock_hold(tunnel->sock);
                hlist_add_head_rcu(&session->global_hlist, g_head);
 
                spin_unlock_bh(&pn->l2tp_session_hlist_lock);
        } else {
                l2tp_tunnel_inc_refcount(tunnel);
-               sock_hold(tunnel->sock);
        }
 
        hlist_add_head(&session->hlist, head);
@@ -969,7 +936,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
        struct l2tp_tunnel *tunnel;
 
-       tunnel = l2tp_sock_to_tunnel(sk);
+       tunnel = l2tp_tunnel(sk);
        if (tunnel == NULL)
                goto pass_up;
 
@@ -977,13 +944,10 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
                 tunnel->name, skb->len);
 
        if (l2tp_udp_recv_core(tunnel, skb, tunnel->recv_payload_hook))
-               goto pass_up_put;
+               goto pass_up;
 
-       sock_put(sk);
        return 0;
 
-pass_up_put:
-       sock_put(sk);
 pass_up:
        return 1;
 }
@@ -1092,7 +1056,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
        /* Queue the packet to IP for output */
        skb->ignore_df = 1;
 #if IS_ENABLED(CONFIG_IPV6)
-       if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped)
+       if (l2tp_sk_is_v6(tunnel->sock))
                error = inet6_csk_xmit(tunnel->sock, skb, NULL);
        else
 #endif
@@ -1155,6 +1119,15 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
                goto out_unlock;
        }
 
+       /* The user-space may change the connection status for the user-space
+        * provided socket at run time: we must check it under the socket lock
+        */
+       if (tunnel->fd >= 0 && sk->sk_state != TCP_ESTABLISHED) {
+               kfree_skb(skb);
+               ret = NET_XMIT_DROP;
+               goto out_unlock;
+       }
+
        /* Get routing info from the tunnel socket */
        skb_dst_drop(skb);
        skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0)));
@@ -1174,7 +1147,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 
                /* Calculate UDP checksum if configured to do so */
 #if IS_ENABLED(CONFIG_IPV6)
-               if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
+               if (l2tp_sk_is_v6(sk))
                        udp6_set_csum(udp_get_no_check6_tx(sk),
                                      skb, &inet6_sk(sk)->saddr,
                                      &sk->sk_v6_daddr, udp_len);
@@ -1207,14 +1180,12 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
 static void l2tp_tunnel_destruct(struct sock *sk)
 {
        struct l2tp_tunnel *tunnel = l2tp_tunnel(sk);
-       struct l2tp_net *pn;
 
        if (tunnel == NULL)
                goto end;
 
        l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing...\n", tunnel->name);
 
-
        /* Disable udp encapsulation */
        switch (tunnel->encap) {
        case L2TP_ENCAPTYPE_UDP:
@@ -1231,18 +1202,11 @@ static void l2tp_tunnel_destruct(struct sock *sk)
        sk->sk_destruct = tunnel->old_sk_destruct;
        sk->sk_user_data = NULL;
 
-       /* Remove the tunnel struct from the tunnel list */
-       pn = l2tp_pernet(tunnel->l2tp_net);
-       spin_lock_bh(&pn->l2tp_tunnel_list_lock);
-       list_del_rcu(&tunnel->list);
-       spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-
-       tunnel->sock = NULL;
-       l2tp_tunnel_dec_refcount(tunnel);
-
        /* Call the original destructor */
        if (sk->sk_destruct)
                (*sk->sk_destruct)(sk);
+
+       kfree_rcu(tunnel, rcu);
 end:
        return;
 }
@@ -1303,49 +1267,43 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall);
 /* Tunnel socket destroy hook for UDP encapsulation */
 static void l2tp_udp_encap_destroy(struct sock *sk)
 {
-       struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
-       if (tunnel) {
-               l2tp_tunnel_closeall(tunnel);
-               sock_put(sk);
-       }
+       struct l2tp_tunnel *tunnel = l2tp_tunnel(sk);
+
+       if (tunnel)
+               l2tp_tunnel_delete(tunnel);
 }
 
 /* Workqueue tunnel deletion function */
 static void l2tp_tunnel_del_work(struct work_struct *work)
 {
-       struct l2tp_tunnel *tunnel = NULL;
-       struct socket *sock = NULL;
-       struct sock *sk = NULL;
-
-       tunnel = container_of(work, struct l2tp_tunnel, del_work);
+       struct l2tp_tunnel *tunnel = container_of(work, struct l2tp_tunnel,
+                                                 del_work);
+       struct sock *sk = tunnel->sock;
+       struct socket *sock = sk->sk_socket;
+       struct l2tp_net *pn;
 
        l2tp_tunnel_closeall(tunnel);
 
-       sk = l2tp_tunnel_sock_lookup(tunnel);
-       if (!sk)
-               goto out;
-
-       sock = sk->sk_socket;
-
-       /* If the tunnel socket was created by userspace, then go through the
-        * inet layer to shut the socket down, and let userspace close it.
-        * Otherwise, if we created the socket directly within the kernel, use
+       /* If the tunnel socket was created within the kernel, use
         * the sk API to release it here.
-        * In either case the tunnel resources are freed in the socket
-        * destructor when the tunnel socket goes away.
         */
-       if (tunnel->fd >= 0) {
-               if (sock)
-                       inet_shutdown(sock, 2);
-       } else {
+       if (tunnel->fd < 0) {
                if (sock) {
                        kernel_sock_shutdown(sock, SHUT_RDWR);
                        sock_release(sock);
                }
        }
 
-       l2tp_tunnel_sock_put(sk);
-out:
+       /* Remove the tunnel struct from the tunnel list */
+       pn = l2tp_pernet(tunnel->l2tp_net);
+       spin_lock_bh(&pn->l2tp_tunnel_list_lock);
+       list_del_rcu(&tunnel->list);
+       spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+
+       /* drop initial ref */
+       l2tp_tunnel_dec_refcount(tunnel);
+
+       /* drop workqueue ref */
        l2tp_tunnel_dec_refcount(tunnel);
 }
 
@@ -1515,9 +1473,14 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
                encap = cfg->encap;
 
        /* Quick sanity checks */
+       err = -EPROTONOSUPPORT;
+       if (sk->sk_type != SOCK_DGRAM) {
+               pr_debug("tunl %hu: fd %d wrong socket type\n",
+                        tunnel_id, fd);
+               goto err;
+       }
        switch (encap) {
        case L2TP_ENCAPTYPE_UDP:
-               err = -EPROTONOSUPPORT;
                if (sk->sk_protocol != IPPROTO_UDP) {
                        pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
                               tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
@@ -1525,7 +1488,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
                }
                break;
        case L2TP_ENCAPTYPE_IP:
-               err = -EPROTONOSUPPORT;
                if (sk->sk_protocol != IPPROTO_L2TP) {
                        pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
                               tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP);
@@ -1565,24 +1527,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
        if (cfg != NULL)
                tunnel->debug = cfg->debug;
 
-#if IS_ENABLED(CONFIG_IPV6)
-       if (sk->sk_family == PF_INET6) {
-               struct ipv6_pinfo *np = inet6_sk(sk);
-
-               if (ipv6_addr_v4mapped(&np->saddr) &&
-                   ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
-                       struct inet_sock *inet = inet_sk(sk);
-
-                       tunnel->v4mapped = true;
-                       inet->inet_saddr = np->saddr.s6_addr32[3];
-                       inet->inet_rcv_saddr = sk->sk_v6_rcv_saddr.s6_addr32[3];
-                       inet->inet_daddr = sk->sk_v6_daddr.s6_addr32[3];
-               } else {
-                       tunnel->v4mapped = false;
-               }
-       }
-#endif
-
        /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
        tunnel->encap = encap;
        if (encap == L2TP_ENCAPTYPE_UDP) {
@@ -1598,13 +1542,22 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
                sk->sk_user_data = tunnel;
        }
 
+       /* Bump the reference count. The tunnel context is deleted
+        * only when this drops to zero. A reference is also held on
+        * the tunnel socket to ensure that it is not released while
+        * the tunnel is extant. Must be done before sk_destruct is
+        * set.
+        */
+       refcount_set(&tunnel->ref_count, 1);
+       sock_hold(sk);
+       tunnel->sock = sk;
+       tunnel->fd = fd;
+
        /* Hook on the tunnel socket destructor so that we can cleanup
         * if the tunnel socket goes away.
         */
        tunnel->old_sk_destruct = sk->sk_destruct;
        sk->sk_destruct = &l2tp_tunnel_destruct;
-       tunnel->sock = sk;
-       tunnel->fd = fd;
        lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock");
 
        sk->sk_allocation = GFP_ATOMIC;
@@ -1614,11 +1567,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 
        /* Add tunnel to our list */
        INIT_LIST_HEAD(&tunnel->list);
-
-       /* Bump the reference count. The tunnel context is deleted
-        * only when this drops to zero. Must be done before list insertion
-        */
-       refcount_set(&tunnel->ref_count, 1);
        spin_lock_bh(&pn->l2tp_tunnel_list_lock);
        list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
        spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
@@ -1659,8 +1607,6 @@ void l2tp_session_free(struct l2tp_session *session)
 
        if (tunnel) {
                BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
-               sock_put(tunnel->sock);
-               session->tunnel = NULL;
                l2tp_tunnel_dec_refcount(tunnel);
        }
 
@@ -1843,6 +1789,7 @@ static struct pernet_operations l2tp_net_ops = {
        .exit = l2tp_exit_net,
        .id   = &l2tp_net_id,
        .size = sizeof(struct l2tp_net),
+       .async = true,
 };
 
 static int __init l2tp_init(void)
index 9bbee90e99637fed1bc40b8eba5ea44d4e3b74ef..2718d0b284d040810b3027ba62b911f77fc6f932 100644 (file)
@@ -188,9 +188,6 @@ struct l2tp_tunnel {
        struct sock             *sock;          /* Parent socket */
        int                     fd;             /* Parent fd, if tunnel socket
                                                 * was created by userspace */
-#if IS_ENABLED(CONFIG_IPV6)
-       bool                    v4mapped;
-#endif
 
        struct work_struct      del_work;
 
@@ -214,27 +211,8 @@ static inline void *l2tp_session_priv(struct l2tp_session *session)
        return &session->priv[0];
 }
 
-static inline struct l2tp_tunnel *l2tp_sock_to_tunnel(struct sock *sk)
-{
-       struct l2tp_tunnel *tunnel;
-
-       if (sk == NULL)
-               return NULL;
-
-       sock_hold(sk);
-       tunnel = (struct l2tp_tunnel *)(sk->sk_user_data);
-       if (tunnel == NULL) {
-               sock_put(sk);
-               goto out;
-       }
-
-       BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
-
-out:
-       return tunnel;
-}
-
 struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id);
+void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
 
 struct l2tp_session *l2tp_session_get(const struct net *net,
                                      struct l2tp_tunnel *tunnel,
@@ -283,7 +261,7 @@ static inline void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel)
 static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel)
 {
        if (refcount_dec_and_test(&tunnel->ref_count))
-               kfree_rcu(tunnel, rcu);
+               l2tp_tunnel_free(tunnel);
 }
 
 /* Session reference counts. Incremented when code obtains a reference
index 4614585e172071fb1eea1d7f08115fa8049f86b3..a9c05b2bc1b0bc3471bbf62dc3b7c11e971a7f08 100644 (file)
@@ -234,17 +234,13 @@ static void l2tp_ip_close(struct sock *sk, long timeout)
 static void l2tp_ip_destroy_sock(struct sock *sk)
 {
        struct sk_buff *skb;
-       struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
+       struct l2tp_tunnel *tunnel = sk->sk_user_data;
 
        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
                kfree_skb(skb);
 
-       if (tunnel) {
-               l2tp_tunnel_closeall(tunnel);
-               sock_put(sk);
-       }
-
-       sk_refcnt_debug_dec(sk);
+       if (tunnel)
+               l2tp_tunnel_delete(tunnel);
 }
 
 static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
index efea58b662953eb7e81ae18523a0b2e35bd15da8..957369192ca181d6da21c9dda03d0e8a9726643e 100644 (file)
@@ -248,16 +248,14 @@ static void l2tp_ip6_close(struct sock *sk, long timeout)
 
 static void l2tp_ip6_destroy_sock(struct sock *sk)
 {
-       struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
+       struct l2tp_tunnel *tunnel = sk->sk_user_data;
 
        lock_sock(sk);
        ip6_flush_pending_frames(sk);
        release_sock(sk);
 
-       if (tunnel) {
-               l2tp_tunnel_closeall(tunnel);
-               sock_put(sk);
-       }
+       if (tunnel)
+               l2tp_tunnel_delete(tunnel);
 
        inet6_destroy_sock(sk);
 }
index 99a03c72db4f8d9af4043945d2a618da3b198726..977bca659787d603713642fa1d7ea79ccb0ca55d 100644 (file)
@@ -416,20 +416,28 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
  * Session (and tunnel control) socket create/destroy.
  *****************************************************************************/
 
+static void pppol2tp_put_sk(struct rcu_head *head)
+{
+       struct pppol2tp_session *ps;
+
+       ps = container_of(head, typeof(*ps), rcu);
+       sock_put(ps->__sk);
+}
+
 /* Called by l2tp_core when a session socket is being closed.
  */
 static void pppol2tp_session_close(struct l2tp_session *session)
 {
-       struct sock *sk;
-
-       BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+       struct pppol2tp_session *ps;
 
-       sk = pppol2tp_session_get_sock(session);
-       if (sk) {
-               if (sk->sk_socket)
-                       inet_shutdown(sk->sk_socket, SEND_SHUTDOWN);
-               sock_put(sk);
-       }
+       ps = l2tp_session_priv(session);
+       mutex_lock(&ps->sk_lock);
+       ps->__sk = rcu_dereference_protected(ps->sk,
+                                            lockdep_is_held(&ps->sk_lock));
+       RCU_INIT_POINTER(ps->sk, NULL);
+       if (ps->__sk)
+               call_rcu(&ps->rcu, pppol2tp_put_sk);
+       mutex_unlock(&ps->sk_lock);
 }
 
 /* Really kill the session socket. (Called from sock_put() if
@@ -449,14 +457,6 @@ static void pppol2tp_session_destruct(struct sock *sk)
        }
 }
 
-static void pppol2tp_put_sk(struct rcu_head *head)
-{
-       struct pppol2tp_session *ps;
-
-       ps = container_of(head, typeof(*ps), rcu);
-       sock_put(ps->__sk);
-}
-
 /* Called when the PPPoX socket (session) is closed.
  */
 static int pppol2tp_release(struct socket *sock)
@@ -480,26 +480,17 @@ static int pppol2tp_release(struct socket *sock)
        sock_orphan(sk);
        sock->sk = NULL;
 
+       /* If the socket is associated with a session,
+        * l2tp_session_delete will call pppol2tp_session_close which
+        * will drop the session's ref on the socket.
+        */
        session = pppol2tp_sock_to_session(sk);
-
-       if (session != NULL) {
-               struct pppol2tp_session *ps;
-
+       if (session) {
                l2tp_session_delete(session);
-
-               ps = l2tp_session_priv(session);
-               mutex_lock(&ps->sk_lock);
-               ps->__sk = rcu_dereference_protected(ps->sk,
-                                                    lockdep_is_held(&ps->sk_lock));
-               RCU_INIT_POINTER(ps->sk, NULL);
-               mutex_unlock(&ps->sk_lock);
-               call_rcu(&ps->rcu, pppol2tp_put_sk);
-
-               /* Rely on the sock_put() call at the end of the function for
-                * dropping the reference held by pppol2tp_sock_to_session().
-                * The last reference will be dropped by pppol2tp_put_sk().
-                */
+               /* drop the ref obtained by pppol2tp_sock_to_session */
+               sock_put(sk);
        }
+
        release_sock(sk);
 
        /* This will delete the session context via
@@ -796,6 +787,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 
 out_no_ppp:
        /* This is how we get the session context from the socket. */
+       sock_hold(sk);
        sk->sk_user_data = session;
        rcu_assign_pointer(ps->sk, sk);
        mutex_unlock(&ps->sk_lock);
@@ -1770,6 +1762,7 @@ static struct pernet_operations pppol2tp_net_ops = {
        .init = pppol2tp_init_net,
        .exit = pppol2tp_exit_net,
        .id   = &pppol2tp_net_id,
+       .async = true,
 };
 
 /*****************************************************************************
index d90928f50226051a9af674f7a1ed95babbdd69fb..a7f7b8ff47292b476b4e3ec2e17e05a6fa0ee3ca 100644 (file)
@@ -394,8 +394,9 @@ static void llc_sap_mcast(struct llc_sap *sap,
                          const struct llc_addr *laddr,
                          struct sk_buff *skb)
 {
-       int i = 0, count = 256 / sizeof(struct sock *);
-       struct sock *sk, *stack[count];
+       int i = 0;
+       struct sock *sk;
+       struct sock *stack[256 / sizeof(struct sock *)];
        struct llc_sock *llc;
        struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex);
 
@@ -408,7 +409,7 @@ static void llc_sap_mcast(struct llc_sap *sap,
                        continue;
 
                sock_hold(sk);
-               if (i < count)
+               if (i < ARRAY_SIZE(stack))
                        stack[i++] = sk;
                else {
                        llc_do_mcast(sap, skb, stack, i);
index 1f3188d0384028ef338e86c291441c20493cbcb5..e83c19d4c292e46fce243f60fdf77557096c2af6 100644 (file)
@@ -298,13 +298,23 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 
        if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
                if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) {
+                       struct tid_ampdu_rx *tid_rx;
+
                        ht_dbg_ratelimited(sta->sdata,
                                           "updated AddBA Req from %pM on tid %u\n",
                                           sta->sta.addr, tid);
                        /* We have no API to update the timeout value in the
-                        * driver so reject the timeout update.
+                        * driver so reject the timeout update if the timeout
+                        * changed. If it did not change, i.e., no real update,
+                        * just reply with success.
                         */
-                       status = WLAN_STATUS_REQUEST_DECLINED;
+                       rcu_read_lock();
+                       tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
+                       if (tid_rx && tid_rx->timeout == timeout)
+                               status = WLAN_STATUS_SUCCESS;
+                       else
+                               status = WLAN_STATUS_REQUEST_DECLINED;
+                       rcu_read_unlock();
                        goto end;
                }
 
index f4195a0f027989c4829b2f84d678373894d2fa1c..fd68f6fb02d787b1a8b62d13d1b829c355f8f996 100644 (file)
@@ -2685,6 +2685,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
 
        ieee80211_recalc_ps(local);
        ieee80211_recalc_ps_vif(sdata);
+       ieee80211_check_fast_rx_iface(sdata);
 
        return 0;
 }
index a75653affbf71894e956b0211b312e61e1182ed2..b5adf3625d161bb7537ec5d5cad320ef4335f4e8 100644 (file)
@@ -213,6 +213,7 @@ static const char *hw_flag_names[] = {
        FLAG(SUPPORTS_TX_FRAG),
        FLAG(SUPPORTS_TDLS_BUFFER_STA),
        FLAG(DEAUTH_NEED_MGD_TX_PREP),
+       FLAG(DOESNT_SUPPORT_QOS_NDP),
 #undef FLAG
 };
 
index 0024eff9bb84fbd3a55e1aecf615d9dcedcbaf93..fe4aefb06d9f2295a5a2e9863fc2bb4770deec02 100644 (file)
@@ -897,7 +897,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
        struct ieee80211_hdr_3addr *nullfunc;
        struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 
-       skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, true);
+       skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif,
+               !ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP));
        if (!skb)
                return;
 
index 3dc162ddc3a6539f743215f0c0fb18a44b376c34..9c898a3688c6cac59ec65c79053e67c492fe2646 100644 (file)
@@ -2353,39 +2353,17 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
 }
 
 static ieee80211_rx_result debug_noinline
-ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+__ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
 {
        struct net_device *dev = rx->sdata->dev;
        struct sk_buff *skb = rx->skb;
        struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
        __le16 fc = hdr->frame_control;
        struct sk_buff_head frame_list;
-       struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
        struct ethhdr ethhdr;
        const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source;
 
-       if (unlikely(!ieee80211_is_data(fc)))
-               return RX_CONTINUE;
-
-       if (unlikely(!ieee80211_is_data_present(fc)))
-               return RX_DROP_MONITOR;
-
-       if (!(status->rx_flags & IEEE80211_RX_AMSDU))
-               return RX_CONTINUE;
-
        if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
-               switch (rx->sdata->vif.type) {
-               case NL80211_IFTYPE_AP_VLAN:
-                       if (!rx->sdata->u.vlan.sta)
-                               return RX_DROP_UNUSABLE;
-                       break;
-               case NL80211_IFTYPE_STATION:
-                       if (!rx->sdata->u.mgd.use_4addr)
-                               return RX_DROP_UNUSABLE;
-                       break;
-               default:
-                       return RX_DROP_UNUSABLE;
-               }
                check_da = NULL;
                check_sa = NULL;
        } else switch (rx->sdata->vif.type) {
@@ -2405,15 +2383,13 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
                        break;
        }
 
-       if (is_multicast_ether_addr(hdr->addr1))
-               return RX_DROP_UNUSABLE;
-
        skb->dev = dev;
        __skb_queue_head_init(&frame_list);
 
        if (ieee80211_data_to_8023_exthdr(skb, &ethhdr,
                                          rx->sdata->vif.addr,
-                                         rx->sdata->vif.type))
+                                         rx->sdata->vif.type,
+                                         data_offset))
                return RX_DROP_UNUSABLE;
 
        ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
@@ -2435,6 +2411,44 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
        return RX_QUEUED;
 }
 
+static ieee80211_rx_result debug_noinline
+ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+{
+       struct sk_buff *skb = rx->skb;
+       struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+       struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+       __le16 fc = hdr->frame_control;
+
+       if (!(status->rx_flags & IEEE80211_RX_AMSDU))
+               return RX_CONTINUE;
+
+       if (unlikely(!ieee80211_is_data(fc)))
+               return RX_CONTINUE;
+
+       if (unlikely(!ieee80211_is_data_present(fc)))
+               return RX_DROP_MONITOR;
+
+       if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
+               switch (rx->sdata->vif.type) {
+               case NL80211_IFTYPE_AP_VLAN:
+                       if (!rx->sdata->u.vlan.sta)
+                               return RX_DROP_UNUSABLE;
+                       break;
+               case NL80211_IFTYPE_STATION:
+                       if (!rx->sdata->u.mgd.use_4addr)
+                               return RX_DROP_UNUSABLE;
+                       break;
+               default:
+                       return RX_DROP_UNUSABLE;
+               }
+       }
+
+       if (is_multicast_ether_addr(hdr->addr1))
+               return RX_DROP_UNUSABLE;
+
+       return __ieee80211_rx_h_amsdu(rx, 0);
+}
+
 #ifdef CONFIG_MAC80211_MESH
 static ieee80211_rx_result
 ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
@@ -2535,11 +2549,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 
        fwd_skb = skb_copy_expand(skb, local->tx_headroom +
                                       sdata->encrypt_headroom, 0, GFP_ATOMIC);
-       if (!fwd_skb) {
-               net_info_ratelimited("%s: failed to clone mesh frame\n",
-                                   sdata->name);
+       if (!fwd_skb)
                goto out;
-       }
 
        fwd_hdr =  (struct ieee80211_hdr *) fwd_skb->data;
        fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY);
@@ -3747,15 +3758,6 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
 
        switch (sdata->vif.type) {
        case NL80211_IFTYPE_STATION:
-               /* 4-addr is harder to deal with, later maybe */
-               if (sdata->u.mgd.use_4addr)
-                       goto clear;
-               /* software powersave is a huge mess, avoid all of it */
-               if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
-                       goto clear;
-               if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
-                   !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
-                       goto clear;
                if (sta->sta.tdls) {
                        fastrx.da_offs = offsetof(struct ieee80211_hdr, addr1);
                        fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr2);
@@ -3767,6 +3769,23 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
                        fastrx.expected_ds_bits =
                                cpu_to_le16(IEEE80211_FCTL_FROMDS);
                }
+
+               if (sdata->u.mgd.use_4addr && !sta->sta.tdls) {
+                       fastrx.expected_ds_bits |=
+                               cpu_to_le16(IEEE80211_FCTL_TODS);
+                       fastrx.da_offs = offsetof(struct ieee80211_hdr, addr3);
+                       fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+               }
+
+               if (!sdata->u.mgd.powersave)
+                       break;
+
+               /* software powersave is a huge mess, avoid all of it */
+               if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
+                       goto clear;
+               if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
+                   !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
+                       goto clear;
                break;
        case NL80211_IFTYPE_AP_VLAN:
        case NL80211_IFTYPE_AP:
@@ -3783,6 +3802,15 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
                        !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
                        (sdata->vif.type != NL80211_IFTYPE_AP_VLAN ||
                         !sdata->u.vlan.sta);
+
+               if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
+                   sdata->u.vlan.sta) {
+                       fastrx.expected_ds_bits |=
+                               cpu_to_le16(IEEE80211_FCTL_FROMDS);
+                       fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+                       fastrx.internal_forward = 0;
+               }
+
                break;
        default:
                goto clear;
@@ -3881,7 +3909,8 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
        struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
        struct sta_info *sta = rx->sta;
        int orig_len = skb->len;
-       int snap_offs = ieee80211_hdrlen(hdr->frame_control);
+       int hdrlen = ieee80211_hdrlen(hdr->frame_control);
+       int snap_offs = hdrlen;
        struct {
                u8 snap[sizeof(rfc1042_header)];
                __be16 proto;
@@ -3912,10 +3941,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
            (status->flag & FAST_RX_CRYPT_FLAGS) != FAST_RX_CRYPT_FLAGS)
                return false;
 
-       /* we don't deal with A-MSDU deaggregation here */
-       if (status->rx_flags & IEEE80211_RX_AMSDU)
-               return false;
-
        if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
                return false;
 
@@ -3937,7 +3962,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
        if ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FROMDS |
                                              IEEE80211_FCTL_TODS)) !=
            fast_rx->expected_ds_bits)
-               goto drop;
+               return false;
 
        /* assign the key to drop unencrypted frames (later)
         * and strip the IV/MIC if necessary
@@ -3947,21 +3972,24 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
                snap_offs += IEEE80211_CCMP_HDR_LEN;
        }
 
-       if (!pskb_may_pull(skb, snap_offs + sizeof(*payload)))
-               goto drop;
-       payload = (void *)(skb->data + snap_offs);
+       if (!(status->rx_flags & IEEE80211_RX_AMSDU)) {
+               if (!pskb_may_pull(skb, snap_offs + sizeof(*payload)))
+                       goto drop;
 
-       if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr))
-               return false;
+               payload = (void *)(skb->data + snap_offs);
 
-       /* Don't handle these here since they require special code.
-        * Accept AARP and IPX even though they should come with a
-        * bridge-tunnel header - but if we get them this way then
-        * there's little point in discarding them.
-        */
-       if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) ||
-                    payload->proto == fast_rx->control_port_protocol))
-               return false;
+               if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr))
+                       return false;
+
+               /* Don't handle these here since they require special code.
+                * Accept AARP and IPX even though they should come with a
+                * bridge-tunnel header - but if we get them this way then
+                * there's little point in discarding them.
+                */
+               if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) ||
+                            payload->proto == fast_rx->control_port_protocol))
+                       return false;
+       }
 
        /* after this point, don't punt to the slowpath! */
 
@@ -3975,12 +4003,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
        }
 
        /* statistics part of ieee80211_rx_h_sta_process() */
-       stats->last_rx = jiffies;
-       stats->last_rate = sta_stats_encode_rate(status);
-
-       stats->fragments++;
-       stats->packets++;
-
        if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
                stats->last_signal = status->signal;
                if (!fast_rx->uses_rss)
@@ -4009,6 +4031,20 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
        if (rx->key && !ieee80211_has_protected(hdr->frame_control))
                goto drop;
 
+       if (status->rx_flags & IEEE80211_RX_AMSDU) {
+               if (__ieee80211_rx_h_amsdu(rx, snap_offs - hdrlen) !=
+                   RX_QUEUED)
+                       goto drop;
+
+               return true;
+       }
+
+       stats->last_rx = jiffies;
+       stats->last_rate = sta_stats_encode_rate(status);
+
+       stats->fragments++;
+       stats->packets++;
+
        /* do the header conversion - first grab the addresses */
        ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs);
        ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs);
index 7643178ef13246840a5c2b9b5aacc370c06b5a94..933c67b5f84572e599e758b8d7e9b3706470f507 100644 (file)
@@ -3569,6 +3569,14 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
        if (!IS_ERR_OR_NULL(sta)) {
                struct ieee80211_fast_tx *fast_tx;
 
+               /* We need a bit of data queued to build aggregates properly, so
+                * instruct the TCP stack to allow more than a single ms of data
+                * to be queued in the stack. The value is a bit-shift of 1
+                * second, so 8 is ~4ms of queued data. Only affects local TCP
+                * sockets.
+                */
+               sk_pacing_shift_update(skb->sk, 8);
+
                fast_tx = rcu_dereference(sta->fast_tx);
 
                if (fast_tx &&
index e545a3c9365f8a8fe172a2a2d4ad0a14af49f9a8..d4a89a8be013cebdfc5ab7f86f6527a799bb645d 100644 (file)
@@ -122,7 +122,7 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
        if (skb->len <= mtu)
                return false;
 
-       if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+       if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
                return false;
 
        return true;
@@ -2488,6 +2488,7 @@ static void mpls_net_exit(struct net *net)
 static struct pernet_operations mpls_net_ops = {
        .init = mpls_net_init,
        .exit = mpls_net_exit,
+       .async = true,
 };
 
 static struct rtnl_af_ops mpls_af_ops __read_mostly = {
index dd12b564f2e7e4594d643e7dfaedb8814550ecdd..436ef68331f2b3ec3409d2ce611d805ee4cfdeaa 100644 (file)
@@ -1,4 +1,4 @@
 #
 # Makefile for NCSI API
 #
-obj-$(CONFIG_NET_NCSI) += ncsi-cmd.o ncsi-rsp.o ncsi-aen.o ncsi-manage.o
+obj-$(CONFIG_NET_NCSI) += ncsi-cmd.o ncsi-rsp.o ncsi-aen.o ncsi-manage.o ncsi-netlink.o
index d30f7bd741d0610cd261b4514baf0b1cbf20c607..8da84312cd3b5e862d13cb41c2cd381d4a8378a5 100644 (file)
@@ -276,6 +276,8 @@ struct ncsi_dev_priv {
        unsigned int        package_num;     /* Number of packages         */
        struct list_head    packages;        /* List of packages           */
        struct ncsi_channel *hot_channel;    /* Channel was ever active    */
+       struct ncsi_package *force_package;  /* Force a specific package   */
+       struct ncsi_channel *force_channel;  /* Force a specific channel   */
        struct ncsi_request requests[256];   /* Request table              */
        unsigned int        request_id;      /* Last used request ID       */
 #define NCSI_REQ_START_IDX     1
@@ -318,6 +320,7 @@ extern spinlock_t ncsi_dev_lock;
        list_for_each_entry_rcu(nc, &np->channels, node)
 
 /* Resources */
+u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index);
 int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data);
 int ncsi_add_filter(struct ncsi_channel *nc, int table, void *data);
 int ncsi_remove_filter(struct ncsi_channel *nc, int table, int index);
index c989211bbabc6475d805f6adf1957157b2e6727c..c3695ba0cf94fbc2a3d63b7254dc1cc400e6cf0a 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/init.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
-#include <linux/netlink.h>
 
 #include <net/ncsi.h>
 #include <net/net_namespace.h>
@@ -23,6 +22,7 @@
 
 #include "internal.h"
 #include "ncsi-pkt.h"
+#include "ncsi-netlink.h"
 
 LIST_HEAD(ncsi_dev_list);
 DEFINE_SPINLOCK(ncsi_dev_lock);
@@ -38,7 +38,7 @@ static inline int ncsi_filter_size(int table)
        return sizes[table];
 }
 
-static u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
+u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
 {
        struct ncsi_channel_filter *ncf;
        int size;
@@ -965,20 +965,37 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
 
 static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
 {
-       struct ncsi_package *np;
-       struct ncsi_channel *nc, *found, *hot_nc;
+       struct ncsi_package *np, *force_package;
+       struct ncsi_channel *nc, *found, *hot_nc, *force_channel;
        struct ncsi_channel_mode *ncm;
        unsigned long flags;
 
        spin_lock_irqsave(&ndp->lock, flags);
        hot_nc = ndp->hot_channel;
+       force_channel = ndp->force_channel;
+       force_package = ndp->force_package;
        spin_unlock_irqrestore(&ndp->lock, flags);
 
+       /* Force a specific channel whether or not it has link if we have been
+        * configured to do so
+        */
+       if (force_package && force_channel) {
+               found = force_channel;
+               ncm = &found->modes[NCSI_MODE_LINK];
+               if (!(ncm->data[2] & 0x1))
+                       netdev_info(ndp->ndev.dev,
+                                   "NCSI: Channel %u forced, but it is link down\n",
+                                   found->id);
+               goto out;
+       }
+
        /* The search is done once an inactive channel with up
         * link is found.
         */
        found = NULL;
        NCSI_FOR_EACH_PACKAGE(ndp, np) {
+               if (ndp->force_package && np != ndp->force_package)
+                       continue;
                NCSI_FOR_EACH_CHANNEL(np, nc) {
                        spin_lock_irqsave(&nc->lock, flags);
 
@@ -1594,6 +1611,9 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
        ndp->ptype.dev = dev;
        dev_add_pack(&ndp->ptype);
 
+       /* Set up generic netlink interface */
+       ncsi_init_netlink(dev);
+
        return nd;
 }
 EXPORT_SYMBOL_GPL(ncsi_register_dev);
@@ -1673,6 +1693,8 @@ void ncsi_unregister_dev(struct ncsi_dev *nd)
 #endif
        spin_unlock_irqrestore(&ncsi_dev_lock, flags);
 
+       ncsi_unregister_netlink(nd->dev);
+
        kfree(ndp);
 }
 EXPORT_SYMBOL_GPL(ncsi_unregister_dev);
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
new file mode 100644 (file)
index 0000000..05fcfb4
--- /dev/null
@@ -0,0 +1,423 @@
+/*
+ * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/if_arp.h>
+#include <linux/rtnetlink.h>
+#include <linux/etherdevice.h>
+#include <linux/module.h>
+#include <net/genetlink.h>
+#include <net/ncsi.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <uapi/linux/ncsi.h>
+
+#include "internal.h"
+#include "ncsi-netlink.h"
+
+static struct genl_family ncsi_genl_family;
+
+static const struct nla_policy ncsi_genl_policy[NCSI_ATTR_MAX + 1] = {
+       [NCSI_ATTR_IFINDEX] =           { .type = NLA_U32 },
+       [NCSI_ATTR_PACKAGE_LIST] =      { .type = NLA_NESTED },
+       [NCSI_ATTR_PACKAGE_ID] =        { .type = NLA_U32 },
+       [NCSI_ATTR_CHANNEL_ID] =        { .type = NLA_U32 },
+};
+
+static struct ncsi_dev_priv *ndp_from_ifindex(struct net *net, u32 ifindex)
+{
+       struct ncsi_dev_priv *ndp;
+       struct net_device *dev;
+       struct ncsi_dev *nd;
+       struct ncsi_dev;
+
+       if (!net)
+               return NULL;
+
+       dev = dev_get_by_index(net, ifindex);
+       if (!dev) {
+               pr_err("NCSI netlink: No device for ifindex %u\n", ifindex);
+               return NULL;
+       }
+
+       nd = ncsi_find_dev(dev);
+       ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL;
+
+       dev_put(dev);
+       return ndp;
+}
+
+static int ncsi_write_channel_info(struct sk_buff *skb,
+                                  struct ncsi_dev_priv *ndp,
+                                  struct ncsi_channel *nc)
+{
+       struct nlattr *vid_nest;
+       struct ncsi_channel_filter *ncf;
+       struct ncsi_channel_mode *m;
+       u32 *data;
+       int i;
+
+       nla_put_u32(skb, NCSI_CHANNEL_ATTR_ID, nc->id);
+       m = &nc->modes[NCSI_MODE_LINK];
+       nla_put_u32(skb, NCSI_CHANNEL_ATTR_LINK_STATE, m->data[2]);
+       if (nc->state == NCSI_CHANNEL_ACTIVE)
+               nla_put_flag(skb, NCSI_CHANNEL_ATTR_ACTIVE);
+       if (ndp->force_channel == nc)
+               nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED);
+
+       nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version);
+       nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2);
+       nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name);
+
+       vid_nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR_VLAN_LIST);
+       if (!vid_nest)
+               return -ENOMEM;
+       ncf = nc->filters[NCSI_FILTER_VLAN];
+       i = -1;
+       if (ncf) {
+               while ((i = find_next_bit((void *)&ncf->bitmap, ncf->total,
+                                         i + 1)) < ncf->total) {
+                       data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, i);
+                       /* Uninitialised channels will have 'zero' vlan ids */
+                       if (!data || !*data)
+                               continue;
+                       nla_put_u16(skb, NCSI_CHANNEL_ATTR_VLAN_ID,
+                                   *(u16 *)data);
+               }
+       }
+       nla_nest_end(skb, vid_nest);
+
+       return 0;
+}
+
+static int ncsi_write_package_info(struct sk_buff *skb,
+                                  struct ncsi_dev_priv *ndp, unsigned int id)
+{
+       struct nlattr *pnest, *cnest, *nest;
+       struct ncsi_package *np;
+       struct ncsi_channel *nc;
+       bool found;
+       int rc;
+
+       if (id > ndp->package_num) {
+               netdev_info(ndp->ndev.dev, "NCSI: No package with id %u\n", id);
+               return -ENODEV;
+       }
+
+       found = false;
+       NCSI_FOR_EACH_PACKAGE(ndp, np) {
+               if (np->id != id)
+                       continue;
+               pnest = nla_nest_start(skb, NCSI_PKG_ATTR);
+               if (!pnest)
+                       return -ENOMEM;
+               nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
+               if (ndp->force_package == np)
+                       nla_put_flag(skb, NCSI_PKG_ATTR_FORCED);
+               cnest = nla_nest_start(skb, NCSI_PKG_ATTR_CHANNEL_LIST);
+               if (!cnest) {
+                       nla_nest_cancel(skb, pnest);
+                       return -ENOMEM;
+               }
+               NCSI_FOR_EACH_CHANNEL(np, nc) {
+                       nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR);
+                       if (!nest) {
+                               nla_nest_cancel(skb, cnest);
+                               nla_nest_cancel(skb, pnest);
+                               return -ENOMEM;
+                       }
+                       rc = ncsi_write_channel_info(skb, ndp, nc);
+                       if (rc) {
+                               nla_nest_cancel(skb, nest);
+                               nla_nest_cancel(skb, cnest);
+                               nla_nest_cancel(skb, pnest);
+                               return rc;
+                       }
+                       nla_nest_end(skb, nest);
+               }
+               nla_nest_end(skb, cnest);
+               nla_nest_end(skb, pnest);
+               found = true;
+       }
+
+       if (!found)
+               return -ENODEV;
+
+       return 0;
+}
+
+static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info)
+{
+       struct ncsi_dev_priv *ndp;
+       unsigned int package_id;
+       struct sk_buff *skb;
+       struct nlattr *attr;
+       void *hdr;
+       int rc;
+
+       if (!info || !info->attrs)
+               return -EINVAL;
+
+       if (!info->attrs[NCSI_ATTR_IFINDEX])
+               return -EINVAL;
+
+       if (!info->attrs[NCSI_ATTR_PACKAGE_ID])
+               return -EINVAL;
+
+       ndp = ndp_from_ifindex(genl_info_net(info),
+                              nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+       if (!ndp)
+               return -ENODEV;
+
+       skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!skb)
+               return -ENOMEM;
+
+       hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+                         &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO);
+       if (!hdr) {
+               kfree_skb(skb);
+               return -EMSGSIZE;
+       }
+
+       package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
+
+       attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST);
+       rc = ncsi_write_package_info(skb, ndp, package_id);
+
+       if (rc) {
+               nla_nest_cancel(skb, attr);
+               goto err;
+       }
+
+       nla_nest_end(skb, attr);
+
+       genlmsg_end(skb, hdr);
+       return genlmsg_reply(skb, info);
+
+err:
+       genlmsg_cancel(skb, hdr);
+       kfree_skb(skb);
+       return rc;
+}
+
+static int ncsi_pkg_info_all_nl(struct sk_buff *skb,
+                               struct netlink_callback *cb)
+{
+       struct nlattr *attrs[NCSI_ATTR_MAX];
+       struct ncsi_package *np, *package;
+       struct ncsi_dev_priv *ndp;
+       unsigned int package_id;
+       struct nlattr *attr;
+       void *hdr;
+       int rc;
+
+       rc = genlmsg_parse(cb->nlh, &ncsi_genl_family, attrs, NCSI_ATTR_MAX,
+                          ncsi_genl_policy, NULL);
+       if (rc)
+               return rc;
+
+       if (!attrs[NCSI_ATTR_IFINDEX])
+               return -EINVAL;
+
+       ndp = ndp_from_ifindex(get_net(sock_net(skb->sk)),
+                              nla_get_u32(attrs[NCSI_ATTR_IFINDEX]));
+
+       if (!ndp)
+               return -ENODEV;
+
+       package_id = cb->args[0];
+       package = NULL;
+       NCSI_FOR_EACH_PACKAGE(ndp, np)
+               if (np->id == package_id)
+                       package = np;
+
+       if (!package)
+               return 0; /* done */
+
+       hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+                         &ncsi_genl_family, 0,  NCSI_CMD_PKG_INFO);
+       if (!hdr) {
+               rc = -EMSGSIZE;
+               goto err;
+       }
+
+       attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST);
+       rc = ncsi_write_package_info(skb, ndp, package->id);
+       if (rc) {
+               nla_nest_cancel(skb, attr);
+               goto err;
+       }
+
+       nla_nest_end(skb, attr);
+       genlmsg_end(skb, hdr);
+
+       cb->args[0] = package_id + 1;
+
+       return skb->len;
+err:
+       genlmsg_cancel(skb, hdr);
+       return rc;
+}
+
+static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info)
+{
+       struct ncsi_package *np, *package;
+       struct ncsi_channel *nc, *channel;
+       u32 package_id, channel_id;
+       struct ncsi_dev_priv *ndp;
+       unsigned long flags;
+
+       if (!info || !info->attrs)
+               return -EINVAL;
+
+       if (!info->attrs[NCSI_ATTR_IFINDEX])
+               return -EINVAL;
+
+       if (!info->attrs[NCSI_ATTR_PACKAGE_ID])
+               return -EINVAL;
+
+       ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)),
+                              nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+       if (!ndp)
+               return -ENODEV;
+
+       package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
+       package = NULL;
+
+       spin_lock_irqsave(&ndp->lock, flags);
+
+       NCSI_FOR_EACH_PACKAGE(ndp, np)
+               if (np->id == package_id)
+                       package = np;
+       if (!package) {
+               /* The user has set a package that does not exist */
+               spin_unlock_irqrestore(&ndp->lock, flags);
+               return -ERANGE;
+       }
+
+       channel = NULL;
+       if (!info->attrs[NCSI_ATTR_CHANNEL_ID]) {
+               /* Allow any channel */
+               channel_id = NCSI_RESERVED_CHANNEL;
+       } else {
+               channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]);
+               NCSI_FOR_EACH_CHANNEL(package, nc)
+                       if (nc->id == channel_id)
+                               channel = nc;
+       }
+
+       if (channel_id != NCSI_RESERVED_CHANNEL && !channel) {
+               /* The user has set a channel that does not exist on this
+                * package
+                */
+               spin_unlock_irqrestore(&ndp->lock, flags);
+               netdev_info(ndp->ndev.dev, "NCSI: Channel %u does not exist!\n",
+                           channel_id);
+               return -ERANGE;
+       }
+
+       ndp->force_package = package;
+       ndp->force_channel = channel;
+       spin_unlock_irqrestore(&ndp->lock, flags);
+
+       netdev_info(ndp->ndev.dev, "Set package 0x%x, channel 0x%x%s as preferred\n",
+                   package_id, channel_id,
+                   channel_id == NCSI_RESERVED_CHANNEL ? " (any)" : "");
+
+       /* Bounce the NCSI channel to set changes */
+       ncsi_stop_dev(&ndp->ndev);
+       ncsi_start_dev(&ndp->ndev);
+
+       return 0;
+}
+
+static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info)
+{
+       struct ncsi_dev_priv *ndp;
+       unsigned long flags;
+
+       if (!info || !info->attrs)
+               return -EINVAL;
+
+       if (!info->attrs[NCSI_ATTR_IFINDEX])
+               return -EINVAL;
+
+       ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)),
+                              nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+       if (!ndp)
+               return -ENODEV;
+
+       /* Clear any override */
+       spin_lock_irqsave(&ndp->lock, flags);
+       ndp->force_package = NULL;
+       ndp->force_channel = NULL;
+       spin_unlock_irqrestore(&ndp->lock, flags);
+       netdev_info(ndp->ndev.dev, "NCSI: Cleared preferred package/channel\n");
+
+       /* Bounce the NCSI channel to set changes */
+       ncsi_stop_dev(&ndp->ndev);
+       ncsi_start_dev(&ndp->ndev);
+
+       return 0;
+}
+
+static const struct genl_ops ncsi_ops[] = {
+       {
+               .cmd = NCSI_CMD_PKG_INFO,
+               .policy = ncsi_genl_policy,
+               .doit = ncsi_pkg_info_nl,
+               .dumpit = ncsi_pkg_info_all_nl,
+               .flags = 0,
+       },
+       {
+               .cmd = NCSI_CMD_SET_INTERFACE,
+               .policy = ncsi_genl_policy,
+               .doit = ncsi_set_interface_nl,
+               .flags = GENL_ADMIN_PERM,
+       },
+       {
+               .cmd = NCSI_CMD_CLEAR_INTERFACE,
+               .policy = ncsi_genl_policy,
+               .doit = ncsi_clear_interface_nl,
+               .flags = GENL_ADMIN_PERM,
+       },
+};
+
+static struct genl_family ncsi_genl_family __ro_after_init = {
+       .name = "NCSI",
+       .version = 0,
+       .maxattr = NCSI_ATTR_MAX,
+       .module = THIS_MODULE,
+       .ops = ncsi_ops,
+       .n_ops = ARRAY_SIZE(ncsi_ops),
+};
+
+int ncsi_init_netlink(struct net_device *dev)
+{
+       int rc;
+
+       rc = genl_register_family(&ncsi_genl_family);
+       if (rc)
+               netdev_err(dev, "ncsi: failed to register netlink family\n");
+
+       return rc;
+}
+
+int ncsi_unregister_netlink(struct net_device *dev)
+{
+       int rc;
+
+       rc = genl_unregister_family(&ncsi_genl_family);
+       if (rc)
+               netdev_err(dev, "ncsi: failed to unregister netlink family\n");
+
+       return rc;
+}
diff --git a/net/ncsi/ncsi-netlink.h b/net/ncsi/ncsi-netlink.h
new file mode 100644 (file)
index 0000000..91a5c25
--- /dev/null
@@ -0,0 +1,20 @@
+/*
+ * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __NCSI_NETLINK_H__
+#define __NCSI_NETLINK_H__
+
+#include <linux/netdevice.h>
+
+#include "internal.h"
+
+int ncsi_init_netlink(struct net_device *dev);
+int ncsi_unregister_netlink(struct net_device *dev);
+
+#endif /* __NCSI_NETLINK_H__ */
index 975a85a48d39fad1bf8fb88046a0558e6017af62..2523ebe2b3cc496abfe504454d79aa29c6a6e96d 100644 (file)
@@ -2094,7 +2094,8 @@ static struct pernet_operations ip_set_net_ops = {
        .init   = ip_set_net_init,
        .exit   = ip_set_net_exit,
        .id     = &ip_set_net_id,
-       .size   = sizeof(struct ip_set_net)
+       .size   = sizeof(struct ip_set_net),
+       .async  = true,
 };
 
 static int __init
index 5f6f73cf2174d1494a685d73ca94ea124da83de5..6a6cb9db030bc66489b4b940655405dd1941d226 100644 (file)
@@ -2289,10 +2289,12 @@ static struct pernet_operations ipvs_core_ops = {
        .exit = __ip_vs_cleanup,
        .id   = &ip_vs_net_id,
        .size = sizeof(struct netns_ipvs),
+       .async = true,
 };
 
 static struct pernet_operations ipvs_core_dev_ops = {
        .exit = __ip_vs_dev_cleanup,
+       .async = true,
 };
 
 /*
index 3e17d32b629d18e97f85fe8e543431562cdf3c6e..8b25aab419287dc307db0d7767a4869380cc5349 100644 (file)
@@ -260,7 +260,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
                buf_len = strlen(buf);
 
                ct = nf_ct_get(skb, &ctinfo);
-               if (ct && (ct->status & IPS_NAT_MASK)) {
+               if (ct) {
                        bool mangled;
 
                        /* If mangling fails this function will return 0
@@ -479,6 +479,7 @@ static void __ip_vs_ftp_exit(struct net *net)
 static struct pernet_operations ip_vs_ftp_ops = {
        .init = __ip_vs_ftp_init,
        .exit = __ip_vs_ftp_exit,
+       .async = true,
 };
 
 static int __init ip_vs_ftp_init(void)
index d625179de485b0324d862f78cf6db7764609404e..6a340c94c4b88e1e41e4f5cd30a1fb766d36e198 100644 (file)
@@ -604,6 +604,7 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net) { }
 static struct pernet_operations ip_vs_lblc_ops = {
        .init = __ip_vs_lblc_init,
        .exit = __ip_vs_lblc_exit,
+       .async = true,
 };
 
 static int __init ip_vs_lblc_init(void)
index 84c57b62a5887b433b672d0d71c681bbf9b11a59..0627881128da1359cd4eee4864e522f9d1e2bd55 100644 (file)
@@ -789,6 +789,7 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }
 static struct pernet_operations ip_vs_lblcr_ops = {
        .init = __ip_vs_lblcr_init,
        .exit = __ip_vs_lblcr_exit,
+       .async = true,
 };
 
 static int __init ip_vs_lblcr_init(void)
index dd177ebee9aabcbe47e704e9d4e424f1b718b89f..8884d302d33a640380261fae28703581efb0369d 100644 (file)
@@ -3417,6 +3417,7 @@ static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list)
 static struct pernet_operations ctnetlink_net_ops = {
        .init           = ctnetlink_net_init,
        .exit_batch     = ctnetlink_net_exit_batch,
+       .async          = true,
 };
 
 static int __init ctnetlink_init(void)
index d049ea5a3770df595f49511cd4ad96eb1195ca00..9bcd72fe91f91a8dc140a695cdac35ed9d3a14f2 100644 (file)
@@ -406,6 +406,7 @@ static struct pernet_operations proto_gre_net_ops = {
        .exit = proto_gre_net_exit,
        .id   = &proto_gre_net_id,
        .size = sizeof(struct netns_proto_gre),
+       .async = true,
 };
 
 static int __init nf_ct_proto_gre_init(void)
index 9123fdec5e14a25dac693940bc20fce4c623fb86..3cdce391362e265efb27195b36c5cff323517c7c 100644 (file)
@@ -705,6 +705,7 @@ static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
 static struct pernet_operations nf_conntrack_net_ops = {
        .init           = nf_conntrack_pernet_init,
        .exit_batch     = nf_conntrack_pernet_exit,
+       .async          = true,
 };
 
 static int __init nf_conntrack_standalone_init(void)
index 350eb147754d886f25d2aad19dab7fde157e1ddb..254c2c6bde480b74075dcb409c643eb53e0335bc 100644 (file)
@@ -47,6 +47,7 @@ static void __net_exit nf_log_netdev_net_exit(struct net *net)
 static struct pernet_operations nf_log_netdev_net_ops = {
        .init = nf_log_netdev_net_init,
        .exit = nf_log_netdev_net_exit,
+       .async = true,
 };
 
 static int __init nf_log_netdev_init(void)
index 92139a087260ae3d9fb03a2da4de375fc2f3a20c..64b875e452ca93299b4887e185f7b64894174f70 100644 (file)
@@ -398,6 +398,7 @@ static struct pernet_operations synproxy_net_ops = {
        .exit           = synproxy_net_exit,
        .id             = &synproxy_net_id,
        .size           = sizeof(struct synproxy_net),
+       .async          = true,
 };
 
 static int __init synproxy_core_init(void)
index 8b9fe30de0cdda1df772f8b16b03850603008f06..fd13d28e4ca7c019a97138f236841f3ab2e598a3 100644 (file)
@@ -5037,9 +5037,9 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 {
        const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        const struct nf_flowtable_type *type;
+       struct nft_flowtable *flowtable, *ft;
        u8 genmask = nft_genmask_next(net);
        int family = nfmsg->nfgen_family;
-       struct nft_flowtable *flowtable;
        struct nft_table *table;
        struct nft_ctx ctx;
        int err, i, k;
@@ -5099,6 +5099,22 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
                goto err3;
 
        for (i = 0; i < flowtable->ops_len; i++) {
+               if (!flowtable->ops[i].dev)
+                       continue;
+
+               list_for_each_entry(ft, &table->flowtables, list) {
+                       for (k = 0; k < ft->ops_len; k++) {
+                               if (!ft->ops[k].dev)
+                                       continue;
+
+                               if (flowtable->ops[i].dev == ft->ops[k].dev &&
+                                   flowtable->ops[i].pf == ft->ops[k].pf) {
+                                       err = -EBUSY;
+                                       goto err4;
+                               }
+                       }
+               }
+
                err = nf_register_net_hook(net, &flowtable->ops[i]);
                if (err < 0)
                        goto err4;
@@ -5120,7 +5136,7 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
        i = flowtable->ops_len;
 err4:
        for (k = i - 1; k >= 0; k--)
-               nf_unregister_net_hook(net, &flowtable->ops[i]);
+               nf_unregister_net_hook(net, &flowtable->ops[k]);
 
        kfree(flowtable->ops);
 err3:
@@ -5145,6 +5161,11 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
        struct nft_table *table;
        struct nft_ctx ctx;
 
+       if (!nla[NFTA_FLOWTABLE_TABLE] ||
+           (!nla[NFTA_FLOWTABLE_NAME] &&
+            !nla[NFTA_FLOWTABLE_HANDLE]))
+               return -EINVAL;
+
        table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
                                       family, genmask);
        if (IS_ERR(table))
@@ -5402,6 +5423,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
 static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
 {
        cancel_delayed_work_sync(&flowtable->data.gc_work);
+       kfree(flowtable->ops);
        kfree(flowtable->name);
        flowtable->data.type->free(&flowtable->data);
        rhashtable_destroy(&flowtable->data.rhashtable);
@@ -6575,6 +6597,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
 static struct pernet_operations nf_tables_net_ops = {
        .init   = nf_tables_init_net,
        .exit   = nf_tables_exit_net,
+       .async  = true,
 };
 
 static int __init nf_tables_module_init(void)
index 03ead8a9e90ccfcc1936ee66064269dec4719fbd..84fc4954862d46c0fb6ca77c3842518c6cd7753e 100644 (file)
@@ -566,6 +566,7 @@ static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list)
 static struct pernet_operations nfnetlink_net_ops = {
        .init           = nfnetlink_net_init,
        .exit_batch     = nfnetlink_net_exit_batch,
+       .async          = true,
 };
 
 static int __init nfnetlink_init(void)
index 88d427f9f9e6a0f461527ad117a157f395982ea9..8d9f18bb8840677883f50810135e075a17e828e8 100644 (file)
@@ -515,6 +515,7 @@ static void __net_exit nfnl_acct_net_exit(struct net *net)
 static struct pernet_operations nfnl_acct_ops = {
         .init   = nfnl_acct_net_init,
         .exit   = nfnl_acct_net_exit,
+       .async  = true,
 };
 
 static int __init nfnl_acct_init(void)
index 95b04702a655af03be4297b4a84cafb6dd618fc3..6819300f7fb789f06183a8d1873a0f07d258149a 100644 (file)
@@ -586,6 +586,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
 static struct pernet_operations cttimeout_ops = {
        .init   = cttimeout_net_init,
        .exit   = cttimeout_net_exit,
+       .async  = true,
 };
 
 static int __init cttimeout_init(void)
index 7b46aa4c478d35a0a94d2214ffcb2441c9f9c582..b21ef79849a1cb9ec435307107b54f65ccb1b242 100644 (file)
@@ -1108,6 +1108,7 @@ static struct pernet_operations nfnl_log_net_ops = {
        .exit   = nfnl_log_net_exit,
        .id     = &nfnl_log_net_id,
        .size   = sizeof(struct nfnl_log_net),
+       .async  = true,
 };
 
 static int __init nfnetlink_log_init(void)
index 8bba23160a68fde04624eac306731ebf64fb8600..9f572ed56208992bfd65efc0817e0e5e8876e334 100644 (file)
@@ -833,11 +833,8 @@ nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
                if (diff > skb_tailroom(e->skb)) {
                        nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
                                               diff, GFP_ATOMIC);
-                       if (!nskb) {
-                               printk(KERN_WARNING "nf_queue: OOM "
-                                     "in mangle, dropping packet\n");
+                       if (!nskb)
                                return -ENOMEM;
-                       }
                        kfree_skb(e->skb);
                        e->skb = nskb;
                }
@@ -1528,6 +1525,7 @@ static struct pernet_operations nfnl_queue_net_ops = {
        .exit_batch     = nfnl_queue_net_exit_batch,
        .id             = &nfnl_queue_net_id,
        .size           = sizeof(struct nfnl_queue_net),
+       .async          = true,
 };
 
 static int __init nfnetlink_queue_init(void)
index 3f1624ee056f96570254cf5d8737f38b82a87593..d40591fe1b2f64c3531b35abcadd8dbfc6d67c8a 100644 (file)
@@ -674,7 +674,7 @@ static const struct nft_set_ops *
 nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
                    u32 flags)
 {
-       if (desc->size) {
+       if (desc->size && !(flags & NFT_SET_TIMEOUT)) {
                switch (desc->klen) {
                case 4:
                        return &nft_hash_fast_ops;
index d9deebe599ecac285541a15dbd3a794c25e4dfb9..6de1f6a4cb806bba1b28510fe718b5701755f887 100644 (file)
@@ -423,6 +423,36 @@ textify_hooks(char *buf, size_t size, unsigned int mask, uint8_t nfproto)
        return buf;
 }
 
+/**
+ * xt_check_proc_name - check that name is suitable for /proc file creation
+ *
+ * @name: file name candidate
+ * @size: length of buffer
+ *
+ * Some x_tables modules wish to create a file in /proc.
+ * This function makes sure that the name is suitable for this
+ * purpose: it checks that name is non-empty, NUL terminated within
+ * @size bytes, contains no '/' and isn't a 'special' name like "..".
+ *
+ * Return: 0 if name is usable, -EINVAL or -ENAMETOOLONG otherwise.
+ */
+int xt_check_proc_name(const char *name, unsigned int size)
+{
+       if (name[0] == '\0')
+               return -EINVAL;
+
+       if (strnlen(name, size) == size)
+               return -ENAMETOOLONG;
+
+       if (strcmp(name, ".") == 0 ||
+           strcmp(name, "..") == 0 ||
+           strchr(name, '/'))
+               return -EINVAL;
+
+       return 0;
+}
+EXPORT_SYMBOL(xt_check_proc_name);
+
+
 int xt_check_match(struct xt_mtchk_param *par,
                   unsigned int size, u_int8_t proto, bool inv_proto)
 {
index 66f5aca62a087ee816b6c18e0c385af535cf4ae7..ef65b7a9173e655b8bf0a809b6d6818c3d72e93e 100644 (file)
@@ -917,8 +917,9 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
        struct hashlimit_cfg3 cfg = {};
        int ret;
 
-       if (info->name[sizeof(info->name) - 1] != '\0')
-               return -EINVAL;
+       ret = xt_check_proc_name(info->name, sizeof(info->name));
+       if (ret)
+               return ret;
 
        ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
 
@@ -935,8 +936,9 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
        struct hashlimit_cfg3 cfg = {};
        int ret;
 
-       if (info->name[sizeof(info->name) - 1] != '\0')
-               return -EINVAL;
+       ret = xt_check_proc_name(info->name, sizeof(info->name));
+       if (ret)
+               return ret;
 
        ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
 
@@ -950,9 +952,11 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
 static int hashlimit_mt_check(const struct xt_mtchk_param *par)
 {
        struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
+       int ret;
 
-       if (info->name[sizeof(info->name) - 1] != '\0')
-               return -EINVAL;
+       ret = xt_check_proc_name(info->name, sizeof(info->name));
+       if (ret)
+               return ret;
 
        return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg,
                                         info->name, 3);
@@ -1345,6 +1349,7 @@ static struct pernet_operations hashlimit_net_ops = {
        .exit   = hashlimit_net_exit,
        .id     = &hashlimit_net_id,
        .size   = sizeof(struct hashlimit_net),
+       .async  = true,
 };
 
 static int __init hashlimit_mt_init(void)
index 6d232d18faff72ec8c26bd82dc7685fec52c5c6a..486dd24da78b74872e01dd7c55eebbc75bc285a5 100644 (file)
@@ -361,9 +361,9 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
                                    info->hit_count, XT_RECENT_MAX_NSTAMPS - 1);
                return -EINVAL;
        }
-       if (info->name[0] == '\0' ||
-           strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN)
-               return -EINVAL;
+       ret = xt_check_proc_name(info->name, sizeof(info->name));
+       if (ret)
+               return ret;
 
        if (ip_pkt_list_tot && info->hit_count < ip_pkt_list_tot)
                nstamp_mask = roundup_pow_of_two(ip_pkt_list_tot) - 1;
@@ -687,6 +687,7 @@ static struct pernet_operations recent_net_ops = {
        .exit   = recent_net_exit,
        .id     = &recent_net_id,
        .size   = sizeof(struct recent_net),
+       .async  = true,
 };
 
 static struct xt_match recent_mt_reg[] __read_mostly = {
index a6f63a5faee7c83145ea1fcc02e9fd6514e6a897..af51b8c0a2cbca2e45aed95edac687763a858a59 100644 (file)
@@ -1107,7 +1107,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
        if (!err)
                delivered = true;
        else if (err != -ESRCH)
-               goto error;
+               return err;
        return delivered ? 0 : -ESRCH;
  error:
        kfree_skb(skb);
index ef38e5aecd2851d61d204e4e04c9d6d723f14887..100191df03714d9b67e742dffe5339f7d438cb32 100644 (file)
@@ -2384,6 +2384,7 @@ static struct pernet_operations ovs_net_ops = {
        .exit = ovs_exit_net,
        .id   = &ovs_net_id,
        .size = sizeof(struct ovs_net),
+       .async = true,
 };
 
 static int __init dp_init(void)
index 04b94281a30b2b97a449efbb71fb3958afa44177..b891a91577f8030e55e973f4a4cf74f0c4637916 100644 (file)
@@ -242,14 +242,20 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
 
                band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
                band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
+               if (band->rate == 0) {
+                       err = -EINVAL;
+                       goto exit_free_meter;
+               }
+
                band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
                /* Figure out max delta_t that is enough to fill any bucket.
                 * Keep max_delta_t size to the bucket units:
                 * pkts => 1/1000 packets, kilobits => bits.
+                *
+                * Start with a full bucket.
                 */
-               band_max_delta_t = (band->burst_size + band->rate) * 1000;
-               /* Start with a full bucket. */
-               band->bucket = band_max_delta_t;
+               band->bucket = (band->burst_size + band->rate) * 1000;
+               band_max_delta_t = band->bucket / band->rate;
                if (band_max_delta_t > meter->max_delta_t)
                        meter->max_delta_t = band_max_delta_t;
                band++;
index b6c8524032a0633c8f3262e9ea58e457b6a14598..f81c1d0ddff4d6e05da635f78e84bd28cc681f06 100644 (file)
@@ -464,10 +464,10 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
        return 0;
 }
 
-static unsigned int packet_length(const struct sk_buff *skb,
-                                 struct net_device *dev)
+static int packet_length(const struct sk_buff *skb,
+                        struct net_device *dev)
 {
-       unsigned int length = skb->len - dev->hard_header_len;
+       int length = skb->len - dev->hard_header_len;
 
        if (!skb_vlan_tag_present(skb) &&
            eth_type_vlan(skb->protocol))
@@ -478,7 +478,7 @@ static unsigned int packet_length(const struct sk_buff *skb,
         * account for 802.1ad. e.g. is_skb_forwardable().
         */
 
-       return length;
+       return length > 0 ? length : 0;
 }
 
 void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
index 77787512fc32cbd0a0cb842a88a31c6711218b8c..9454e839379310592e7c04c5555b2fa9555d31d5 100644 (file)
@@ -342,6 +342,7 @@ static struct pernet_operations phonet_net_ops = {
        .exit = phonet_exit_net,
        .id   = &phonet_net_id,
        .size = sizeof(struct phonet_net),
+       .async = true,
 };
 
 /* Initialize Phonet devices list */
index 50615d5efac1529a0fd617c2692b1e9419da7137..9cf089b9754eaadbc58d9fdd9c455876a07712ca 100644 (file)
@@ -114,5 +114,6 @@ static struct rpmsg_driver qcom_smd_qrtr_driver = {
 
 module_rpmsg_driver(qcom_smd_qrtr_driver);
 
+MODULE_ALIAS("rpmsg:IPCRTR");
 MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver");
 MODULE_LICENSE("GPL v2");
index a937f18896aee6c0fce229b528d77b9ed33ae05c..ab751a150f707ccf4b6b4b5f0a378325882d95b2 100644 (file)
@@ -77,6 +77,7 @@ static int rds_release(struct socket *sock)
        rds_send_drop_to(rs, NULL);
        rds_rdma_drop_keys(rs);
        rds_notify_queue_get(rs, NULL);
+       rds_notify_msg_zcopy_purge(&rs->rs_zcookie_queue);
 
        spin_lock_bh(&rds_sock_lock);
        list_del_init(&rs->rs_item);
@@ -144,7 +145,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
  *  -  to signal that a previously congested destination may have become
  *     uncongested
  *  -  A notification has been queued to the socket (this can be a congestion
- *     update, or a RDMA completion).
+ *     update, or a RDMA completion, or a MSG_ZEROCOPY completion).
  *
  * EPOLLOUT is asserted if there is room on the send queue. This does not mean
  * however, that the next sendmsg() call will succeed. If the application tries
@@ -178,7 +179,8 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
                spin_unlock(&rs->rs_lock);
        }
        if (!list_empty(&rs->rs_recv_queue) ||
-           !list_empty(&rs->rs_notify_queue))
+           !list_empty(&rs->rs_notify_queue) ||
+           !list_empty(&rs->rs_zcookie_queue.zcookie_head))
                mask |= (EPOLLIN | EPOLLRDNORM);
        if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
                mask |= (EPOLLOUT | EPOLLWRNORM);
@@ -513,6 +515,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
        INIT_LIST_HEAD(&rs->rs_recv_queue);
        INIT_LIST_HEAD(&rs->rs_notify_queue);
        INIT_LIST_HEAD(&rs->rs_cong_list);
+       rds_message_zcopy_queue_init(&rs->rs_zcookie_queue);
        spin_lock_init(&rs->rs_rdma_lock);
        rs->rs_rdma_keys = RB_ROOT;
        rs->rs_rx_traces = 0;
index 2da3176bf7924d9132647d28a3c3263716ead608..abef75da89a7450092aefc46ed902e6602fba7a6 100644 (file)
@@ -540,9 +540,9 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
                          struct rds_info_iterator *iter,
                          struct rds_info_lengths *lens,
                          int (*visitor)(struct rds_connection *, void *),
+                         u64 *buffer,
                          size_t item_len)
 {
-       uint64_t buffer[(item_len + 7) / 8];
        struct hlist_head *head;
        struct rds_connection *conn;
        size_t i;
@@ -578,9 +578,9 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
                                    struct rds_info_iterator *iter,
                                    struct rds_info_lengths *lens,
                                    int (*visitor)(struct rds_conn_path *, void *),
+                                   u64 *buffer,
                                    size_t item_len)
 {
-       u64  buffer[(item_len + 7) / 8];
        struct hlist_head *head;
        struct rds_connection *conn;
        size_t i;
@@ -649,8 +649,11 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
                          struct rds_info_iterator *iter,
                          struct rds_info_lengths *lens)
 {
+       u64 buffer[(sizeof(struct rds_info_connection) + 7) / 8];
+
        rds_walk_conn_path_info(sock, len, iter, lens,
                                rds_conn_info_visitor,
+                               buffer,
                                sizeof(struct rds_info_connection));
 }
 
index 50a88f3e7e393401db0b143982571f6ad50cd999..02deee29e7f109e96908382345faf528471f2d90 100644 (file)
@@ -321,8 +321,11 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len,
                           struct rds_info_iterator *iter,
                           struct rds_info_lengths *lens)
 {
+       u64 buffer[(sizeof(struct rds_info_rdma_connection) + 7) / 8];
+
        rds_for_each_conn_info(sock, len, iter, lens,
                                rds_ib_conn_info_visitor,
+                               buffer,
                                sizeof(struct rds_info_rdma_connection));
 }
 
index 651834513481a3db69f62e884f1b2e9f84d833f8..a35f7697198499eece20e1678f5d003d64bf785a 100644 (file)
@@ -48,7 +48,6 @@ static unsigned int   rds_exthdr_size[__RDS_EXTHDR_MAX] = {
 [RDS_EXTHDR_GEN_NUM]   = sizeof(u32),
 };
 
-
 void rds_message_addref(struct rds_message *rm)
 {
        rdsdebug("addref rm %p ref %d\n", rm, refcount_read(&rm->m_refcount));
@@ -56,59 +55,73 @@ void rds_message_addref(struct rds_message *rm)
 }
 EXPORT_SYMBOL_GPL(rds_message_addref);
 
-static inline bool skb_zcookie_add(struct sk_buff *skb, u32 cookie)
+static inline bool rds_zcookie_add(struct rds_msg_zcopy_info *info, u32 cookie)
 {
-       struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
-       int ncookies;
-       u32 *ptr;
+       struct rds_zcopy_cookies *ck = &info->zcookies;
+       int ncookies = ck->num;
 
-       if (serr->ee.ee_origin != SO_EE_ORIGIN_ZCOOKIE)
-               return false;
-       ncookies = serr->ee.ee_data;
-       if (ncookies == SO_EE_ORIGIN_MAX_ZCOOKIES)
+       if (ncookies == RDS_MAX_ZCOOKIES)
                return false;
-       ptr = skb_put(skb, sizeof(u32));
-       *ptr = cookie;
-       serr->ee.ee_data = ++ncookies;
+       ck->cookies[ncookies] = cookie;
+       ck->num =  ++ncookies;
        return true;
 }
 
-static void rds_rm_zerocopy_callback(struct rds_sock *rs,
-                                    struct rds_znotifier *znotif)
+static struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif)
+{
+       return container_of(znotif, struct rds_msg_zcopy_info, znotif);
+}
+
+void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *q)
 {
-       struct sock *sk = rds_rs_to_sk(rs);
-       struct sk_buff *skb, *tail;
-       struct sock_exterr_skb *serr;
        unsigned long flags;
-       struct sk_buff_head *q;
-       u32 cookie = znotif->z_cookie;
+       LIST_HEAD(copy);
+       struct rds_msg_zcopy_info *info, *tmp;
 
-       q = &sk->sk_error_queue;
        spin_lock_irqsave(&q->lock, flags);
-       tail = skb_peek_tail(q);
+       list_splice(&q->zcookie_head, &copy);
+       INIT_LIST_HEAD(&q->zcookie_head);
+       spin_unlock_irqrestore(&q->lock, flags);
 
-       if (tail && skb_zcookie_add(tail, cookie)) {
-               spin_unlock_irqrestore(&q->lock, flags);
-               mm_unaccount_pinned_pages(&znotif->z_mmp);
-               consume_skb(rds_skb_from_znotifier(znotif));
-               sk->sk_error_report(sk);
-               return;
+       list_for_each_entry_safe(info, tmp, &copy, rs_zcookie_next) {
+               list_del(&info->rs_zcookie_next);
+               kfree(info);
        }
+}
+
+/* Queue the zerocopy completion cookie carried by @znotif on @rs.
+ *
+ * The pinned-page accounting held in @znotif is released first. The cookie
+ * is then appended to the last entry of rs->rs_zcookie_queue if that entry
+ * still has room, in which case the rds_msg_zcopy_info embedding @znotif is
+ * freed. Otherwise that rds_msg_zcopy_info is reused as a new queue entry
+ * holding just this cookie and is linked at the tail of the queue.
+ *
+ * The cookie is read from @znotif up front: the info's zcookies area is
+ * zeroed before being reused, so z_cookie must be captured beforehand.
+ */
+static void rds_rm_zerocopy_callback(struct rds_sock *rs,
+                                    struct rds_znotifier *znotif)
+{
+       struct rds_msg_zcopy_info *info;
+       struct rds_msg_zcopy_queue *q;
+       u32 cookie = znotif->z_cookie;
+       struct rds_zcopy_cookies *ck;
+       struct list_head *head;
+       unsigned long flags;
 
-       skb = rds_skb_from_znotifier(znotif);
-       serr = SKB_EXT_ERR(skb);
-       memset(&serr->ee, 0, sizeof(serr->ee));
-       serr->ee.ee_errno = 0;
-       serr->ee.ee_origin = SO_EE_ORIGIN_ZCOOKIE;
-       serr->ee.ee_info = 0;
-       WARN_ON(!skb_zcookie_add(skb, cookie));
+       mm_unaccount_pinned_pages(&znotif->z_mmp);
+       q = &rs->rs_zcookie_queue;
+       spin_lock_irqsave(&q->lock, flags);
+       head = &q->zcookie_head;
+       if (!list_empty(head)) {
+               info = list_last_entry(head, struct rds_msg_zcopy_info,
+                                      rs_zcookie_next);
+               if (rds_zcookie_add(info, cookie)) {
+                       spin_unlock_irqrestore(&q->lock, flags);
+                       kfree(rds_info_from_znotifier(znotif));
+                       /* caller invokes rds_wake_sk_sleep() */
+                       return;
+               }
+       }
 
-       __skb_queue_tail(q, skb);
+       info = rds_info_from_znotifier(znotif);
+       ck = &info->zcookies;
+       memset(ck, 0, sizeof(*ck));
+       WARN_ON(!rds_zcookie_add(info, cookie));
+       list_add_tail(&info->rs_zcookie_next, &q->zcookie_head);
 
        spin_unlock_irqrestore(&q->lock, flags);
-       sk->sk_error_report(sk);
-
-       mm_unaccount_pinned_pages(&znotif->z_mmp);
+       /* caller invokes rds_wake_sk_sleep() */
 }
 
 /*
@@ -129,6 +142,7 @@ static void rds_message_purge(struct rds_message *rm)
                if (rm->data.op_mmp_znotifier) {
                        zcopy = true;
                        rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier);
+                       rds_wake_sk_sleep(rs);
                        rm->data.op_mmp_znotifier = NULL;
                }
                sock_put(rds_rs_to_sk(rs));
@@ -341,14 +355,13 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
        return rm;
 }
 
-int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
-                              bool zcopy)
+static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from)
 {
-       unsigned long to_copy, nbytes;
-       unsigned long sg_off;
        struct scatterlist *sg;
        int ret = 0;
        int length = iov_iter_count(from);
+       int total_copied = 0;
+       struct rds_msg_zcopy_info *info;
 
        rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
 
@@ -356,54 +369,66 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
         * now allocate and copy in the data payload.
         */
        sg = rm->data.op_sg;
-       sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
 
-       if (zcopy) {
-               int total_copied = 0;
-               struct sk_buff *skb;
-
-               skb = alloc_skb(SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(u32),
-                               GFP_KERNEL);
-               if (!skb)
-                       return -ENOMEM;
-               rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb);
-               if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
-                                           length)) {
-                       ret = -ENOMEM;
+       info = kzalloc(sizeof(*info), GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
+       INIT_LIST_HEAD(&info->rs_zcookie_next);
+       rm->data.op_mmp_znotifier = &info->znotif;
+       if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
+                                   length)) {
+               ret = -ENOMEM;
+               goto err;
+       }
+       while (iov_iter_count(from)) {
+               struct page *pages;
+               size_t start;
+               ssize_t copied;
+
+               copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
+                                           1, &start);
+               if (copied < 0) {
+                       struct mmpin *mmp;
+                       int i;
+
+                       for (i = 0; i < rm->data.op_nents; i++)
+                               put_page(sg_page(&rm->data.op_sg[i]));
+                       mmp = &rm->data.op_mmp_znotifier->z_mmp;
+                       mm_unaccount_pinned_pages(mmp);
+                       ret = -EFAULT;
                        goto err;
                }
-               while (iov_iter_count(from)) {
-                       struct page *pages;
-                       size_t start;
-                       ssize_t copied;
-
-                       copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
-                                                   1, &start);
-                       if (copied < 0) {
-                               struct mmpin *mmp;
-                               int i;
-
-                               for (i = 0; i < rm->data.op_nents; i++)
-                                       put_page(sg_page(&rm->data.op_sg[i]));
-                               mmp = &rm->data.op_mmp_znotifier->z_mmp;
-                               mm_unaccount_pinned_pages(mmp);
-                               ret = -EFAULT;
-                               goto err;
-                       }
-                       total_copied += copied;
-                       iov_iter_advance(from, copied);
-                       length -= copied;
-                       sg_set_page(sg, pages, copied, start);
-                       rm->data.op_nents++;
-                       sg++;
-               }
-               WARN_ON_ONCE(length != 0);
-               return ret;
+               total_copied += copied;
+               iov_iter_advance(from, copied);
+               length -= copied;
+               sg_set_page(sg, pages, copied, start);
+               rm->data.op_nents++;
+               sg++;
+       }
+       WARN_ON_ONCE(length != 0);
+       return ret;
 err:
-               consume_skb(skb);
-               rm->data.op_mmp_znotifier = NULL;
-               return ret;
-       } /* zcopy */
+       kfree(info);
+       rm->data.op_mmp_znotifier = NULL;
+       return ret;
+}
+
+int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
+                              bool zcopy)
+{
+       unsigned long to_copy, nbytes;
+       unsigned long sg_off;
+       struct scatterlist *sg;
+       int ret = 0;
+
+       rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
+
+       /* now allocate and copy in the data payload.  */
+       sg = rm->data.op_sg;
+       sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
+
+       if (zcopy)
+               return rds_message_zcopy_from_user(rm, from);
 
        while (iov_iter_count(from)) {
                if (!sg_page(sg)) {
index 31cd388520502c5bbbc8b331953a38528a059b6a..b04c333d9d1c201ba4aa4617c750941cf4cdfec7 100644 (file)
@@ -357,16 +357,27 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
 #define RDS_MSG_FLUSH          8
 
 struct rds_znotifier {
-       struct list_head        z_list;
        struct mmpin            z_mmp;
        u32                     z_cookie;
 };
 
-#define        RDS_ZCOPY_SKB(__skb)    ((struct rds_znotifier *)&((__skb)->cb[0]))
+struct rds_msg_zcopy_info {    /* one element of a struct rds_msg_zcopy_queue list */
+       struct list_head rs_zcookie_next;       /* links this entry into rds_msg_zcopy_queue.zcookie_head */
+       union {         /* znotif and zcookies overlay the same storage */
+               struct rds_znotifier znotif;
+               struct rds_zcopy_cookies zcookies;      /* what rds_recvmsg_zcookie() passes to put_cmsg() */
+       };
+};
+
+struct rds_msg_zcopy_queue {   /* embedded in struct rds_sock as rs_zcookie_queue */
+       struct list_head zcookie_head;  /* list of struct rds_msg_zcopy_info, chained by rs_zcookie_next */
+       spinlock_t lock; /* protects zcookie_head queue */
+};
 
-static inline struct sk_buff *rds_skb_from_znotifier(struct rds_znotifier *z)
+static inline void rds_message_zcopy_queue_init(struct rds_msg_zcopy_queue *q)
 {
-       return container_of((void *)z, struct sk_buff, cb);
+       spin_lock_init(&q->lock);
+       INIT_LIST_HEAD(&q->zcookie_head);
 }
 
 struct rds_message {
@@ -603,6 +614,7 @@ struct rds_sock {
        /* Socket receive path trace points*/
        u8                      rs_rx_traces;
        u8                      rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
+       struct rds_msg_zcopy_queue rs_zcookie_queue;
 };
 
 static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
@@ -723,6 +735,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
                          struct rds_info_iterator *iter,
                          struct rds_info_lengths *lens,
                          int (*visitor)(struct rds_connection *, void *),
+                         u64 *buffer,
                          size_t item_len);
 
 __printf(2, 3)
@@ -801,6 +814,7 @@ void rds_message_addref(struct rds_message *rm);
 void rds_message_put(struct rds_message *rm);
 void rds_message_wait(struct rds_message *rm);
 void rds_message_unmapped(struct rds_message *rm);
+void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *info);
 
 static inline void rds_message_make_checksum(struct rds_header *hdr)
 {
index b080961464df71ac048e971a544b32f3be5e7b48..de50e2126e404aed541b8d268a28da08154bf08d 100644 (file)
@@ -577,6 +577,41 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
        return ret;
 }
 
+static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg)
+{      /* Move the head entry of rs->rs_zcookie_queue into an RDS_CMSG_ZCOPY_COMPLETION cmsg; true only if one was delivered. */
+       struct rds_msg_zcopy_queue *q = &rs->rs_zcookie_queue;
+       struct rds_msg_zcopy_info *info = NULL;
+       struct rds_zcopy_cookies *done;
+       unsigned long flags;
+       /* Without a control buffer there is nowhere to put the cmsg. */
+       if (!msg->msg_control)
+               return false;
+       /* Needs SOCK_ZEROCOPY set on the socket and room for one rds_zcopy_cookies cmsg. */
+       if (!sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY) ||
+           msg->msg_controllen < CMSG_SPACE(sizeof(*done)))
+               return false;
+       /* Detach the head entry, if any, while holding the queue lock. */
+       spin_lock_irqsave(&q->lock, flags);
+       if (!list_empty(&q->zcookie_head)) {
+               info = list_entry(q->zcookie_head.next,
+                                 struct rds_msg_zcopy_info, rs_zcookie_next);
+               list_del(&info->rs_zcookie_next);
+       }
+       spin_unlock_irqrestore(&q->lock, flags);
+       if (!info)      /* queue was empty */
+               return false;
+       done = &info->zcookies;
+       if (put_cmsg(msg, SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION, sizeof(*done),
+                    done)) {   /* put_cmsg() reported failure: re-insert at the head of the queue */
+               spin_lock_irqsave(&q->lock, flags);
+               list_add(&info->rs_zcookie_next, &q->zcookie_head);
+               spin_unlock_irqrestore(&q->lock, flags);
+               return false;
+       }
+       kfree(info);    /* put_cmsg() returned 0, so the dequeued entry is released */
+       return true;
+}
+
 int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
                int msg_flags)
 {
@@ -611,7 +646,9 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 
                if (!rds_next_incoming(rs, &inc)) {
                        if (nonblock) {
-                               ret = -EAGAIN;
+                               bool reaped = rds_recvmsg_zcookie(rs, msg);
+
+                               ret = reaped ?  0 : -EAGAIN;
                                break;
                        }
 
@@ -660,6 +697,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
                        ret = -EFAULT;
                        goto out;
                }
+               rds_recvmsg_zcookie(rs, msg);
 
                rds_stats_inc(s_recv_delivered);
 
index 08230a1450427c031bccae1920cdc461f593e10c..4f3a32c38bf5430a3b8c3a575c8a43ad32a3a2b9 100644 (file)
@@ -272,13 +272,14 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)
 static void rds_tcp_conn_free(void *arg)
 {
        struct rds_tcp_connection *tc = arg;
+       unsigned long flags;
 
        rdsdebug("freeing tc %p\n", tc);
 
-       spin_lock_bh(&rds_tcp_conn_lock);
+       spin_lock_irqsave(&rds_tcp_conn_lock, flags);
        if (!tc->t_tcp_node_detached)
                list_del(&tc->t_tcp_node);
-       spin_unlock_bh(&rds_tcp_conn_lock);
+       spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
 
        kmem_cache_free(rds_tcp_conn_slab, tc);
 }
@@ -308,13 +309,13 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
                rdsdebug("rds_conn_path [%d] tc %p\n", i,
                         conn->c_path[i].cp_transport_data);
        }
-       spin_lock_bh(&rds_tcp_conn_lock);
+       spin_lock_irq(&rds_tcp_conn_lock);
        for (i = 0; i < RDS_MPATH_WORKERS; i++) {
                tc = conn->c_path[i].cp_transport_data;
                tc->t_tcp_node_detached = false;
                list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
        }
-       spin_unlock_bh(&rds_tcp_conn_lock);
+       spin_unlock_irq(&rds_tcp_conn_lock);
 fail:
        if (ret) {
                for (j = 0; j < i; j++)
@@ -484,39 +485,6 @@ static __net_init int rds_tcp_init_net(struct net *net)
        return err;
 }
 
-static void __net_exit rds_tcp_exit_net(struct net *net)
-{
-       struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
-
-       if (rtn->rds_tcp_sysctl)
-               unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
-
-       if (net != &init_net && rtn->ctl_table)
-               kfree(rtn->ctl_table);
-
-       /* If rds_tcp_exit_net() is called as a result of netns deletion,
-        * the rds_tcp_kill_sock() device notifier would already have cleaned
-        * up the listen socket, thus there is no work to do in this function.
-        *
-        * If rds_tcp_exit_net() is called as a result of module unload,
-        * i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then
-        * we do need to clean up the listen socket here.
-        */
-       if (rtn->rds_tcp_listen_sock) {
-               struct socket *lsock = rtn->rds_tcp_listen_sock;
-
-               rtn->rds_tcp_listen_sock = NULL;
-               rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
-       }
-}
-
-static struct pernet_operations rds_tcp_net_ops = {
-       .init = rds_tcp_init_net,
-       .exit = rds_tcp_exit_net,
-       .id = &rds_tcp_netid,
-       .size = sizeof(struct rds_tcp_net),
-};
-
 static void rds_tcp_kill_sock(struct net *net)
 {
        struct rds_tcp_connection *tc, *_tc;
@@ -526,7 +494,7 @@ static void rds_tcp_kill_sock(struct net *net)
 
        rtn->rds_tcp_listen_sock = NULL;
        rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
-       spin_lock_bh(&rds_tcp_conn_lock);
+       spin_lock_irq(&rds_tcp_conn_lock);
        list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
                struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
 
@@ -539,45 +507,43 @@ static void rds_tcp_kill_sock(struct net *net)
                        tc->t_tcp_node_detached = true;
                }
        }
-       spin_unlock_bh(&rds_tcp_conn_lock);
+       spin_unlock_irq(&rds_tcp_conn_lock);
        list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
                rds_conn_destroy(tc->t_cpath->cp_conn);
 }
 
-void *rds_tcp_listen_sock_def_readable(struct net *net)
+static void __net_exit rds_tcp_exit_net(struct net *net)
 {
        struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
-       struct socket *lsock = rtn->rds_tcp_listen_sock;
 
-       if (!lsock)
-               return NULL;
+       rds_tcp_kill_sock(net);
 
-       return lsock->sk->sk_user_data;
+       if (rtn->rds_tcp_sysctl)
+               unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
+
+       if (net != &init_net && rtn->ctl_table)
+               kfree(rtn->ctl_table);
 }
 
-static int rds_tcp_dev_event(struct notifier_block *this,
-                            unsigned long event, void *ptr)
+static struct pernet_operations rds_tcp_net_ops = {
+       .init = rds_tcp_init_net,
+       .exit = rds_tcp_exit_net,
+       .id = &rds_tcp_netid,
+       .size = sizeof(struct rds_tcp_net),
+       .async = true,
+};
+
+void *rds_tcp_listen_sock_def_readable(struct net *net)
 {
-       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+       struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+       struct socket *lsock = rtn->rds_tcp_listen_sock;
 
-       /* rds-tcp registers as a pernet subys, so the ->exit will only
-        * get invoked after network acitivity has quiesced. We need to
-        * clean up all sockets  to quiesce network activity, and use
-        * the unregistration of the per-net loopback device as a trigger
-        * to start that cleanup.
-        */
-       if (event == NETDEV_UNREGISTER_FINAL &&
-           dev->ifindex == LOOPBACK_IFINDEX)
-               rds_tcp_kill_sock(dev_net(dev));
+       if (!lsock)
+               return NULL;
 
-       return NOTIFY_DONE;
+       return lsock->sk->sk_user_data;
 }
 
-static struct notifier_block rds_tcp_dev_notifier = {
-       .notifier_call        = rds_tcp_dev_event,
-       .priority = -10, /* must be called after other network notifiers */
-};
-
 /* when sysctl is used to modify some kernel socket parameters,this
  * function  resets the RDS connections in that netns  so that we can
  * restart with new parameters.  The assumption is that such reset
@@ -587,7 +553,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
 {
        struct rds_tcp_connection *tc, *_tc;
 
-       spin_lock_bh(&rds_tcp_conn_lock);
+       spin_lock_irq(&rds_tcp_conn_lock);
        list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
                struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
 
@@ -597,7 +563,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
                /* reconnect with new parameters */
                rds_conn_path_drop(tc->t_cpath, false);
        }
-       spin_unlock_bh(&rds_tcp_conn_lock);
+       spin_unlock_irq(&rds_tcp_conn_lock);
 }
 
 static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
@@ -623,9 +589,7 @@ static void rds_tcp_exit(void)
        rds_tcp_set_unloading();
        synchronize_rcu();
        rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
-       unregister_pernet_subsys(&rds_tcp_net_ops);
-       if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
-               pr_warn("could not unregister rds_tcp_dev_notifier\n");
+       unregister_pernet_device(&rds_tcp_net_ops);
        rds_tcp_destroy_conns();
        rds_trans_unregister(&rds_tcp_transport);
        rds_tcp_recv_exit();
@@ -649,24 +613,15 @@ static int rds_tcp_init(void)
        if (ret)
                goto out_slab;
 
-       ret = register_pernet_subsys(&rds_tcp_net_ops);
+       ret = register_pernet_device(&rds_tcp_net_ops);
        if (ret)
                goto out_recv;
 
-       ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
-       if (ret) {
-               pr_warn("could not register rds_tcp_dev_notifier\n");
-               goto out_pernet;
-       }
-
        rds_trans_register(&rds_tcp_transport);
 
        rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
 
        goto out;
-
-out_pernet:
-       unregister_pernet_subsys(&rds_tcp_net_ops);
 out_recv:
        rds_tcp_recv_exit();
 out_slab:
index c061d6eb465d528966a513173950463ded396c5e..22571189f21e7e4a805af1b7edaed1c9f3c918ef 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -142,12 +142,20 @@ int rds_tcp_accept_one(struct socket *sock)
        if (ret)
                goto out;
 
-       new_sock->type = sock->type;
-       new_sock->ops = sock->ops;
        ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true);
        if (ret < 0)
                goto out;
 
+       /* sock_create_lite() does not get a hold on the owner module so we
+        * need to do it here.  Note that sock_release() uses sock->ops to
+        * determine if it needs to decrement the reference count.  So set
+        * sock->ops after calling accept() in case that fails.  And there's
+        * no need to do try_module_get() as the listener should have a hold
+        * already.
+        */
+       new_sock->ops = sock->ops;
+       __module_get(new_sock->ops->owner);
+
        ret = rds_tcp_keepalive(new_sock);
        if (ret < 0)
                goto out;
index f18c9248e0d4c6108cb0ae82cc408e70d4aacbdb..5fd939dabf41f4467515358108b2b97b64229fe8 100644 (file)
@@ -106,4 +106,5 @@ struct pernet_operations rxrpc_net_ops = {
        .exit   = rxrpc_exit_net,
        .id     = &rxrpc_net_id,
        .size   = sizeof(struct rxrpc_net),
+       .async  = true,
 };
index 9d45d8b567447c7eb8e35a997d953ed63197d715..7bff716e911ea6708fe23b6075323fd4371d5c76 100644 (file)
@@ -272,7 +272,7 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
                             unsigned int *_offset, unsigned int *_len)
 {
        unsigned int offset = sizeof(struct rxrpc_wire_header);
-       unsigned int len = *_len;
+       unsigned int len;
        int ret;
        u8 annotation = *_annotation;
 
index 1f65d6ada9ff04d19c589d2b238c0f143d2b30c8..7bd1b964f02172701677056cc75e2ae04f526ea5 100644 (file)
@@ -109,6 +109,42 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
 }
 EXPORT_SYMBOL(__tcf_idr_release);
 
+static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
+{      /* Sum the sizes of the attributes itemised in the return expression below. */
+       u32 cookie_len = 0;
+       /* The cookie attribute is only counted when the action carries one. */
+       if (act->act_cookie)
+               cookie_len = nla_total_size(act->act_cookie->len);
+       /* Per-term comments name the attribute each size accounts for. */
+       return  nla_total_size(0) /* action number nested */
+               + nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */
+               + cookie_len /* TCA_ACT_COOKIE */
+               + nla_total_size(0) /* TCA_ACT_STATS nested */
+               /* TCA_STATS_BASIC */
+               + nla_total_size_64bit(sizeof(struct gnet_stats_basic))
+               /* TCA_STATS_QUEUE */
+               + nla_total_size_64bit(sizeof(struct gnet_stats_queue))
+               + nla_total_size(0) /* TCA_OPTIONS nested */
+               + nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */
+}
+
+static size_t tcf_action_full_attrs_size(size_t sz)
+{      /* Add the fixed message overhead (header, struct tcamsg, nested TCA_ACT_TAB) to sz. */
+       return NLMSG_HDRLEN                     /* struct nlmsghdr */
+               + sizeof(struct tcamsg)
+               + nla_total_size(0)             /* TCA_ACT_TAB nested */
+               + sz;
+}
+
+static size_t tcf_action_fill_size(const struct tc_action *act)
+{      /* Shared attribute size plus whatever act->ops->get_fill_size() reports, if that hook is set. */
+       size_t sz = tcf_action_shared_attrs_size(act);
+
+       if (act->ops->get_fill_size)
+               return act->ops->get_fill_size(act) + sz;
+       return sz;      /* no per-action hook: shared size only */
+}
+
 static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
                           struct netlink_callback *cb)
 {
@@ -260,14 +296,6 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
 }
 EXPORT_SYMBOL(tcf_idr_check);
 
-void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est)
-{
-       if (est)
-               gen_kill_estimator(&a->tcfa_rate_est);
-       free_tcf(a);
-}
-EXPORT_SYMBOL(tcf_idr_cleanup);
-
 int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
                   struct tc_action **a, const struct tc_action_ops *ops,
                   int bind, bool cpustats)
@@ -741,10 +769,12 @@ static void cleanup_a(struct list_head *actions, int ovr)
 
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
                    struct nlattr *est, char *name, int ovr, int bind,
-                   struct list_head *actions, struct netlink_ext_ack *extack)
+                   struct list_head *actions, size_t *attr_size,
+                   struct netlink_ext_ack *extack)
 {
        struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
        struct tc_action *act;
+       size_t sz = 0;
        int err;
        int i;
 
@@ -760,11 +790,14 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
                        goto err;
                }
                act->order = i;
+               sz += tcf_action_fill_size(act);
                if (ovr)
                        act->tcfa_refcnt++;
                list_add_tail(&act->list, actions);
        }
 
+       *attr_size = tcf_action_full_attrs_size(sz);
+
        /* Remove the temp refcnt which was necessary to protect against
         * destroying an existing action which was being replaced
         */
@@ -994,12 +1027,13 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 
 static int
 tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
-              u32 portid, struct netlink_ext_ack *extack)
+              u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
 {
        int ret;
        struct sk_buff *skb;
 
-       skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+       skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
+                       GFP_KERNEL);
        if (!skb)
                return -ENOBUFS;
 
@@ -1032,6 +1066,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
        int i, ret;
        struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
        struct tc_action *act;
+       size_t attr_size = 0;
        LIST_HEAD(actions);
 
        ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
@@ -1053,13 +1088,16 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
                        goto err;
                }
                act->order = i;
+               attr_size += tcf_action_fill_size(act);
                list_add_tail(&act->list, &actions);
        }
 
+       attr_size = tcf_action_full_attrs_size(attr_size);
+
        if (event == RTM_GETACTION)
                ret = tcf_get_notify(net, portid, n, &actions, event, extack);
        else { /* delete */
-               ret = tcf_del_notify(net, n, &actions, portid, extack);
+               ret = tcf_del_notify(net, n, &actions, portid, attr_size, extack);
                if (ret)
                        goto err;
                return ret;
@@ -1072,18 +1110,19 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 
 static int
 tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
-              u32 portid, struct netlink_ext_ack *extack)
+              u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
 {
        struct sk_buff *skb;
        int err = 0;
 
-       skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+       skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
+                       GFP_KERNEL);
        if (!skb)
                return -ENOBUFS;
 
        if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
                         RTM_NEWACTION, 0, 0) <= 0) {
-               NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while deleting TC action");
+               NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
                kfree_skb(skb);
                return -EINVAL;
        }
@@ -1099,15 +1138,16 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
                          struct nlmsghdr *n, u32 portid, int ovr,
                          struct netlink_ext_ack *extack)
 {
+       size_t attr_size = 0;
        int ret = 0;
        LIST_HEAD(actions);
 
        ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
-                             extack);
+                             &attr_size, extack);
        if (ret)
                return ret;
 
-       return tcf_add_notify(net, n, &actions, portid, extack);
+       return tcf_add_notify(net, n, &actions, portid, attr_size, extack);
 }
 
 static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
index cb3c5d403c887dd360feac64bdb8232b3b22c5dc..5cb9b268e8ffa3b19a54f8a4e6aa8c908eb06a0b 100644 (file)
@@ -352,7 +352,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
        return res;
 out:
        if (res == ACT_P_CREATED)
-               tcf_idr_cleanup(*act, est);
+               tcf_idr_release(*act, bind);
 
        return ret;
 }
@@ -413,6 +413,7 @@ static struct pernet_operations bpf_net_ops = {
        .exit_batch = bpf_exit_net,
        .id   = &bpf_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init bpf_init_module(void)
index e4b880fa51fec90fa1a1d92c11f8a337637d3509..371e5e4ab3e2331e86a6508563622198b52bb30e 100644 (file)
@@ -222,6 +222,7 @@ static struct pernet_operations connmark_net_ops = {
        .exit_batch = connmark_exit_net,
        .id   = &connmark_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init connmark_init_module(void)
index d5c2e528d150bf8e7d2f9007e34d01bc0347dad4..a527e287c086e98e03de9de455e5af0fa04a85d2 100644 (file)
@@ -350,7 +350,7 @@ static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl,
 {
        struct sctphdr *sctph;
 
-       if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)
+       if (skb_is_gso(skb) && skb_is_gso_sctp(skb))
                return 1;
 
        sctph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*sctph));
@@ -626,7 +626,8 @@ static void tcf_csum_cleanup(struct tc_action *a)
        struct tcf_csum_params *params;
 
        params = rcu_dereference_protected(p->params, 1);
-       kfree_rcu(params, rcu);
+       if (params)
+               kfree_rcu(params, rcu);
 }
 
 static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
@@ -677,6 +678,7 @@ static struct pernet_operations csum_net_ops = {
        .exit_batch = csum_exit_net,
        .id   = &csum_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_DESCRIPTION("Checksum updating actions");
index f072bcf337604396344a1d29ad6cc262c6ae3352..88fbb8403565f70c42d7fc511f6fd77db766f32b 100644 (file)
@@ -217,6 +217,19 @@ static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index,
        return tcf_idr_search(tn, a, index);
 }
 
+static size_t tcf_gact_get_fill_size(const struct tc_action *act)
+{      /* Size of the gact-specific attributes; installed as act_gact_ops.get_fill_size. */
+       size_t sz = nla_total_size(sizeof(struct tc_gact)); /* TCA_GACT_PARMS */
+
+#ifdef CONFIG_GACT_PROB
+       if (to_gact(act)->tcfg_ptype)   /* counted only when tcfg_ptype is non-zero */
+               /* TCA_GACT_PROB */
+               sz += nla_total_size(sizeof(struct tc_gact_p));
+#endif
+
+       return sz;
+}
+
 static struct tc_action_ops act_gact_ops = {
        .kind           =       "gact",
        .type           =       TCA_ACT_GACT,
@@ -227,6 +240,7 @@ static struct tc_action_ops act_gact_ops = {
        .init           =       tcf_gact_init,
        .walk           =       tcf_gact_walker,
        .lookup         =       tcf_gact_search,
+       .get_fill_size  =       tcf_gact_get_fill_size,
        .size           =       sizeof(struct tcf_gact),
 };
 
@@ -247,6 +261,7 @@ static struct pernet_operations gact_net_ops = {
        .exit_batch = gact_exit_net,
        .id   = &gact_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
index a5994cf0512bd731f55a1dfa798d85ff658e18ef..555b1caeff727561d0664a4abee6ff9b46aad857 100644 (file)
@@ -870,6 +870,7 @@ static struct pernet_operations ife_net_ops = {
        .exit_batch = ife_exit_net,
        .id   = &ife_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init ife_init_module(void)
index 9784629090ad7cd3c886fc753a540fb46215709a..b5e8565b89c7447645ef94403b7b15f339daf191 100644 (file)
@@ -80,9 +80,12 @@ static void ipt_destroy_target(struct xt_entry_target *t)
 static void tcf_ipt_release(struct tc_action *a)
 {
        struct tcf_ipt *ipt = to_ipt(a);
-       ipt_destroy_target(ipt->tcfi_t);
+
+       if (ipt->tcfi_t) {
+               ipt_destroy_target(ipt->tcfi_t);
+               kfree(ipt->tcfi_t);
+       }
        kfree(ipt->tcfi_tname);
-       kfree(ipt->tcfi_t);
 }
 
 static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
@@ -187,7 +190,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
        kfree(tname);
 err1:
        if (ret == ACT_P_CREATED)
-               tcf_idr_cleanup(*a, est);
+               tcf_idr_release(*a, bind);
        return err;
 }
 
@@ -349,6 +352,7 @@ static struct pernet_operations ipt_net_ops = {
        .exit_batch = ipt_exit_net,
        .id   = &ipt_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
@@ -399,6 +403,7 @@ static struct pernet_operations xt_net_ops = {
        .exit_batch = xt_exit_net,
        .id   = &xt_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
index fd34015331ab86c395a2e599546a51b64efb8625..64c86579c3d9e725040bfafbe74c5dad0d3dba70 100644 (file)
@@ -353,6 +353,7 @@ static struct pernet_operations mirred_net_ops = {
        .exit_batch = mirred_exit_net,
        .id   = &mirred_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002)");
index 4b5848b6c25207ac74b0508259f9f3019020d3c9..b1bc757f649129086afea4aa68f74f02d5026a0b 100644 (file)
@@ -323,6 +323,7 @@ static struct pernet_operations nat_net_ops = {
        .exit_batch = nat_exit_net,
        .id   = &nat_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_DESCRIPTION("Stateless NAT actions");
index 094303c27c5ea753376844e6130178dc2fd589b8..f392ccaaa0d83319e98698b07ae9a63c6d034499 100644 (file)
@@ -176,7 +176,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
                p = to_pedit(*a);
                keys = kmalloc(ksize, GFP_KERNEL);
                if (keys == NULL) {
-                       tcf_idr_cleanup(*a, est);
+                       tcf_idr_release(*a, bind);
                        kfree(keys_ex);
                        return -ENOMEM;
                }
@@ -465,6 +465,7 @@ static struct pernet_operations pedit_net_ops = {
        .exit_batch = pedit_exit_net,
        .id   = &pedit_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
index ff55bd6c7db002a39ee7107bd005db8434392451..7081ec75e696ef6c306557e5ed27dcf36147ebaa 100644 (file)
@@ -196,7 +196,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
        qdisc_put_rtab(P_tab);
        qdisc_put_rtab(R_tab);
        if (ret == ACT_P_CREATED)
-               tcf_idr_cleanup(*a, est);
+               tcf_idr_release(*a, bind);
        return err;
 }
 
@@ -347,6 +347,7 @@ static struct pernet_operations police_net_ops = {
        .exit_batch = police_exit_net,
        .id   = &police_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init police_init_module(void)
index 9765145aaf4066775f4ecdc8b90b951992515183..3a89f98f17e6691e94c735dfda747983b74fd9ed 100644 (file)
@@ -103,7 +103,8 @@ static void tcf_sample_cleanup(struct tc_action *a)
 
        psample_group = rtnl_dereference(s->psample_group);
        RCU_INIT_POINTER(s->psample_group, NULL);
-       psample_group_put(psample_group);
+       if (psample_group)
+               psample_group_put(psample_group);
 }
 
 static bool tcf_sample_dev_ok_push(struct net_device *dev)
@@ -248,6 +249,7 @@ static struct pernet_operations sample_net_ops = {
        .exit_batch = sample_exit_net,
        .id   = &sample_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init sample_init_module(void)
index 8244e221fe4f8b6ae9aa2b0f9b03e34a320e3e52..e84768ae610a8a33ba32796104b1c41ee8c31e37 100644 (file)
@@ -121,7 +121,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
                d = to_defact(*a);
                ret = alloc_defdata(d, defdata);
                if (ret < 0) {
-                       tcf_idr_cleanup(*a, est);
+                       tcf_idr_release(*a, bind);
                        return ret;
                }
                d->tcf_action = parm->action;
@@ -216,6 +216,7 @@ static struct pernet_operations simp_net_ops = {
        .exit_batch = simp_exit_net,
        .id   = &simp_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2005)");
index ddf69fc01bdf8913260b63adb6de0bbbdcf90b25..7971510fe61b680668a757cd3f1af35d868af7aa 100644 (file)
@@ -253,6 +253,7 @@ static struct pernet_operations skbedit_net_ops = {
        .exit_batch = skbedit_exit_net,
        .id   = &skbedit_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
index a406f191cb846da7c2516c476785230101197a8b..142a996ac776d63309a2825201dd21c6e4051110 100644 (file)
@@ -152,7 +152,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
        ASSERT_RTNL();
        p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
        if (unlikely(!p)) {
-               if (ovr)
+               if (ret == ACT_P_CREATED)
                        tcf_idr_release(*a, bind);
                return -ENOMEM;
        }
@@ -190,7 +190,8 @@ static void tcf_skbmod_cleanup(struct tc_action *a)
        struct tcf_skbmod_params  *p;
 
        p = rcu_dereference_protected(d->skbmod_p, 1);
-       kfree_rcu(p, rcu);
+       if (p)
+               kfree_rcu(p, rcu);
 }
 
 static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
@@ -278,6 +279,7 @@ static struct pernet_operations skbmod_net_ops = {
        .exit_batch = skbmod_exit_net,
        .id   = &skbmod_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>");
index 41ff9d0e5c62f20078eb6a1b61925001f6dbdf9e..a1c8dd406a045a4da372ec633ff0e14a51223103 100644 (file)
@@ -153,6 +153,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
                metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
                break;
        default:
+               ret = -EINVAL;
                goto err_out;
        }
 
@@ -207,11 +208,12 @@ static void tunnel_key_release(struct tc_action *a)
        struct tcf_tunnel_key_params *params;
 
        params = rcu_dereference_protected(t->params, 1);
+       if (params) {
+               if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
+                       dst_release(&params->tcft_enc_metadata->dst);
 
-       if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
-               dst_release(&params->tcft_enc_metadata->dst);
-
-       kfree_rcu(params, rcu);
+               kfree_rcu(params, rcu);
+       }
 }
 
 static int tunnel_key_dump_addresses(struct sk_buff *skb,
@@ -337,6 +339,7 @@ static struct pernet_operations tunnel_key_net_ops = {
        .exit_batch = tunnel_key_exit_net,
        .id   = &tunnel_key_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init tunnel_key_init_module(void)
index 71411a255f04e789b170a8ee9382ea3c4923b48f..41a66effeb5fe3299e9a62493ba84fccdec9fba4 100644 (file)
@@ -117,7 +117,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
        struct tc_vlan *parm;
        struct tcf_vlan *v;
        int action;
-       __be16 push_vid = 0;
+       u16 push_vid = 0;
        __be16 push_proto = 0;
        u8 push_prio = 0;
        bool exists = false;
@@ -195,7 +195,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
        ASSERT_RTNL();
        p = kzalloc(sizeof(*p), GFP_KERNEL);
        if (!p) {
-               if (ovr)
+               if (ret == ACT_P_CREATED)
                        tcf_idr_release(*a, bind);
                return -ENOMEM;
        }
@@ -225,7 +225,8 @@ static void tcf_vlan_cleanup(struct tc_action *a)
        struct tcf_vlan_params *p;
 
        p = rcu_dereference_protected(v->vlan_p, 1);
-       kfree_rcu(p, rcu);
+       if (p)
+               kfree_rcu(p, rcu);
 }
 
 static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
@@ -313,6 +314,7 @@ static struct pernet_operations vlan_net_ops = {
        .exit_batch = vlan_exit_net,
        .id   = &vlan_net_id,
        .size = sizeof(struct tc_action_net),
+       .async = true,
 };
 
 static int __init vlan_init_module(void)
index 9d1a8bbf81523276c56d5cfe4d647a2ab523bf21..ec5fe8ec0c3e1cb12dfda0a0e6bf9f59a7c01ead 100644 (file)
@@ -1433,6 +1433,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 #ifdef CONFIG_NET_CLS_ACT
        {
                struct tc_action *act;
+               size_t attr_size = 0;
 
                if (exts->police && tb[exts->police]) {
                        act = tcf_action_init_1(net, tp, tb[exts->police],
@@ -1450,7 +1451,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 
                        err = tcf_action_init(net, tp, tb[exts->action],
                                              rate_tlv, NULL, ovr, TCA_ACT_BIND,
-                                             &actions, extack);
+                                             &actions, &attr_size, extack);
                        if (err)
                                return err;
                        list_for_each_entry(act, &actions, list)
@@ -1618,6 +1619,7 @@ static struct pernet_operations tcf_net_ops = {
        .exit = tcf_net_exit,
        .id   = &tcf_net_id,
        .size = sizeof(struct tcf_net),
+       .async = true,
 };
 
 static int __init tc_filter_init(void)
index 7d0ce2c40f9333aed8ef6bb4270fa8a8056330a9..d964e60c730eafb79a8a9437325e14c8d6288e40 100644 (file)
@@ -511,6 +511,9 @@ static int fl_set_key_flags(struct nlattr **tb,
 
        fl_set_key_flag(key, mask, flags_key, flags_mask,
                        TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+       fl_set_key_flag(key, mask, flags_key, flags_mask,
+                       TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
+                       FLOW_DIS_FIRST_FRAG);
 
        return 0;
 }
@@ -1130,6 +1133,9 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
 
        fl_get_key_flag(flags_key, flags_mask, &key, &mask,
                        TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+       fl_get_key_flag(flags_key, flags_mask, &key, &mask,
+                       TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
+                       FLOW_DIS_FIRST_FRAG);
 
        _key = cpu_to_be32(key);
        _mask = cpu_to_be32(mask);
index 27e672c12492022a79b676216fe774294a5cb1aa..68f9d942bed4dd5505aefe25314091ca21078c4a 100644 (file)
@@ -739,6 +739,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
 void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
                               unsigned int len)
 {
+       bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
        const struct Qdisc_class_ops *cops;
        unsigned long cl;
        u32 parentid;
@@ -760,8 +761,12 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
                 * If child was empty even before update then backlog
                 * counter is screwed and we skip notification because
                 * parent class is already passive.
+                *
+                * If the original child was offloaded then it is allowed
+                * to be seen as empty, so the parent is notified anyway.
                 */
-               notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
+               notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
+                                                      !qdisc_is_offloaded);
                /* TODO: perform the search on a per txq basis */
                sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
                if (sch == NULL) {
index 190570f21b208d5a17943360a3a6f85e1c2a2187..7e3fbe9cc936be376b66a5b12bf8957c3b601f2c 100644 (file)
@@ -106,6 +106,14 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
 
        __skb_queue_tail(&q->skb_bad_txq, skb);
 
+       if (qdisc_is_percpu_stats(q)) {
+               qdisc_qstats_cpu_backlog_inc(q, skb);
+               qdisc_qstats_cpu_qlen_inc(q);
+       } else {
+               qdisc_qstats_backlog_inc(q, skb);
+               q->q.qlen++;
+       }
+
        if (lock)
                spin_unlock(lock);
 }
@@ -196,14 +204,6 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
                        break;
                if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
                        qdisc_enqueue_skb_bad_txq(q, nskb);
-
-                       if (qdisc_is_percpu_stats(q)) {
-                               qdisc_qstats_cpu_backlog_inc(q, nskb);
-                               qdisc_qstats_cpu_qlen_inc(q);
-                       } else {
-                               qdisc_qstats_backlog_inc(q, nskb);
-                               q->q.qlen++;
-                       }
                        break;
                }
                skb->next = nskb;
@@ -628,6 +628,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
        int band = prio2band[skb->priority & TC_PRIO_MAX];
        struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
        struct skb_array *q = band2list(priv, band);
+       unsigned int pkt_len = qdisc_pkt_len(skb);
        int err;
 
        err = skb_array_produce(q, skb);
@@ -636,7 +637,10 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
                return qdisc_drop_cpu(skb, qdisc, to_free);
 
        qdisc_qstats_cpu_qlen_inc(qdisc);
-       qdisc_qstats_cpu_backlog_inc(qdisc, skb);
+       /* Note: skb can not be used after skb_array_produce(),
+        * so we better not use qdisc_qstats_cpu_backlog_inc()
+        */
+       this_cpu_add(qdisc->cpu_qstats->backlog, pkt_len);
        return NET_XMIT_SUCCESS;
 }
 
index 7c179addebcd2967e9fb9c280d8bbe64e33ebe5d..7d6801fc5340eff65b81037519ada115cbc23e20 100644 (file)
@@ -509,7 +509,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
        }
 
        if (unlikely(sch->q.qlen >= sch->limit))
-               return qdisc_drop(skb, sch, to_free);
+               return qdisc_drop_all(skb, sch, to_free);
 
        qdisc_qstats_backlog_inc(sch, skb);
 
index efbf51f3577893d90cb0dd01e261b1c6f7cf622c..222e53d3d27a90cdeb42c6c416c638243c6ad446 100644 (file)
@@ -142,9 +142,8 @@ prio_reset(struct Qdisc *sch)
        sch->q.qlen = 0;
 }
 
-static int prio_offload(struct Qdisc *sch, bool enable)
+static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
 {
-       struct prio_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_prio_qopt_offload opt = {
                .handle = sch->handle,
@@ -154,10 +153,10 @@ static int prio_offload(struct Qdisc *sch, bool enable)
        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return -EOPNOTSUPP;
 
-       if (enable) {
+       if (qopt) {
                opt.command = TC_PRIO_REPLACE;
-               opt.replace_params.bands = q->bands;
-               memcpy(&opt.replace_params.priomap, q->prio2band,
+               opt.replace_params.bands = qopt->bands;
+               memcpy(&opt.replace_params.priomap, qopt->priomap,
                       TC_PRIO_MAX + 1);
                opt.replace_params.qstats = &sch->qstats;
        } else {
@@ -174,7 +173,7 @@ prio_destroy(struct Qdisc *sch)
        struct prio_sched_data *q = qdisc_priv(sch);
 
        tcf_block_put(q->block);
-       prio_offload(sch, false);
+       prio_offload(sch, NULL);
        for (prio = 0; prio < q->bands; prio++)
                qdisc_destroy(q->queues[prio]);
 }
@@ -211,6 +210,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
                }
        }
 
+       prio_offload(sch, qopt);
        sch_tree_lock(sch);
        q->bands = qopt->bands;
        memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
@@ -230,7 +230,6 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
        }
 
        sch_tree_unlock(sch);
-       prio_offload(sch, true);
        return 0;
 }
 
@@ -309,12 +308,44 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                      struct Qdisc **old, struct netlink_ext_ack *extack)
 {
        struct prio_sched_data *q = qdisc_priv(sch);
+       struct tc_prio_qopt_offload graft_offload;
+       struct net_device *dev = qdisc_dev(sch);
        unsigned long band = arg - 1;
+       bool any_qdisc_is_offloaded;
+       int err;
 
        if (new == NULL)
                new = &noop_qdisc;
 
        *old = qdisc_replace(sch, new, &q->queues[band]);
+
+       if (!tc_can_offload(dev))
+               return 0;
+
+       graft_offload.handle = sch->handle;
+       graft_offload.parent = sch->parent;
+       graft_offload.graft_params.band = band;
+       graft_offload.graft_params.child_handle = new->handle;
+       graft_offload.command = TC_PRIO_GRAFT;
+
+       err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
+                                           &graft_offload);
+
+       /* Don't report error if the graft is part of destroy operation. */
+       if (err && new != &noop_qdisc) {
+               /* Don't report error if the parent, the old child and the new
+                * one are not offloaded.
+                */
+               any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
+               any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED;
+               if (*old)
+                       any_qdisc_is_offloaded |= (*old)->flags &
+                                                  TCQ_F_OFFLOADED;
+
+               if (any_qdisc_is_offloaded)
+                       NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
+       }
+
        return 0;
 }
 
index 229172d509cc5d189b926f15ec06785c7f6303ab..03225a8df9730cee7e020331b42a805d42b6f25c 100644 (file)
@@ -188,7 +188,8 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
        int ret;
 
        if (qdisc_pkt_len(skb) > q->max_size) {
-               if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size)
+               if (skb_is_gso(skb) &&
+                   skb_gso_validate_mac_len(skb, q->max_size))
                        return tbf_segment(skb, sch, to_free);
                return qdisc_drop(skb, sch, to_free);
        }
index 00667c50efa7abdc45dc0635ec59cc8e70878e54..e64630cd33318ef3c90339bd61388fafd27928f3 100644 (file)
@@ -101,13 +101,14 @@ struct sctp_shared_key *sctp_auth_shkey_create(__u16 key_id, gfp_t gfp)
                return NULL;
 
        INIT_LIST_HEAD(&new->key_list);
+       refcount_set(&new->refcnt, 1);
        new->key_id = key_id;
 
        return new;
 }
 
 /* Free the shared key structure */
-static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key)
+static void sctp_auth_shkey_destroy(struct sctp_shared_key *sh_key)
 {
        BUG_ON(!list_empty(&sh_key->key_list));
        sctp_auth_key_put(sh_key->key);
@@ -115,6 +116,17 @@ static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key)
        kfree(sh_key);
 }
 
+void sctp_auth_shkey_release(struct sctp_shared_key *sh_key)
+{
+       if (refcount_dec_and_test(&sh_key->refcnt))
+               sctp_auth_shkey_destroy(sh_key);
+}
+
+void sctp_auth_shkey_hold(struct sctp_shared_key *sh_key)
+{
+       refcount_inc(&sh_key->refcnt);
+}
+
 /* Destroy the entire key list.  This is done during the
  * associon and endpoint free process.
  */
@@ -128,7 +140,7 @@ void sctp_auth_destroy_keys(struct list_head *keys)
 
        key_for_each_safe(ep_key, tmp, keys) {
                list_del_init(&ep_key->key_list);
-               sctp_auth_shkey_free(ep_key);
+               sctp_auth_shkey_release(ep_key);
        }
 }
 
@@ -409,13 +421,19 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp)
 
        sctp_auth_key_put(asoc->asoc_shared_key);
        asoc->asoc_shared_key = secret;
+       asoc->shkey = ep_key;
 
        /* Update send queue in case any chunk already in there now
         * needs authenticating
         */
        list_for_each_entry(chunk, &asoc->outqueue.out_chunk_list, list) {
-               if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc))
+               if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc)) {
                        chunk->auth = 1;
+                       if (!chunk->shkey) {
+                               chunk->shkey = asoc->shkey;
+                               sctp_auth_shkey_hold(chunk->shkey);
+                       }
+               }
        }
 
        return 0;
@@ -431,8 +449,11 @@ struct sctp_shared_key *sctp_auth_get_shkey(
 
        /* First search associations set of endpoint pair shared keys */
        key_for_each(key, &asoc->endpoint_shared_keys) {
-               if (key->key_id == key_id)
-                       return key;
+               if (key->key_id == key_id) {
+                       if (!key->deactivated)
+                               return key;
+                       break;
+               }
        }
 
        return NULL;
@@ -703,16 +724,15 @@ int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc)
  *    after the AUTH chunk in the SCTP packet.
  */
 void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
-                             struct sk_buff *skb,
-                             struct sctp_auth_chunk *auth,
-                             gfp_t gfp)
+                             struct sk_buff *skb, struct sctp_auth_chunk *auth,
+                             struct sctp_shared_key *ep_key, gfp_t gfp)
 {
-       struct crypto_shash *tfm;
        struct sctp_auth_bytes *asoc_key;
+       struct crypto_shash *tfm;
        __u16 key_id, hmac_id;
-       __u8 *digest;
        unsigned char *end;
        int free_key = 0;
+       __u8 *digest;
 
        /* Extract the info we need:
         * - hmac id
@@ -724,12 +744,7 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
        if (key_id == asoc->active_key_id)
                asoc_key = asoc->asoc_shared_key;
        else {
-               struct sctp_shared_key *ep_key;
-
-               ep_key = sctp_auth_get_shkey(asoc, key_id);
-               if (!ep_key)
-                       return;
-
+               /* ep_key can't be NULL here */
                asoc_key = sctp_auth_asoc_create_secret(asoc, ep_key, gfp);
                if (!asoc_key)
                        return;
@@ -829,7 +844,7 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
                      struct sctp_association *asoc,
                      struct sctp_authkey *auth_key)
 {
-       struct sctp_shared_key *cur_key = NULL;
+       struct sctp_shared_key *cur_key, *shkey;
        struct sctp_auth_bytes *key;
        struct list_head *sh_keys;
        int replace = 0;
@@ -842,46 +857,34 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
        else
                sh_keys = &ep->endpoint_shared_keys;
 
-       key_for_each(cur_key, sh_keys) {
-               if (cur_key->key_id == auth_key->sca_keynumber) {
+       key_for_each(shkey, sh_keys) {
+               if (shkey->key_id == auth_key->sca_keynumber) {
                        replace = 1;
                        break;
                }
        }
 
-       /* If we are not replacing a key id, we need to allocate
-        * a shared key.
-        */
-       if (!replace) {
-               cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber,
-                                                GFP_KERNEL);
-               if (!cur_key)
-                       return -ENOMEM;
-       }
+       cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber, GFP_KERNEL);
+       if (!cur_key)
+               return -ENOMEM;
 
        /* Create a new key data based on the info passed in */
        key = sctp_auth_create_key(auth_key->sca_keylength, GFP_KERNEL);
-       if (!key)
-               goto nomem;
+       if (!key) {
+               kfree(cur_key);
+               return -ENOMEM;
+       }
 
        memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength);
+       cur_key->key = key;
 
-       /* If we are replacing, remove the old keys data from the
-        * key id.  If we are adding new key id, add it to the
-        * list.
-        */
-       if (replace)
-               sctp_auth_key_put(cur_key->key);
-       else
-               list_add(&cur_key->key_list, sh_keys);
+       if (replace) {
+               list_del_init(&shkey->key_list);
+               sctp_auth_shkey_release(shkey);
+       }
+       list_add(&cur_key->key_list, sh_keys);
 
-       cur_key->key = key;
        return 0;
-nomem:
-       if (!replace)
-               sctp_auth_shkey_free(cur_key);
-
-       return -ENOMEM;
 }
 
 int sctp_auth_set_active_key(struct sctp_endpoint *ep,
@@ -905,7 +908,7 @@ int sctp_auth_set_active_key(struct sctp_endpoint *ep,
                }
        }
 
-       if (!found)
+       if (!found || key->deactivated)
                return -EINVAL;
 
        if (asoc) {
@@ -952,7 +955,58 @@ int sctp_auth_del_key_id(struct sctp_endpoint *ep,
 
        /* Delete the shared key */
        list_del_init(&key->key_list);
-       sctp_auth_shkey_free(key);
+       sctp_auth_shkey_release(key);
+
+       return 0;
+}
+
+int sctp_auth_deact_key_id(struct sctp_endpoint *ep,
+                          struct sctp_association *asoc, __u16  key_id)
+{
+       struct sctp_shared_key *key;
+       struct list_head *sh_keys;
+       int found = 0;
+
+       /* The key identifier MUST NOT be the current active key
+        * The key identifier MUST correspond to an existing key
+        */
+       if (asoc) {
+               if (asoc->active_key_id == key_id)
+                       return -EINVAL;
+
+               sh_keys = &asoc->endpoint_shared_keys;
+       } else {
+               if (ep->active_key_id == key_id)
+                       return -EINVAL;
+
+               sh_keys = &ep->endpoint_shared_keys;
+       }
+
+       key_for_each(key, sh_keys) {
+               if (key->key_id == key_id) {
+                       found = 1;
+                       break;
+               }
+       }
+
+       if (!found)
+               return -EINVAL;
+
+       /* refcnt == 1 and !list_empty mean it's not being used anywhere
+        * and deactivated will be set, so it's time to notify userland
+        * that this shkey can be freed.
+        */
+       if (asoc && !list_empty(&key->key_list) &&
+           refcount_read(&key->refcnt) == 1) {
+               struct sctp_ulpevent *ev;
+
+               ev = sctp_ulpevent_make_authkey(asoc, key->key_id,
+                                               SCTP_AUTH_FREE_KEY, GFP_KERNEL);
+               if (ev)
+                       asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+       }
+
+       key->deactivated = 1;
 
        return 0;
 }
index 991a530c6b31667ec15b95a08035a5e8f71476f4..f889a84f264db9f71cd448804c5a4646c729c664 100644 (file)
@@ -168,6 +168,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 {
        size_t len, first_len, max_data, remaining;
        size_t msg_len = iov_iter_count(from);
+       struct sctp_shared_key *shkey = NULL;
        struct list_head *pos, *temp;
        struct sctp_chunk *chunk;
        struct sctp_datamsg *msg;
@@ -204,6 +205,17 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
                if (hmac_desc)
                        max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) +
                                              hmac_desc->hmac_len);
+
+               if (sinfo->sinfo_tsn &&
+                   sinfo->sinfo_ssn != asoc->active_key_id) {
+                       shkey = sctp_auth_get_shkey(asoc, sinfo->sinfo_ssn);
+                       if (!shkey) {
+                               err = -EINVAL;
+                               goto errout;
+                       }
+               } else {
+                       shkey = asoc->shkey;
+               }
        }
 
        /* Check what's our max considering the above */
@@ -275,6 +287,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
                if (err < 0)
                        goto errout_chunk_free;
 
+               chunk->shkey = shkey;
+
                /* Put the chunk->skb back into the form expected by send.  */
                __skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr -
                                       chunk->skb->data);
index 0247cc432e0293f2881a594367e79c666e9b9f4d..b381d78548ac7391ba64ebff333835d0627ffdb8 100644 (file)
@@ -106,6 +106,7 @@ int sctp_rcv(struct sk_buff *skb)
        int family;
        struct sctp_af *af;
        struct net *net = dev_net(skb->dev);
+       bool is_gso = skb_is_gso(skb) && skb_is_gso_sctp(skb);
 
        if (skb->pkt_type != PACKET_HOST)
                goto discard_it;
@@ -123,8 +124,7 @@ int sctp_rcv(struct sk_buff *skb)
         * it's better to just linearize it otherwise crc computing
         * takes longer.
         */
-       if ((!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
-            skb_linearize(skb)) ||
+       if ((!is_gso && skb_linearize(skb)) ||
            !pskb_may_pull(skb, sizeof(struct sctphdr)))
                goto discard_it;
 
@@ -135,7 +135,7 @@ int sctp_rcv(struct sk_buff *skb)
        if (skb_csum_unnecessary(skb))
                __skb_decr_checksum_unnecessary(skb);
        else if (!sctp_checksum_disable &&
-                !(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
+                !is_gso &&
                 sctp_rcv_checksum(net, skb) < 0)
                goto discard_it;
        skb->csum_valid = 1;
@@ -1218,7 +1218,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
         * issue as packets hitting this are mostly INIT or INIT-ACK and
         * those cannot be on GSO-style anyway.
         */
-       if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
+       if (skb_is_gso(skb) && skb_is_gso_sctp(skb))
                return NULL;
 
        ch = (struct sctp_chunkhdr *)skb->data;
index 48392552ee7c1ea75a134375b55ffb0ebf59064e..23ebc5318edc47c51230a95256064f5b2974d2f4 100644 (file)
@@ -170,7 +170,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
 
                chunk = list_entry(entry, struct sctp_chunk, list);
 
-               if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) {
+               if (skb_is_gso(chunk->skb) && skb_is_gso_sctp(chunk->skb)) {
                        /* GSO-marked skbs but without frags, handle
                         * them normally
                         */
index aeea6da814417c4541ec95f0272aa7587a4b3ecd..fd2684ad94c879357bcf8e1f672e757f4116365a 100644 (file)
@@ -130,11 +130,3 @@ void sctp_dbg_objcnt_init(struct net *net)
        if (!ent)
                pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n");
 }
-
-/* Cleanup the objcount entry in the proc filesystem.  */
-void sctp_dbg_objcnt_exit(struct net *net)
-{
-       remove_proc_entry("sctp_dbg_objcnt", net->sctp.proc_net_sctp);
-}
-
-
index 35bc7106d1827a80f1135fe4797bec36cb7c669a..123e9f2dc22652ba0e7bf26cd329682b9afefa0c 100644 (file)
@@ -45,7 +45,7 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
        struct sk_buff *segs = ERR_PTR(-EINVAL);
        struct sctphdr *sh;
 
-       if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP))
+       if (!skb_is_gso_sctp(skb))
                goto out;
 
        sh = sctp_hdr(skb);
index 01a26ee051e3878ced4253429b5017708d0c138f..d6e1c90cc09afdaaa14360d7df613997d49746b9 100644 (file)
@@ -241,10 +241,13 @@ static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt,
        if (!chunk->auth)
                return retval;
 
-       auth = sctp_make_auth(asoc);
+       auth = sctp_make_auth(asoc, chunk->shkey->key_id);
        if (!auth)
                return retval;
 
+       auth->shkey = chunk->shkey;
+       sctp_auth_shkey_hold(auth->shkey);
+
        retval = __sctp_packet_append_chunk(pkt, auth);
 
        if (retval != SCTP_XMIT_OK)
@@ -490,7 +493,8 @@ static int sctp_packet_pack(struct sctp_packet *packet,
                }
 
                if (auth) {
-                       sctp_auth_calculate_hmac(tp->asoc, nskb, auth, gfp);
+                       sctp_auth_calculate_hmac(tp->asoc, nskb, auth,
+                                                packet->auth->shkey, gfp);
                        /* free auth if no more chunks, or add it back */
                        if (list_empty(&packet->chunk_list))
                                sctp_chunk_free(packet->auth);
@@ -770,6 +774,16 @@ static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
        enum sctp_xmit retval = SCTP_XMIT_OK;
        size_t psize, pmtu, maxsize;
 
+       /* Don't bundle in this packet if this chunk's auth key doesn't
+        * match other chunks already enqueued on this packet. Also,
+        * don't bundle the chunk with auth key if other chunks in this
+        * packet don't have auth key.
+        */
+       if ((packet->auth && chunk->shkey != packet->auth->shkey) ||
+           (!packet->auth && chunk->shkey &&
+            chunk->chunk_hdr->type != SCTP_CID_AUTH))
+               return SCTP_XMIT_PMTU_FULL;
+
        psize = packet->size;
        if (packet->transport->asoc)
                pmtu = packet->transport->asoc->pathmtu;
index 537545ebcb0ec83de4d9e551550332f7394855ea..17d0155d9de3bb7fb0d30bdd5939c0a4bfa8b4a7 100644 (file)
@@ -101,25 +101,6 @@ static const struct file_operations sctp_snmp_seq_fops = {
        .release = single_release_net,
 };
 
-/* Set up the proc fs entry for 'snmp' object. */
-int __net_init sctp_snmp_proc_init(struct net *net)
-{
-       struct proc_dir_entry *p;
-
-       p = proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp,
-                       &sctp_snmp_seq_fops);
-       if (!p)
-               return -ENOMEM;
-
-       return 0;
-}
-
-/* Cleanup the proc fs entry for 'snmp' object. */
-void sctp_snmp_proc_exit(struct net *net)
-{
-       remove_proc_entry("snmp", net->sctp.proc_net_sctp);
-}
-
 /* Dump local addresses of an association/endpoint. */
 static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb)
 {
@@ -259,25 +240,6 @@ static const struct file_operations sctp_eps_seq_fops = {
        .release = seq_release_net,
 };
 
-/* Set up the proc fs entry for 'eps' object. */
-int __net_init sctp_eps_proc_init(struct net *net)
-{
-       struct proc_dir_entry *p;
-
-       p = proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp,
-                       &sctp_eps_seq_fops);
-       if (!p)
-               return -ENOMEM;
-
-       return 0;
-}
-
-/* Cleanup the proc fs entry for 'eps' object. */
-void sctp_eps_proc_exit(struct net *net)
-{
-       remove_proc_entry("eps", net->sctp.proc_net_sctp);
-}
-
 struct sctp_ht_iter {
        struct seq_net_private p;
        struct rhashtable_iter hti;
@@ -390,25 +352,6 @@ static const struct file_operations sctp_assocs_seq_fops = {
        .release = seq_release_net,
 };
 
-/* Set up the proc fs entry for 'assocs' object. */
-int __net_init sctp_assocs_proc_init(struct net *net)
-{
-       struct proc_dir_entry *p;
-
-       p = proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp,
-                       &sctp_assocs_seq_fops);
-       if (!p)
-               return -ENOMEM;
-
-       return 0;
-}
-
-/* Cleanup the proc fs entry for 'assocs' object. */
-void sctp_assocs_proc_exit(struct net *net)
-{
-       remove_proc_entry("assocs", net->sctp.proc_net_sctp);
-}
-
 static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
 {
        struct sctp_association *assoc;
@@ -488,12 +431,6 @@ static const struct seq_operations sctp_remaddr_ops = {
        .show  = sctp_remaddr_seq_show,
 };
 
-/* Cleanup the proc fs entry for 'remaddr' object. */
-void sctp_remaddr_proc_exit(struct net *net)
-{
-       remove_proc_entry("remaddr", net->sctp.proc_net_sctp);
-}
-
 static int sctp_remaddr_seq_open(struct inode *inode, struct file *file)
 {
        return seq_open_net(inode, file, &sctp_remaddr_ops,
@@ -507,13 +444,28 @@ static const struct file_operations sctp_remaddr_seq_fops = {
        .release = seq_release_net,
 };
 
-int __net_init sctp_remaddr_proc_init(struct net *net)
+/* Set up the proc fs entry for the SCTP protocol. */
+int __net_init sctp_proc_init(struct net *net)
 {
-       struct proc_dir_entry *p;
-
-       p = proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp,
-                       &sctp_remaddr_seq_fops);
-       if (!p)
+       net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
+       if (!net->sctp.proc_net_sctp)
                return -ENOMEM;
+       if (!proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp,
+                       &sctp_snmp_seq_fops))
+               goto cleanup;
+       if (!proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp,
+                       &sctp_eps_seq_fops))
+               goto cleanup;
+       if (!proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp,
+                       &sctp_assocs_seq_fops))
+               goto cleanup;
+       if (!proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp,
+                       &sctp_remaddr_seq_fops))
+               goto cleanup;
        return 0;
+
+cleanup:
+       remove_proc_subtree("sctp", net->proc_net);
+       net->sctp.proc_net_sctp = NULL;
+       return -ENOMEM;
 }
index 91813e686c6737aaf20ca1ea2f3978a833c5eff6..493b817f6a2a370b7c6a4a19dad08c82e96e4ece 100644 (file)
@@ -80,56 +80,6 @@ long sysctl_sctp_mem[3];
 int sysctl_sctp_rmem[3];
 int sysctl_sctp_wmem[3];
 
-/* Set up the proc fs entry for the SCTP protocol. */
-static int __net_init sctp_proc_init(struct net *net)
-{
-#ifdef CONFIG_PROC_FS
-       net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
-       if (!net->sctp.proc_net_sctp)
-               goto out_proc_net_sctp;
-       if (sctp_snmp_proc_init(net))
-               goto out_snmp_proc_init;
-       if (sctp_eps_proc_init(net))
-               goto out_eps_proc_init;
-       if (sctp_assocs_proc_init(net))
-               goto out_assocs_proc_init;
-       if (sctp_remaddr_proc_init(net))
-               goto out_remaddr_proc_init;
-
-       return 0;
-
-out_remaddr_proc_init:
-       sctp_assocs_proc_exit(net);
-out_assocs_proc_init:
-       sctp_eps_proc_exit(net);
-out_eps_proc_init:
-       sctp_snmp_proc_exit(net);
-out_snmp_proc_init:
-       remove_proc_entry("sctp", net->proc_net);
-       net->sctp.proc_net_sctp = NULL;
-out_proc_net_sctp:
-       return -ENOMEM;
-#endif /* CONFIG_PROC_FS */
-       return 0;
-}
-
-/* Clean up the proc fs entry for the SCTP protocol.
- * Note: Do not make this __exit as it is used in the init error
- * path.
- */
-static void sctp_proc_exit(struct net *net)
-{
-#ifdef CONFIG_PROC_FS
-       sctp_snmp_proc_exit(net);
-       sctp_eps_proc_exit(net);
-       sctp_assocs_proc_exit(net);
-       sctp_remaddr_proc_exit(net);
-
-       remove_proc_entry("sctp", net->proc_net);
-       net->sctp.proc_net_sctp = NULL;
-#endif
-}
-
 /* Private helper to extract ipv4 address and stash them in
  * the protocol structure.
  */
@@ -1285,10 +1235,12 @@ static int __net_init sctp_defaults_init(struct net *net)
        if (status)
                goto err_init_mibs;
 
+#ifdef CONFIG_PROC_FS
        /* Initialize proc fs directory.  */
        status = sctp_proc_init(net);
        if (status)
                goto err_init_proc;
+#endif
 
        sctp_dbg_objcnt_init(net);
 
@@ -1320,9 +1272,10 @@ static void __net_exit sctp_defaults_exit(struct net *net)
        sctp_free_addr_wq(net);
        sctp_free_local_addr_list(net);
 
-       sctp_dbg_objcnt_exit(net);
-
-       sctp_proc_exit(net);
+#ifdef CONFIG_PROC_FS
+       remove_proc_subtree("sctp", net->proc_net);
+       net->sctp.proc_net_sctp = NULL;
+#endif
        cleanup_sctp_mibs(net);
        sctp_sysctl_net_unregister(net);
 }
@@ -1330,6 +1283,7 @@ static void __net_exit sctp_defaults_exit(struct net *net)
 static struct pernet_operations sctp_defaults_ops = {
        .init = sctp_defaults_init,
        .exit = sctp_defaults_exit,
+       .async = true,
 };
 
 static int __net_init sctp_ctrlsock_init(struct net *net)
@@ -1353,6 +1307,7 @@ static void __net_init sctp_ctrlsock_exit(struct net *net)
 static struct pernet_operations sctp_ctrlsock_ops = {
        .init = sctp_ctrlsock_init,
        .exit = sctp_ctrlsock_exit,
+       .async = true,
 };
 
 /* Initialize the universe into something sensible.  */
index d01475f5f710667a40b56b7fff2a939e0a37e59e..cc20bc39ee7ca97330ba8cc3202c01079f5e4e72 100644 (file)
@@ -87,7 +87,28 @@ static void  *sctp_addto_chunk_fixed(struct sctp_chunk *, int len,
 /* Control chunk destructor */
 static void sctp_control_release_owner(struct sk_buff *skb)
 {
-       /*TODO: do memory release */
+       struct sctp_chunk *chunk = skb_shinfo(skb)->destructor_arg;
+
+       if (chunk->shkey) {
+               struct sctp_shared_key *shkey = chunk->shkey;
+               struct sctp_association *asoc = chunk->asoc;
+
+               /* refcnt == 2 and !list_empty mean after this release, it's
+                * not being used anywhere, and it's time to notify userland
+                * that this shkey can be freed if it's been deactivated.
+                */
+               if (shkey->deactivated && !list_empty(&shkey->key_list) &&
+                   refcount_read(&shkey->refcnt) == 2) {
+                       struct sctp_ulpevent *ev;
+
+                       ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id,
+                                                       SCTP_AUTH_FREE_KEY,
+                                                       GFP_KERNEL);
+                       if (ev)
+                               asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+               }
+               sctp_auth_shkey_release(chunk->shkey);
+       }
 }
 
 static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
@@ -102,7 +123,12 @@ static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
         *
         *  For now don't do anything for now.
         */
+       if (chunk->auth) {
+               chunk->shkey = asoc->shkey;
+               sctp_auth_shkey_hold(chunk->shkey);
+       }
        skb->sk = asoc ? asoc->base.sk : NULL;
+       skb_shinfo(skb)->destructor_arg = chunk;
        skb->destructor = sctp_control_release_owner;
 }
 
@@ -1271,7 +1297,8 @@ struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc,
        return retval;
 }
 
-struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
+struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc,
+                                 __u16 key_id)
 {
        struct sctp_authhdr auth_hdr;
        struct sctp_hmac *hmac_desc;
@@ -1289,7 +1316,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
                return NULL;
 
        auth_hdr.hmac_id = htons(hmac_desc->hmac_id);
-       auth_hdr.shkey_id = htons(asoc->active_key_id);
+       auth_hdr.shkey_id = htons(key_id);
 
        retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr),
                                                 &auth_hdr);
index b71e7fb0a20af5f6bb6810e03c44154cd2c2b9f6..298112ca8c069e3bc473e73080c37bbd61253f50 100644 (file)
@@ -1049,6 +1049,16 @@ static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands,
                asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
 }
 
+static void sctp_cmd_peer_no_auth(struct sctp_cmd_seq *commands,
+                                 struct sctp_association *asoc)
+{
+       struct sctp_ulpevent *ev;
+
+       ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH, GFP_ATOMIC);
+       if (ev)
+               asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+}
+
 /* Helper function to generate an adaptation indication event */
 static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands,
                                    struct sctp_association *asoc)
@@ -1755,6 +1765,9 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
                case SCTP_CMD_ADAPTATION_IND:
                        sctp_cmd_adaptation_ind(commands, asoc);
                        break;
+               case SCTP_CMD_PEER_NO_AUTH:
+                       sctp_cmd_peer_no_auth(commands, asoc);
+                       break;
 
                case SCTP_CMD_ASSOC_SHKEY:
                        error = sctp_auth_asoc_init_active_key(asoc,
index eb7905ffe5f2c5706655c305d6c72f3a366f6d7c..cc56a67dbb4ddf633d2277ca36e8620bf5479fb2 100644 (file)
@@ -659,7 +659,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
                                         void *arg,
                                         struct sctp_cmd_seq *commands)
 {
-       struct sctp_ulpevent *ev, *ai_ev = NULL;
+       struct sctp_ulpevent *ev, *ai_ev = NULL, *auth_ev = NULL;
        struct sctp_association *new_asoc;
        struct sctp_init_chunk *peer_init;
        struct sctp_chunk *chunk = arg;
@@ -820,6 +820,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
                        goto nomem_aiev;
        }
 
+       if (!new_asoc->peer.auth_capable) {
+               auth_ev = sctp_ulpevent_make_authkey(new_asoc, 0,
+                                                    SCTP_AUTH_NO_AUTH,
+                                                    GFP_ATOMIC);
+               if (!auth_ev)
+                       goto nomem_authev;
+       }
+
        /* Add all the state machine commands now since we've created
         * everything.  This way we don't introduce memory corruptions
         * during side-effect processing and correclty count established
@@ -847,8 +855,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
                sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
                                SCTP_ULPEVENT(ai_ev));
 
+       if (auth_ev)
+               sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+                               SCTP_ULPEVENT(auth_ev));
+
        return SCTP_DISPOSITION_CONSUME;
 
+nomem_authev:
+       sctp_ulpevent_free(ai_ev);
 nomem_aiev:
        sctp_ulpevent_free(ev);
 nomem_ev:
@@ -953,6 +967,15 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
                                SCTP_ULPEVENT(ev));
        }
 
+       if (!asoc->peer.auth_capable) {
+               ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH,
+                                               GFP_ATOMIC);
+               if (!ev)
+                       goto nomem;
+               sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+                               SCTP_ULPEVENT(ev));
+       }
+
        return SCTP_DISPOSITION_CONSUME;
 nomem:
        return SCTP_DISPOSITION_NOMEM;
@@ -1908,6 +1931,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_b(
        if (asoc->peer.adaptation_ind)
                sctp_add_cmd_sf(commands, SCTP_CMD_ADAPTATION_IND, SCTP_NULL());
 
+       if (!asoc->peer.auth_capable)
+               sctp_add_cmd_sf(commands, SCTP_CMD_PEER_NO_AUTH, SCTP_NULL());
+
        return SCTP_DISPOSITION_CONSUME;
 
 nomem:
@@ -1954,7 +1980,7 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
                                        struct sctp_cmd_seq *commands,
                                        struct sctp_association *new_asoc)
 {
-       struct sctp_ulpevent *ev = NULL, *ai_ev = NULL;
+       struct sctp_ulpevent *ev = NULL, *ai_ev = NULL, *auth_ev = NULL;
        struct sctp_chunk *repl;
 
        /* Clarification from Implementor's Guide:
@@ -2001,6 +2027,14 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
                                goto nomem;
 
                }
+
+               if (!asoc->peer.auth_capable) {
+                       auth_ev = sctp_ulpevent_make_authkey(asoc, 0,
+                                                            SCTP_AUTH_NO_AUTH,
+                                                            GFP_ATOMIC);
+                       if (!auth_ev)
+                               goto nomem;
+               }
        }
 
        repl = sctp_make_cookie_ack(new_asoc, chunk);
@@ -2015,10 +2049,15 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
        if (ai_ev)
                sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
                                        SCTP_ULPEVENT(ai_ev));
+       if (auth_ev)
+               sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+                               SCTP_ULPEVENT(auth_ev));
 
        return SCTP_DISPOSITION_CONSUME;
 
 nomem:
+       if (auth_ev)
+               sctp_ulpevent_free(auth_ev);
        if (ai_ev)
                sctp_ulpevent_free(ai_ev);
        if (ev)
@@ -4114,6 +4153,7 @@ static enum sctp_ierror sctp_sf_authenticate(
                                        const union sctp_subtype type,
                                        struct sctp_chunk *chunk)
 {
+       struct sctp_shared_key *sh_key = NULL;
        struct sctp_authhdr *auth_hdr;
        __u8 *save_digest, *digest;
        struct sctp_hmac *hmac;
@@ -4135,9 +4175,11 @@ static enum sctp_ierror sctp_sf_authenticate(
         * configured
         */
        key_id = ntohs(auth_hdr->shkey_id);
-       if (key_id != asoc->active_key_id && !sctp_auth_get_shkey(asoc, key_id))
-               return SCTP_IERROR_AUTH_BAD_KEYID;
-
+       if (key_id != asoc->active_key_id) {
+               sh_key = sctp_auth_get_shkey(asoc, key_id);
+               if (!sh_key)
+                       return SCTP_IERROR_AUTH_BAD_KEYID;
+       }
 
        /* Make sure that the length of the signature matches what
         * we expect.
@@ -4166,7 +4208,7 @@ static enum sctp_ierror sctp_sf_authenticate(
 
        sctp_auth_calculate_hmac(asoc, chunk->skb,
                                 (struct sctp_auth_chunk *)chunk->chunk_hdr,
-                                GFP_ATOMIC);
+                                sh_key, GFP_ATOMIC);
 
        /* Discard the packet if the digests do not match */
        if (memcmp(save_digest, digest, sig_len)) {
@@ -4243,7 +4285,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net,
                struct sctp_ulpevent *ev;
 
                ev = sctp_ulpevent_make_authkey(asoc, ntohs(auth_hdr->shkey_id),
-                                   SCTP_AUTH_NEWKEY, GFP_ATOMIC);
+                                   SCTP_AUTH_NEW_KEY, GFP_ATOMIC);
 
                if (!ev)
                        return -ENOMEM;
index bf271f8c2dc9b28c992f9be3bc9b6a7bfa5fdb82..7a10ae3c3d8293abecd955ff6a5a19e60dcc6f95 100644 (file)
@@ -156,6 +156,9 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
        /* The sndbuf space is tracked per association.  */
        sctp_association_hold(asoc);
 
+       if (chunk->shkey)
+               sctp_auth_shkey_hold(chunk->shkey);
+
        skb_set_owner_w(chunk->skb, sk);
 
        chunk->skb->destructor = sctp_wfree;
@@ -1606,396 +1609,303 @@ static int sctp_error(struct sock *sk, int flags, int err)
 static int sctp_msghdr_parse(const struct msghdr *msg,
                             struct sctp_cmsgs *cmsgs);
 
-static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
+static int sctp_sendmsg_parse(struct sock *sk, struct sctp_cmsgs *cmsgs,
+                             struct sctp_sndrcvinfo *srinfo,
+                             const struct msghdr *msg, size_t msg_len)
 {
-       struct net *net = sock_net(sk);
-       struct sctp_sock *sp;
-       struct sctp_endpoint *ep;
-       struct sctp_association *new_asoc = NULL, *asoc = NULL;
-       struct sctp_transport *transport, *chunk_tp;
-       struct sctp_chunk *chunk;
-       union sctp_addr to;
-       struct sockaddr *msg_name = NULL;
-       struct sctp_sndrcvinfo default_sinfo;
-       struct sctp_sndrcvinfo *sinfo;
-       struct sctp_initmsg *sinit;
-       sctp_assoc_t associd = 0;
-       struct sctp_cmsgs cmsgs = { NULL };
-       enum sctp_scope scope;
-       bool fill_sinfo_ttl = false, wait_connect = false;
-       struct sctp_datamsg *datamsg;
-       int msg_flags = msg->msg_flags;
-       __u16 sinfo_flags = 0;
-       long timeo;
+       __u16 sflags;
        int err;
 
-       err = 0;
-       sp = sctp_sk(sk);
-       ep = sp->ep;
-
-       pr_debug("%s: sk:%p, msg:%p, msg_len:%zu ep:%p\n", __func__, sk,
-                msg, msg_len, ep);
+       if (sctp_sstate(sk, LISTENING) && sctp_style(sk, TCP))
+               return -EPIPE;
 
-       /* We cannot send a message over a TCP-style listening socket. */
-       if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) {
-               err = -EPIPE;
-               goto out_nounlock;
-       }
+       if (msg_len > sk->sk_sndbuf)
+               return -EMSGSIZE;
 
-       /* Parse out the SCTP CMSGs.  */
-       err = sctp_msghdr_parse(msg, &cmsgs);
+       memset(cmsgs, 0, sizeof(*cmsgs));
+       err = sctp_msghdr_parse(msg, cmsgs);
        if (err) {
                pr_debug("%s: msghdr parse err:%x\n", __func__, err);
-               goto out_nounlock;
+               return err;
        }
 
-       /* Fetch the destination address for this packet.  This
-        * address only selects the association--it is not necessarily
-        * the address we will send to.
-        * For a peeled-off socket, msg_name is ignored.
-        */
-       if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) {
-               int msg_namelen = msg->msg_namelen;
+       memset(srinfo, 0, sizeof(*srinfo));
+       if (cmsgs->srinfo) {
+               srinfo->sinfo_stream = cmsgs->srinfo->sinfo_stream;
+               srinfo->sinfo_flags = cmsgs->srinfo->sinfo_flags;
+               srinfo->sinfo_ppid = cmsgs->srinfo->sinfo_ppid;
+               srinfo->sinfo_context = cmsgs->srinfo->sinfo_context;
+               srinfo->sinfo_assoc_id = cmsgs->srinfo->sinfo_assoc_id;
+               srinfo->sinfo_timetolive = cmsgs->srinfo->sinfo_timetolive;
+       }
 
-               err = sctp_verify_addr(sk, (union sctp_addr *)msg->msg_name,
-                                      msg_namelen);
-               if (err)
-                       return err;
+       if (cmsgs->sinfo) {
+               srinfo->sinfo_stream = cmsgs->sinfo->snd_sid;
+               srinfo->sinfo_flags = cmsgs->sinfo->snd_flags;
+               srinfo->sinfo_ppid = cmsgs->sinfo->snd_ppid;
+               srinfo->sinfo_context = cmsgs->sinfo->snd_context;
+               srinfo->sinfo_assoc_id = cmsgs->sinfo->snd_assoc_id;
+       }
 
-               if (msg_namelen > sizeof(to))
-                       msg_namelen = sizeof(to);
-               memcpy(&to, msg->msg_name, msg_namelen);
-               msg_name = msg->msg_name;
+       if (cmsgs->prinfo) {
+               srinfo->sinfo_timetolive = cmsgs->prinfo->pr_value;
+               SCTP_PR_SET_POLICY(srinfo->sinfo_flags,
+                                  cmsgs->prinfo->pr_policy);
        }
 
-       sinit = cmsgs.init;
-       if (cmsgs.sinfo != NULL) {
-               memset(&default_sinfo, 0, sizeof(default_sinfo));
-               default_sinfo.sinfo_stream = cmsgs.sinfo->snd_sid;
-               default_sinfo.sinfo_flags = cmsgs.sinfo->snd_flags;
-               default_sinfo.sinfo_ppid = cmsgs.sinfo->snd_ppid;
-               default_sinfo.sinfo_context = cmsgs.sinfo->snd_context;
-               default_sinfo.sinfo_assoc_id = cmsgs.sinfo->snd_assoc_id;
+       sflags = srinfo->sinfo_flags;
+       if (!sflags && msg_len)
+               return 0;
 
-               sinfo = &default_sinfo;
-               fill_sinfo_ttl = true;
-       } else {
-               sinfo = cmsgs.srinfo;
-       }
-       /* Did the user specify SNDINFO/SNDRCVINFO? */
-       if (sinfo) {
-               sinfo_flags = sinfo->sinfo_flags;
-               associd = sinfo->sinfo_assoc_id;
-       }
+       if (sctp_style(sk, TCP) && (sflags & (SCTP_EOF | SCTP_ABORT)))
+               return -EINVAL;
 
-       pr_debug("%s: msg_len:%zu, sinfo_flags:0x%x\n", __func__,
-                msg_len, sinfo_flags);
+       if (((sflags & SCTP_EOF) && msg_len > 0) ||
+           (!(sflags & (SCTP_EOF | SCTP_ABORT)) && msg_len == 0))
+               return -EINVAL;
 
-       /* SCTP_EOF or SCTP_ABORT cannot be set on a TCP-style socket. */
-       if (sctp_style(sk, TCP) && (sinfo_flags & (SCTP_EOF | SCTP_ABORT))) {
-               err = -EINVAL;
-               goto out_nounlock;
-       }
+       if ((sflags & SCTP_ADDR_OVER) && !msg->msg_name)
+               return -EINVAL;
 
-       /* If SCTP_EOF is set, no data can be sent. Disallow sending zero
-        * length messages when SCTP_EOF|SCTP_ABORT is not set.
-        * If SCTP_ABORT is set, the message length could be non zero with
-        * the msg_iov set to the user abort reason.
-        */
-       if (((sinfo_flags & SCTP_EOF) && (msg_len > 0)) ||
-           (!(sinfo_flags & (SCTP_EOF|SCTP_ABORT)) && (msg_len == 0))) {
-               err = -EINVAL;
-               goto out_nounlock;
-       }
+       return 0;
+}
 
-       /* If SCTP_ADDR_OVER is set, there must be an address
-        * specified in msg_name.
-        */
-       if ((sinfo_flags & SCTP_ADDR_OVER) && (!msg->msg_name)) {
-               err = -EINVAL;
-               goto out_nounlock;
-       }
+static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
+                                struct sctp_cmsgs *cmsgs,
+                                union sctp_addr *daddr,
+                                struct sctp_transport **tp)
+{
+       struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+       struct net *net = sock_net(sk);
+       struct sctp_association *asoc;
+       enum sctp_scope scope;
+       struct cmsghdr *cmsg;
+       int err;
 
-       transport = NULL;
+       *tp = NULL;
 
-       pr_debug("%s: about to look up association\n", __func__);
+       if (sflags & (SCTP_EOF | SCTP_ABORT))
+               return -EINVAL;
 
-       lock_sock(sk);
+       if (sctp_style(sk, TCP) && (sctp_sstate(sk, ESTABLISHED) ||
+                                   sctp_sstate(sk, CLOSING)))
+               return -EADDRNOTAVAIL;
 
-       /* If a msg_name has been specified, assume this is to be used.  */
-       if (msg_name) {
-               /* Look for a matching association on the endpoint. */
-               asoc = sctp_endpoint_lookup_assoc(ep, &to, &transport);
+       if (sctp_endpoint_is_peeled_off(ep, daddr))
+               return -EADDRNOTAVAIL;
 
-               /* If we could not find a matching association on the
-                * endpoint, make sure that it is not a TCP-style
-                * socket that already has an association or there is
-                * no peeled-off association on another socket.
-                */
-               if (!asoc &&
-                   ((sctp_style(sk, TCP) &&
-                     (sctp_sstate(sk, ESTABLISHED) ||
-                      sctp_sstate(sk, CLOSING))) ||
-                    sctp_endpoint_is_peeled_off(ep, &to))) {
-                       err = -EADDRNOTAVAIL;
-                       goto out_unlock;
-               }
+       if (!ep->base.bind_addr.port) {
+               if (sctp_autobind(sk))
+                       return -EAGAIN;
        } else {
-               asoc = sctp_id2assoc(sk, associd);
-               if (!asoc) {
-                       err = -EPIPE;
-                       goto out_unlock;
-               }
+               if (ep->base.bind_addr.port < inet_prot_sock(net) &&
+                   !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
+                       return -EACCES;
        }
 
-       if (asoc) {
-               pr_debug("%s: just looked up association:%p\n", __func__, asoc);
+       scope = sctp_scope(daddr);
 
-               /* We cannot send a message on a TCP-style SCTP_SS_ESTABLISHED
-                * socket that has an association in CLOSED state. This can
-                * happen when an accepted socket has an association that is
-                * already CLOSED.
-                */
-               if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP)) {
-                       err = -EPIPE;
-                       goto out_unlock;
-               }
+       asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
+       if (!asoc)
+               return -ENOMEM;
+
+       if (sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL) < 0) {
+               err = -ENOMEM;
+               goto free;
+       }
 
-               if (sinfo_flags & SCTP_EOF) {
-                       pr_debug("%s: shutting down association:%p\n",
-                                __func__, asoc);
+       if (cmsgs->init) {
+               struct sctp_initmsg *init = cmsgs->init;
 
-                       sctp_primitive_SHUTDOWN(net, asoc, NULL);
-                       err = 0;
-                       goto out_unlock;
+               if (init->sinit_num_ostreams) {
+                       __u16 outcnt = init->sinit_num_ostreams;
+
+                       asoc->c.sinit_num_ostreams = outcnt;
+                       /* outcnt has been changed, need to re-init stream */
+                       err = sctp_stream_init(&asoc->stream, outcnt, 0,
+                                              GFP_KERNEL);
+                       if (err)
+                               goto free;
                }
-               if (sinfo_flags & SCTP_ABORT) {
 
-                       chunk = sctp_make_abort_user(asoc, msg, msg_len);
-                       if (!chunk) {
-                               err = -ENOMEM;
-                               goto out_unlock;
-                       }
+               if (init->sinit_max_instreams)
+                       asoc->c.sinit_max_instreams = init->sinit_max_instreams;
 
-                       pr_debug("%s: aborting association:%p\n",
-                                __func__, asoc);
+               if (init->sinit_max_attempts)
+                       asoc->max_init_attempts = init->sinit_max_attempts;
 
-                       sctp_primitive_ABORT(net, asoc, chunk);
-                       err = 0;
-                       goto out_unlock;
-               }
+               if (init->sinit_max_init_timeo)
+                       asoc->max_init_timeo =
+                               msecs_to_jiffies(init->sinit_max_init_timeo);
        }
 
-       /* Do we need to create the association?  */
-       if (!asoc) {
-               pr_debug("%s: there is no association yet\n", __func__);
+       *tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
+       if (!*tp) {
+               err = -ENOMEM;
+               goto free;
+       }
 
-               if (sinfo_flags & (SCTP_EOF | SCTP_ABORT)) {
-                       err = -EINVAL;
-                       goto out_unlock;
-               }
+       if (!cmsgs->addrs_msg)
+               return 0;
 
-               /* Check for invalid stream against the stream counts,
-                * either the default or the user specified stream counts.
-                */
-               if (sinfo) {
-                       if (!sinit || !sinit->sinit_num_ostreams) {
-                               /* Check against the defaults. */
-                               if (sinfo->sinfo_stream >=
-                                   sp->initmsg.sinit_num_ostreams) {
-                                       err = -EINVAL;
-                                       goto out_unlock;
-                               }
-                       } else {
-                               /* Check against the requested.  */
-                               if (sinfo->sinfo_stream >=
-                                   sinit->sinit_num_ostreams) {
-                                       err = -EINVAL;
-                                       goto out_unlock;
-                               }
-                       }
-               }
+       /* sendv addr list parse */
+       for_each_cmsghdr(cmsg, cmsgs->addrs_msg) {
+               struct sctp_transport *transport;
+               struct sctp_association *old;
+               union sctp_addr _daddr;
+               int dlen;
 
-               /*
-                * API 3.1.2 bind() - UDP Style Syntax
-                * If a bind() or sctp_bindx() is not called prior to a
-                * sendmsg() call that initiates a new association, the
-                * system picks an ephemeral port and will choose an address
-                * set equivalent to binding with a wildcard address.
-                */
-               if (!ep->base.bind_addr.port) {
-                       if (sctp_autobind(sk)) {
-                               err = -EAGAIN;
-                               goto out_unlock;
+               if (cmsg->cmsg_level != IPPROTO_SCTP ||
+                   (cmsg->cmsg_type != SCTP_DSTADDRV4 &&
+                    cmsg->cmsg_type != SCTP_DSTADDRV6))
+                       continue;
+
+               daddr = &_daddr;
+               memset(daddr, 0, sizeof(*daddr));
+               dlen = cmsg->cmsg_len - sizeof(struct cmsghdr);
+               if (cmsg->cmsg_type == SCTP_DSTADDRV4) {
+                       if (dlen < sizeof(struct in_addr)) {
+                               err = -EINVAL;
+                               goto free;
                        }
+
+                       dlen = sizeof(struct in_addr);
+                       daddr->v4.sin_family = AF_INET;
+                       daddr->v4.sin_port = htons(asoc->peer.port);
+                       memcpy(&daddr->v4.sin_addr, CMSG_DATA(cmsg), dlen);
                } else {
-                       /*
-                        * If an unprivileged user inherits a one-to-many
-                        * style socket with open associations on a privileged
-                        * port, it MAY be permitted to accept new associations,
-                        * but it SHOULD NOT be permitted to open new
-                        * associations.
-                        */
-                       if (ep->base.bind_addr.port < inet_prot_sock(net) &&
-                           !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) {
-                               err = -EACCES;
-                               goto out_unlock;
+                       if (dlen < sizeof(struct in6_addr)) {
+                               err = -EINVAL;
+                               goto free;
                        }
-               }
 
-               scope = sctp_scope(&to);
-               new_asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
-               if (!new_asoc) {
-                       err = -ENOMEM;
-                       goto out_unlock;
+                       dlen = sizeof(struct in6_addr);
+                       daddr->v6.sin6_family = AF_INET6;
+                       daddr->v6.sin6_port = htons(asoc->peer.port);
+                       memcpy(&daddr->v6.sin6_addr, CMSG_DATA(cmsg), dlen);
                }
-               asoc = new_asoc;
-               err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL);
-               if (err < 0) {
-                       err = -ENOMEM;
-                       goto out_free;
+               err = sctp_verify_addr(sk, daddr, sizeof(*daddr));
+               if (err)
+                       goto free;
+
+               old = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
+               if (old && old != asoc) {
+                       if (old->state >= SCTP_STATE_ESTABLISHED)
+                               err = -EISCONN;
+                       else
+                               err = -EALREADY;
+                       goto free;
                }
 
-               /* If the SCTP_INIT ancillary data is specified, set all
-                * the association init values accordingly.
-                */
-               if (sinit) {
-                       if (sinit->sinit_num_ostreams) {
-                               __u16 outcnt = sinit->sinit_num_ostreams;
-
-                               asoc->c.sinit_num_ostreams = outcnt;
-                               /* outcnt has been changed, so re-init stream */
-                               err = sctp_stream_init(&asoc->stream, outcnt, 0,
-                                                      GFP_KERNEL);
-                               if (err)
-                                       goto out_free;
-                       }
-                       if (sinit->sinit_max_instreams) {
-                               asoc->c.sinit_max_instreams =
-                                       sinit->sinit_max_instreams;
-                       }
-                       if (sinit->sinit_max_attempts) {
-                               asoc->max_init_attempts
-                                       = sinit->sinit_max_attempts;
-                       }
-                       if (sinit->sinit_max_init_timeo) {
-                               asoc->max_init_timeo =
-                                msecs_to_jiffies(sinit->sinit_max_init_timeo);
-                       }
+               if (sctp_endpoint_is_peeled_off(ep, daddr)) {
+                       err = -EADDRNOTAVAIL;
+                       goto free;
                }
 
-               /* Prime the peer's transport structures.  */
-               transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL, SCTP_UNKNOWN);
+               transport = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL,
+                                               SCTP_UNKNOWN);
                if (!transport) {
                        err = -ENOMEM;
-                       goto out_free;
+                       goto free;
                }
        }
 
-       /* ASSERT: we have a valid association at this point.  */
-       pr_debug("%s: we have a valid association\n", __func__);
+       return 0;
 
-       if (!sinfo) {
-               /* If the user didn't specify SNDINFO/SNDRCVINFO, make up
-                * one with some defaults.
-                */
-               memset(&default_sinfo, 0, sizeof(default_sinfo));
-               default_sinfo.sinfo_stream = asoc->default_stream;
-               default_sinfo.sinfo_flags = asoc->default_flags;
-               default_sinfo.sinfo_ppid = asoc->default_ppid;
-               default_sinfo.sinfo_context = asoc->default_context;
-               default_sinfo.sinfo_timetolive = asoc->default_timetolive;
-               default_sinfo.sinfo_assoc_id = sctp_assoc2id(asoc);
-
-               sinfo = &default_sinfo;
-       } else if (fill_sinfo_ttl) {
-               /* In case SNDINFO was specified, we still need to fill
-                * it with a default ttl from the assoc here.
-                */
-               sinfo->sinfo_timetolive = asoc->default_timetolive;
-       }
+free:
+       sctp_association_free(asoc);
+       return err;
+}
 
-       /* API 7.1.7, the sndbuf size per association bounds the
-        * maximum size of data that can be sent in a single send call.
-        */
-       if (msg_len > sk->sk_sndbuf) {
-               err = -EMSGSIZE;
-               goto out_free;
+/* Act on the association-level send flags before any data is queued.
+ *
+ * Returns 1 when the caller should go on and send data on @asoc, 0 when
+ * nothing more is to be done for this association (it was skipped, or a
+ * SHUTDOWN/ABORT primitive was issued instead of sending), or a negative
+ * errno (-EPIPE, -ENOMEM).
+ */
+static int sctp_sendmsg_check_sflags(struct sctp_association *asoc,
+                                    __u16 sflags, struct msghdr *msg,
+                                    size_t msg_len)
+{
+       struct sock *sk = asoc->base.sk;
+       struct net *net = sock_net(sk);
+
+       /* TCP-style socket whose association is CLOSED: fail with -EPIPE. */
+       if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP))
+               return -EPIPE;
+
+       /* With SCTP_SENDALL on a UDP-style socket, associations that are
+        * not ESTABLISHED are skipped without error.
+        */
+       if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP) &&
+           !sctp_state(asoc, ESTABLISHED))
+               return 0;
+
+       /* SCTP_EOF: issue the SHUTDOWN primitive; no data is sent. */
+       if (sflags & SCTP_EOF) {
+               pr_debug("%s: shutting down association:%p\n", __func__, asoc);
+               sctp_primitive_SHUTDOWN(net, asoc, NULL);
+
+               return 0;
        }
 
-       if (asoc->pmtu_pending)
-               sctp_assoc_pending_pmtu(asoc);
+       /* SCTP_ABORT: the user message is handed to sctp_make_abort_user()
+        * to build the chunk passed to the ABORT primitive.
+        */
+       if (sflags & SCTP_ABORT) {
+               struct sctp_chunk *chunk;
 
-       /* If fragmentation is disabled and the message length exceeds the
-        * association fragmentation point, return EMSGSIZE.  The I-D
-        * does not specify what this error is, but this looks like
-        * a great fit.
-        */
-       if (sctp_sk(sk)->disable_fragments && (msg_len > asoc->frag_point)) {
-               err = -EMSGSIZE;
-               goto out_free;
+               chunk = sctp_make_abort_user(asoc, msg, msg_len);
+               if (!chunk)
+                       return -ENOMEM;
+
+               pr_debug("%s: aborting association:%p\n", __func__, asoc);
+               sctp_primitive_ABORT(net, asoc, chunk);
+
+               return 0;
        }
 
-       /* Check for invalid stream. */
+       /* No terminating flag applied: the caller may send data. */
+       return 1;
+}
+
+static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
+                               struct msghdr *msg, size_t msg_len,
+                               struct sctp_transport *transport,
+                               struct sctp_sndrcvinfo *sinfo)
+{
+       struct sock *sk = asoc->base.sk;
+       struct net *net = sock_net(sk);
+       struct sctp_datamsg *datamsg;
+       bool wait_connect = false;
+       struct sctp_chunk *chunk;
+       long timeo;
+       int err;
+
        if (sinfo->sinfo_stream >= asoc->stream.outcnt) {
                err = -EINVAL;
-               goto out_free;
+               goto err;
        }
 
-       /* Allocate sctp_stream_out_ext if not already done */
        if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) {
                err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream);
                if (err)
-                       goto out_free;
+                       goto err;
        }
 
+       if (sctp_sk(sk)->disable_fragments && msg_len > asoc->frag_point) {
+               err = -EMSGSIZE;
+               goto err;
+       }
+
+       if (asoc->pmtu_pending)
+               sctp_assoc_pending_pmtu(asoc);
+
        if (sctp_wspace(asoc) < msg_len)
                sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
 
-       timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
        if (!sctp_wspace(asoc)) {
-               /* sk can be changed by peel off when waiting for buf. */
+               timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
                err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
-               if (err) {
-                       if (err == -ESRCH) {
-                               /* asoc is already dead. */
-                               new_asoc = NULL;
-                               err = -EPIPE;
-                       }
-                       goto out_free;
-               }
+               if (err)
+                       goto err;
        }
 
-       /* If an address is passed with the sendto/sendmsg call, it is used
-        * to override the primary destination address in the TCP model, or
-        * when SCTP_ADDR_OVER flag is set in the UDP model.
-        */
-       if ((sctp_style(sk, TCP) && msg_name) ||
-           (sinfo_flags & SCTP_ADDR_OVER)) {
-               chunk_tp = sctp_assoc_lookup_paddr(asoc, &to);
-               if (!chunk_tp) {
-                       err = -EINVAL;
-                       goto out_free;
-               }
-       } else
-               chunk_tp = NULL;
-
-       /* Auto-connect, if we aren't connected already. */
        if (sctp_state(asoc, CLOSED)) {
                err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
-               if (err < 0)
-                       goto out_free;
+               if (err)
+                       goto err;
 
-               /* If stream interleave is enabled, wait_connect has to be
-                * done earlier than data enqueue, as it needs to make data
-                * or idata according to asoc->intl_enable which is set
-                * after connection is done.
-                */
-               if (sctp_sk(asoc->base.sk)->strm_interleave) {
+               if (sctp_sk(sk)->strm_interleave) {
                        timeo = sock_sndtimeo(sk, 0);
                        err = sctp_wait_for_connect(asoc, &timeo);
                        if (err)
-                               goto out_unlock;
+                               goto err;
                } else {
                        wait_connect = true;
                }
@@ -2003,73 +1913,186 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
                pr_debug("%s: we associated primitively\n", __func__);
        }
 
-       /* Break the message into multiple chunks of maximum size. */
        datamsg = sctp_datamsg_from_user(asoc, sinfo, &msg->msg_iter);
        if (IS_ERR(datamsg)) {
                err = PTR_ERR(datamsg);
-               goto out_free;
+               goto err;
        }
+
        asoc->force_delay = !!(msg->msg_flags & MSG_MORE);
 
-       /* Now send the (possibly) fragmented message. */
        list_for_each_entry(chunk, &datamsg->chunks, frag_list) {
                sctp_chunk_hold(chunk);
-
-               /* Do accounting for the write space.  */
                sctp_set_owner_w(chunk);
-
-               chunk->transport = chunk_tp;
+               chunk->transport = transport;
        }
 
-       /* Send it to the lower layers.  Note:  all chunks
-        * must either fail or succeed.   The lower layer
-        * works that way today.  Keep it that way or this
-        * breaks.
-        */
        err = sctp_primitive_SEND(net, asoc, datamsg);
-       /* Did the lower layer accept the chunk? */
        if (err) {
                sctp_datamsg_free(datamsg);
-               goto out_free;
+               goto err;
        }
 
        pr_debug("%s: we sent primitively\n", __func__);
 
        sctp_datamsg_put(datamsg);
-       err = msg_len;
 
        if (unlikely(wait_connect)) {
-               timeo = sock_sndtimeo(sk, msg_flags & MSG_DONTWAIT);
+               timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
                sctp_wait_for_connect(asoc, &timeo);
        }
 
-       /* If we are already past ASSOCIATE, the lower
-        * layers are responsible for association cleanup.
-        */
-       goto out_unlock;
+       err = msg_len;
 
-out_free:
-       if (new_asoc)
-               sctp_association_free(asoc);
-out_unlock:
-       release_sock(sk);
+err:
+       return err;
+}
 
-out_nounlock:
-       return sctp_error(sk, msg_flags, err);
+/* Pick the destination address out of msg->msg_name, if one applies.
+ *
+ * Returns NULL when no address is taken from the message, a pointer into
+ * msg->msg_name when one was supplied and passed sctp_verify_addr(), or an
+ * ERR_PTR() carrying that function's error.  @cmsgs is not referenced in
+ * this function.
+ */
+static union sctp_addr *sctp_sendmsg_get_daddr(struct sock *sk,
+                                              const struct msghdr *msg,
+                                              struct sctp_cmsgs *cmsgs)
+{
+       union sctp_addr *daddr = NULL;
+       int err;
 
-#if 0
-do_sock_err:
-       if (msg_len)
-               err = msg_len;
-       else
-               err = sock_error(sk);
-       goto out;
+       /* msg_name is ignored on UDP_HIGH_BANDWIDTH-style sockets. */
+       if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) {
+               int len = msg->msg_namelen;
 
-do_interrupted:
-       if (msg_len)
-               err = msg_len;
-       goto out;
-#endif /* 0 */
+               /* Cap the length passed to the verifier at the size of
+                * one union sctp_addr.
+                */
+               if (len > sizeof(*daddr))
+                       len = sizeof(*daddr);
+
+               /* No copy is made: daddr aliases the msg_name buffer. */
+               daddr = (union sctp_addr *)msg->msg_name;
+
+               err = sctp_verify_addr(sk, daddr, len);
+               if (err)
+                       return ERR_PTR(err);
+       }
+
+       return daddr;
+}
+
+/* Fill in the fields of @sinfo that the caller's control messages left
+ * unspecified, using the defaults stored on @asoc, and record any
+ * authinfo cmsg in @sinfo for the transmit path.
+ */
+static void sctp_sendmsg_update_sinfo(struct sctp_association *asoc,
+                                     struct sctp_sndrcvinfo *sinfo,
+                                     struct sctp_cmsgs *cmsgs)
+{
+       /* Neither an srinfo nor an sinfo cmsg was supplied: stream, ppid,
+        * context and association id all come from the association.
+        */
+       if (!cmsgs->srinfo && !cmsgs->sinfo) {
+               sinfo->sinfo_stream = asoc->default_stream;
+               sinfo->sinfo_ppid = asoc->default_ppid;
+               sinfo->sinfo_context = asoc->default_context;
+               sinfo->sinfo_assoc_id = sctp_assoc2id(asoc);
+
+               /* The default flags are applied only when there is no
+                * prinfo cmsg either.
+                */
+               if (!cmsgs->prinfo)
+                       sinfo->sinfo_flags = asoc->default_flags;
+       }
+
+       /* timetolive falls back to the association default unless an
+        * srinfo or prinfo cmsg was given.
+        */
+       if (!cmsgs->srinfo && !cmsgs->prinfo)
+               sinfo->sinfo_timetolive = asoc->default_timetolive;
+
+       if (cmsgs->authinfo) {
+               /* Reuse sinfo_tsn to indicate that authinfo was set and
+                * sinfo_ssn to save the keyid on tx path.
+                */
+               sinfo->sinfo_tsn = 1;
+               sinfo->sinfo_ssn = cmsgs->authinfo->auth_keynumber;
+       }
+}
+
+/* Send path for a socket: parse the request, locate or create the target
+ * association(s) and pass the message to sctp_sendmsg_to_asoc().
+ *
+ * The socket lock is taken after cmsg and address parsing and is dropped
+ * on the out_unlock path.  The value returned is whatever sctp_error()
+ * yields for the final err.
+ */
+static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
+{
+       struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+       struct sctp_transport *transport = NULL;
+       struct sctp_sndrcvinfo _sinfo, *sinfo;
+       struct sctp_association *asoc;
+       struct sctp_cmsgs cmsgs;
+       union sctp_addr *daddr;
+       bool new = false;
+       __u16 sflags;
+       int err;
+
+       /* Parse the control messages; _sinfo receives the send info. */
+       err = sctp_sendmsg_parse(sk, &cmsgs, &_sinfo, msg, msg_len);
+       if (err)
+               goto out;
+
+       sinfo  = &_sinfo;
+       sflags = sinfo->sinfo_flags;
+
+       /* Destination address from msg; NULL means none is used. */
+       daddr = sctp_sendmsg_get_daddr(sk, msg, &cmsgs);
+       if (IS_ERR(daddr)) {
+               err = PTR_ERR(daddr);
+               goto out;
+       }
+
+       lock_sock(sk);
+
+       /* SCTP_SENDALL on a UDP-style socket: try every association on
+        * the endpoint; the first error ends the loop.
+        *
+        * NOTE(review): sctp_sendmsg_check_sflags() can issue
+        * SHUTDOWN/ABORT on asoc while ep->asocs is being walked with
+        * the non-"safe" iterator; confirm an entry cannot be unlinked
+        * from the list during this loop.
+        */
+       if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) {
+               list_for_each_entry(asoc, &ep->asocs, asocs) {
+                       /* 0: nothing to send on this asoc, <0: error,
+                        * >0: go on and send.
+                        */
+                       err = sctp_sendmsg_check_sflags(asoc, sflags, msg,
+                                                       msg_len);
+                       if (err == 0)
+                               continue;
+                       if (err < 0)
+                               goto out_unlock;
+
+                       sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs);
+
+                       err = sctp_sendmsg_to_asoc(asoc, msg, msg_len,
+                                                  NULL, sinfo);
+                       if (err < 0)
+                               goto out_unlock;
+
+                       /* err is non-negative here; rewind msg_iter by
+                        * that amount, presumably so the next association
+                        * sees the same payload (TODO confirm).
+                        */
+                       iov_iter_revert(&msg->msg_iter, err);
+               }
+
+               goto out_unlock;
+       }
+
+       /* Find the association: by address when one was given (creating
+        * a new association if the lookup fails), otherwise by
+        * sinfo_assoc_id.
+        */
+       if (daddr) {
+               asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
+               if (asoc) {
+                       err = sctp_sendmsg_check_sflags(asoc, sflags, msg,
+                                                       msg_len);
+                       if (err <= 0)
+                               goto out_unlock;
+               } else {
+                       err = sctp_sendmsg_new_asoc(sk, sflags, &cmsgs, daddr,
+                                                   &transport);
+                       if (err)
+                               goto out_unlock;
+
+                       /* The new association is reached through the
+                        * transport handed back by the call above.
+                        */
+                       asoc = transport->asoc;
+                       new = true;
+               }
+
+               /* The transport is passed down only for TCP-style sockets
+                * or when SCTP_ADDR_OVER is set; otherwise NULL is used.
+                */
+               if (!sctp_style(sk, TCP) && !(sflags & SCTP_ADDR_OVER))
+                       transport = NULL;
+       } else {
+               asoc = sctp_id2assoc(sk, sinfo->sinfo_assoc_id);
+               if (!asoc) {
+                       err = -EPIPE;
+                       goto out_unlock;
+               }
+
+               err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len);
+               if (err <= 0)
+                       goto out_unlock;
+       }
+
+       /* Fill unspecified send info from the association defaults. */
+       sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs);
+
+       /* Send the message.  An association created above is freed again
+        * when the send fails, except on -ESRCH.
+        * NOTE(review): -ESRCH presumably means the association is already
+        * gone at that point; confirm against sctp_wait_for_sndbuf().
+        */
+       err = sctp_sendmsg_to_asoc(asoc, msg, msg_len, transport, sinfo);
+       if (err < 0 && err != -ESRCH && new)
+               sctp_association_free(asoc);
+
+out_unlock:
+       release_sock(sk);
+out:
+       return sctp_error(sk, msg->msg_flags, err);
 }
 
 /* This is an extended version of skb_pull() that removes the data from the
@@ -3623,6 +3646,33 @@ static int sctp_setsockopt_del_key(struct sock *sk,
 
 }
 
+/*
+ * 8.3.4  Deactivate a Shared Key (SCTP_AUTH_DEACTIVATE_KEY)
+ *
+ * This set option will deactivate a shared secret key.
+ *
+ * Returns -EACCES when auth is not enabled on the endpoint, -EINVAL when
+ * optlen is not sizeof(struct sctp_authkeyid) or when a non-zero
+ * association id cannot be resolved on a UDP-style socket, -EFAULT when
+ * the option value cannot be copied from user space, and otherwise the
+ * result of sctp_auth_deact_key_id().
+ */
+static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval,
+                                         unsigned int optlen)
+{
+       struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+       struct sctp_authkeyid val;
+       struct sctp_association *asoc;
+
+       if (!ep->auth_enable)
+               return -EACCES;
+
+       if (optlen != sizeof(struct sctp_authkeyid))
+               return -EINVAL;
+       if (copy_from_user(&val, optval, optlen))
+               return -EFAULT;
+
+       /* asoc may still be NULL past this check (association id 0, or a
+        * socket that is not UDP-style); it is passed through as-is.
+        */
+       asoc = sctp_id2assoc(sk, val.scact_assoc_id);
+       if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP))
+               return -EINVAL;
+
+       return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber);
+}
+
 /*
  * 8.1.23 SCTP_AUTO_ASCONF
  *
@@ -4215,6 +4265,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
        case SCTP_AUTH_DELETE_KEY:
                retval = sctp_setsockopt_del_key(sk, optval, optlen);
                break;
+       case SCTP_AUTH_DEACTIVATE_KEY:
+               retval = sctp_setsockopt_deactivate_key(sk, optval, optlen);
+               break;
        case SCTP_AUTO_ASCONF:
                retval = sctp_setsockopt_auto_asconf(sk, optval, optlen);
                break;
@@ -7189,6 +7242,7 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
        case SCTP_AUTH_KEY:
        case SCTP_AUTH_CHUNK:
        case SCTP_AUTH_DELETE_KEY:
+       case SCTP_AUTH_DEACTIVATE_KEY:
                retval = -EOPNOTSUPP;
                break;
        case SCTP_HMAC_IDENT:
@@ -7811,8 +7865,8 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
 
                        if (cmsgs->srinfo->sinfo_flags &
                            ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
-                             SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
-                             SCTP_ABORT | SCTP_EOF))
+                             SCTP_SACK_IMMEDIATELY | SCTP_SENDALL |
+                             SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF))
                                return -EINVAL;
                        break;
 
@@ -7835,10 +7889,60 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
 
                        if (cmsgs->sinfo->snd_flags &
                            ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
-                             SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
-                             SCTP_ABORT | SCTP_EOF))
+                             SCTP_SACK_IMMEDIATELY | SCTP_SENDALL |
+                             SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF))
                                return -EINVAL;
                        break;
+               case SCTP_PRINFO:
+                       /* SCTP Socket API Extension
+                        * 5.3.7 SCTP PR-SCTP Information Structure (SCTP_PRINFO)
+                        *
+                        * This cmsghdr structure specifies SCTP options for sendmsg().
+                        *
+                        * cmsg_level    cmsg_type      cmsg_data[]
+                        * ------------  ------------   ---------------------
+                        * IPPROTO_SCTP  SCTP_PRINFO    struct sctp_prinfo
+                        */
+                       if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_prinfo)))
+                               return -EINVAL;
+
+                       cmsgs->prinfo = CMSG_DATA(cmsg);
+                       if (cmsgs->prinfo->pr_policy & ~SCTP_PR_SCTP_MASK)
+                               return -EINVAL;
+
+                       if (cmsgs->prinfo->pr_policy == SCTP_PR_SCTP_NONE)
+                               cmsgs->prinfo->pr_value = 0;
+                       break;
+               case SCTP_AUTHINFO:
+                       /* SCTP Socket API Extension
+                        * 5.3.8 SCTP AUTH Information Structure (SCTP_AUTHINFO)
+                        *
+                        * This cmsghdr structure specifies SCTP options for sendmsg().
+                        *
+                        * cmsg_level    cmsg_type      cmsg_data[]
+                        * ------------  ------------   ---------------------
+                        * IPPROTO_SCTP  SCTP_AUTHINFO  struct sctp_authinfo
+                        */
+                       if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_authinfo)))
+                               return -EINVAL;
+
+                       cmsgs->authinfo = CMSG_DATA(cmsg);
+                       break;
+               case SCTP_DSTADDRV4:
+               case SCTP_DSTADDRV6:
+                       /* SCTP Socket API Extension
+                        * 5.3.9/10 SCTP Destination IPv4/6 Address Structure (SCTP_DSTADDRV4/6)
+                        *
+                        * This cmsghdr structure specifies SCTP options for sendmsg().
+                        *
+                        * cmsg_level    cmsg_type         cmsg_data[]
+                        * ------------  ------------   ---------------------
+                        * IPPROTO_SCTP  SCTP_DSTADDRV4 struct in_addr
+                        * ------------  ------------   ---------------------
+                        * IPPROTO_SCTP  SCTP_DSTADDRV6 struct in6_addr
+                        */
+                       cmsgs->addrs_msg = my_msg;
+                       break;
                default:
                        return -EINVAL;
                }
@@ -8062,6 +8166,26 @@ static void sctp_wfree(struct sk_buff *skb)
        sk->sk_wmem_queued   -= skb->truesize;
        sk_mem_uncharge(sk, skb->truesize);
 
+       if (chunk->shkey) {
+               struct sctp_shared_key *shkey = chunk->shkey;
+
+               /* refcnt == 2 and !list_empty mean after this release, it's
+                * not being used anywhere, and it's time to notify userland
+                * that this shkey can be freed if it's been deactivated.
+                */
+               if (shkey->deactivated && !list_empty(&shkey->key_list) &&
+                   refcount_read(&shkey->refcnt) == 2) {
+                       struct sctp_ulpevent *ev;
+
+                       ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id,
+                                                       SCTP_AUTH_FREE_KEY,
+                                                       GFP_KERNEL);
+                       if (ev)
+                               asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+               }
+               sctp_auth_shkey_release(chunk->shkey);
+       }
+
        sock_wfree(skb);
        sctp_wake_up_waiters(sk, asoc);
 
index 38ae22b65e778b7f9d4e3cc0afa4f0c98c211479..5f8046c62d90ba435d851d6736c81c91f1679212 100644 (file)
@@ -7,13 +7,11 @@
  *  applicable with RoCE-cards only
  *
  *  Initial restrictions:
- *    - non-blocking connect postponed
- *    - IPv6 support postponed
  *    - support for alternate links postponed
  *    - partial support for non-blocking sockets only
  *    - support for urgent data postponed
  *
- *  Copyright IBM Corp. 2016
+ *  Copyright IBM Corp. 2016, 2018
  *
  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
  *              based on prototype from Frank Blaschka
@@ -24,7 +22,6 @@
 
 #include <linux/module.h>
 #include <linux/socket.h>
-#include <linux/inetdevice.h>
 #include <linux/workqueue.h>
 #include <linux/in.h>
 #include <linux/sched/signal.h>
@@ -66,6 +63,10 @@ static struct smc_hashinfo smc_v4_hashinfo = {
        .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
 };
 
+/* Hash info instance referenced by smc_proto6 through .h.smc_hash; only
+ * its lock is initialised explicitly here.
+ */
+static struct smc_hashinfo smc_v6_hashinfo = {
+       .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
+};
+
 int smc_hash_sk(struct sock *sk)
 {
        struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
@@ -105,6 +106,18 @@ struct proto smc_proto = {
 };
 EXPORT_SYMBOL_GPL(smc_proto);
 
+/* Protocol descriptor registered under the name "SMC6".  smc_sock_alloc()
+ * selects it when the requested protocol is SMCPROTO_SMC6; it hashes its
+ * sockets in smc_v6_hashinfo.
+ */
+struct proto smc_proto6 = {
+       .name           = "SMC6",
+       .owner          = THIS_MODULE,
+       .keepalive      = smc_set_keepalive,
+       .hash           = smc_hash_sk,
+       .unhash         = smc_unhash_sk,
+       .obj_size       = sizeof(struct smc_sock),
+       .h.smc_hash     = &smc_v6_hashinfo,
+       .slab_flags     = SLAB_TYPESAFE_BY_RCU,
+};
+EXPORT_SYMBOL_GPL(smc_proto6);
+
 static int smc_release(struct socket *sock)
 {
        struct sock *sk = sock->sk;
@@ -161,19 +174,22 @@ static void smc_destruct(struct sock *sk)
        sk_refcnt_debug_dec(sk);
 }
 
-static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
+static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
+                                  int protocol)
 {
        struct smc_sock *smc;
+       struct proto *prot;
        struct sock *sk;
 
-       sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
+       prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
+       sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
        if (!sk)
                return NULL;
 
        sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
        sk->sk_state = SMC_INIT;
        sk->sk_destruct = smc_destruct;
-       sk->sk_protocol = SMCPROTO_SMC;
+       sk->sk_protocol = protocol;
        smc = smc_sk(sk);
        INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
        INIT_LIST_HEAD(&smc->accept_q);
@@ -200,10 +216,13 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
                goto out;
 
        rc = -EAFNOSUPPORT;
+       if (addr->sin_family != AF_INET &&
+           addr->sin_family != AF_INET6 &&
+           addr->sin_family != AF_UNSPEC)
+               goto out;
        /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
-       if ((addr->sin_family != AF_INET) &&
-           ((addr->sin_family != AF_UNSPEC) ||
-            (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
+       if (addr->sin_family == AF_UNSPEC &&
+           addr->sin_addr.s_addr != htonl(INADDR_ANY))
                goto out;
 
        lock_sock(sk);
@@ -273,46 +292,7 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
        smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
 }
 
-/* determine subnet and mask of internal TCP socket */
-int smc_netinfo_by_tcpsk(struct socket *clcsock,
-                        __be32 *subnet, u8 *prefix_len)
-{
-       struct dst_entry *dst = sk_dst_get(clcsock->sk);
-       struct in_device *in_dev;
-       struct sockaddr_in addr;
-       int rc = -ENOENT;
-
-       if (!dst) {
-               rc = -ENOTCONN;
-               goto out;
-       }
-       if (!dst->dev) {
-               rc = -ENODEV;
-               goto out_rel;
-       }
-
-       /* get address to which the internal TCP socket is bound */
-       kernel_getsockname(clcsock, (struct sockaddr *)&addr);
-       /* analyze IPv4 specific data of net_device belonging to TCP socket */
-       rcu_read_lock();
-       in_dev = __in_dev_get_rcu(dst->dev);
-       for_ifa(in_dev) {
-               if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
-                       continue;
-               *prefix_len = inet_mask_len(ifa->ifa_mask);
-               *subnet = ifa->ifa_address & ifa->ifa_mask;
-               rc = 0;
-               break;
-       } endfor_ifa(in_dev);
-       rcu_read_unlock();
-
-out_rel:
-       dst_release(dst);
-out:
-       return rc;
-}
-
-static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
+static int smc_clnt_conf_first_link(struct smc_sock *smc)
 {
        struct smc_link_group *lgr = smc->conn.lgr;
        struct smc_link *link;
@@ -332,6 +312,9 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
                return rc;
        }
 
+       if (link->llc_confirm_rc)
+               return SMC_CLC_DECL_RMBE_EC;
+
        rc = smc_ib_modify_qp_rts(link);
        if (rc)
                return SMC_CLC_DECL_INTERR;
@@ -346,11 +329,33 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
        /* send CONFIRM LINK response over RoCE fabric */
        rc = smc_llc_send_confirm_link(link,
                                       link->smcibdev->mac[link->ibport - 1],
-                                      gid, SMC_LLC_RESP);
+                                      &link->smcibdev->gid[link->ibport - 1],
+                                      SMC_LLC_RESP);
        if (rc < 0)
                return SMC_CLC_DECL_TCL;
 
-       return rc;
+       /* receive ADD LINK request from server over RoCE fabric */
+       rest = wait_for_completion_interruptible_timeout(&link->llc_add,
+                                                        SMC_LLC_WAIT_TIME);
+       if (rest <= 0) {
+               struct smc_clc_msg_decline dclc;
+
+               rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+                                     SMC_CLC_DECLINE);
+               return rc;
+       }
+
+       /* send add link reject message, only one link supported for now */
+       rc = smc_llc_send_add_link(link,
+                                  link->smcibdev->mac[link->ibport - 1],
+                                  &link->smcibdev->gid[link->ibport - 1],
+                                  SMC_LLC_RESP);
+       if (rc < 0)
+               return SMC_CLC_DECL_TCL;
+
+       link->state = SMC_LNK_ACTIVE;
+
+       return 0;
 }
 
 static void smc_conn_save_peer_info(struct smc_sock *smc,
@@ -372,19 +377,9 @@ static void smc_link_save_peer_info(struct smc_link *link,
        link->peer_mtu = clc->qp_mtu;
 }
 
-static void smc_lgr_forget(struct smc_link_group *lgr)
-{
-       spin_lock_bh(&smc_lgr_list.lock);
-       /* do not use this link group for new connections */
-       if (!list_empty(&lgr->list))
-               list_del_init(&lgr->list);
-       spin_unlock_bh(&smc_lgr_list.lock);
-}
-
 /* setup for RDMA connection of client */
 static int smc_connect_rdma(struct smc_sock *smc)
 {
-       struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
        struct smc_clc_msg_accept_confirm aclc;
        int local_contact = SMC_FIRST_CONTACT;
        struct smc_ib_device *smcibdev;
@@ -438,8 +433,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
 
        srv_first_contact = aclc.hdr.flag;
        mutex_lock(&smc_create_lgr_pending);
-       local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
-                                       ibport, &aclc.lcl, srv_first_contact);
+       local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl,
+                                       srv_first_contact);
        if (local_contact < 0) {
                rc = local_contact;
                if (rc == -ENOMEM)
@@ -498,8 +493,7 @@ static int smc_connect_rdma(struct smc_sock *smc)
 
        if (local_contact == SMC_FIRST_CONTACT) {
                /* QP confirmation over RoCE fabric */
-               reason_code = smc_clnt_conf_first_link(
-                       smc, &smcibdev->gid[ibport - 1]);
+               reason_code = smc_clnt_conf_first_link(smc);
                if (reason_code < 0) {
                        rc = reason_code;
                        goto out_err_unlock;
@@ -556,9 +550,8 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
        /* separate smc parameter checking to be safe */
        if (alen < sizeof(addr->sa_family))
                goto out_err;
-       if (addr->sa_family != AF_INET)
+       if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
                goto out_err;
-       smc->addr = addr;       /* needed for nonblocking connect */
 
        lock_sock(sk);
        switch (sk->sk_state) {
@@ -599,7 +592,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
        int rc;
 
        release_sock(lsk);
-       new_sk = smc_sock_alloc(sock_net(lsk), NULL);
+       new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
        if (!new_sk) {
                rc = -ENOMEM;
                lsk->sk_err = ENOMEM;
@@ -748,9 +741,34 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
 
                rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
                                      SMC_CLC_DECLINE);
+               return rc;
        }
 
-       return rc;
+       if (link->llc_confirm_resp_rc)
+               return SMC_CLC_DECL_RMBE_EC;
+
+       /* send ADD LINK request to client over the RoCE fabric */
+       rc = smc_llc_send_add_link(link,
+                                  link->smcibdev->mac[link->ibport - 1],
+                                  &link->smcibdev->gid[link->ibport - 1],
+                                  SMC_LLC_REQ);
+       if (rc < 0)
+               return SMC_CLC_DECL_TCL;
+
+       /* receive ADD LINK response from client over the RoCE fabric */
+       rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
+                                                        SMC_LLC_WAIT_TIME);
+       if (rest <= 0) {
+               struct smc_clc_msg_decline dclc;
+
+               rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+                                     SMC_CLC_DECLINE);
+               return rc;
+       }
+
+       link->state = SMC_LNK_ACTIVE;
+
+       return 0;
 }
 
 /* setup for RDMA connection of server */
@@ -766,13 +784,10 @@ static void smc_listen_work(struct work_struct *work)
        struct sock *newsmcsk = &new_smc->sk;
        struct smc_clc_msg_proposal *pclc;
        struct smc_ib_device *smcibdev;
-       struct sockaddr_in peeraddr;
        u8 buf[SMC_CLC_MAX_LEN];
        struct smc_link *link;
        int reason_code = 0;
        int rc = 0;
-       __be32 subnet;
-       u8 prefix_len;
        u8 ibport;
 
        /* check if peer is smc capable */
@@ -807,28 +822,19 @@ static void smc_listen_work(struct work_struct *work)
                goto decline_rdma;
        }
 
-       /* determine subnet and mask from internal TCP socket */
-       rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
-       if (rc) {
-               reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
-               goto decline_rdma;
-       }
-
        pclc = (struct smc_clc_msg_proposal *)&buf;
        pclc_prfx = smc_clc_proposal_get_prefix(pclc);
-       if (pclc_prfx->outgoing_subnet != subnet ||
-           pclc_prfx->prefix_len != prefix_len) {
+
+       rc = smc_clc_prfx_match(newclcsock, pclc_prfx);
+       if (rc) {
                reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
                goto decline_rdma;
        }
 
-       /* get address of the peer connected to the internal TCP socket */
-       kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr);
-
        /* allocate connection / link group */
        mutex_lock(&smc_create_lgr_pending);
-       local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
-                                       smcibdev, ibport, &pclc->lcl, 0);
+       local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl,
+                                       0);
        if (local_contact < 0) {
                rc = local_contact;
                if (rc == -ENOMEM)
@@ -977,10 +983,6 @@ static void smc_tcp_listen_work(struct work_struct *work)
                lsmc->clcsock = NULL;
        }
        release_sock(lsk);
-       /* no more listening, wake up smc_close_wait_listen_clcsock and
-        * accept
-        */
-       lsk->sk_state_change(lsk);
        sock_put(&lsmc->sk); /* sock_hold in smc_listen */
 }
 
@@ -1382,6 +1384,7 @@ static const struct proto_ops smc_sock_ops = {
 static int smc_create(struct net *net, struct socket *sock, int protocol,
                      int kern)
 {
+       int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
        struct smc_sock *smc;
        struct sock *sk;
        int rc;
@@ -1391,22 +1394,24 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
                goto out;
 
        rc = -EPROTONOSUPPORT;
-       if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
+       if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
                goto out;
 
        rc = -ENOBUFS;
        sock->ops = &smc_sock_ops;
-       sk = smc_sock_alloc(net, sock);
+       sk = smc_sock_alloc(net, sock, protocol);
        if (!sk)
                goto out;
 
        /* create internal TCP socket for CLC handshake and fallback */
        smc = smc_sk(sk);
        smc->use_fallback = false; /* assume rdma capability first */
-       rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
-                             IPPROTO_TCP, &smc->clcsock);
-       if (rc)
+       rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
+                             &smc->clcsock);
+       if (rc) {
                sk_common_release(sk);
+               goto out;
+       }
        smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
        smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
 
@@ -1442,16 +1447,23 @@ static int __init smc_init(void)
 
        rc = proto_register(&smc_proto, 1);
        if (rc) {
-               pr_err("%s: proto_register fails with %d\n", __func__, rc);
+               pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
                goto out_pnet;
        }
 
+       rc = proto_register(&smc_proto6, 1);
+       if (rc) {
+               pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
+               goto out_proto;
+       }
+
        rc = sock_register(&smc_sock_family_ops);
        if (rc) {
                pr_err("%s: sock_register fails with %d\n", __func__, rc);
-               goto out_proto;
+               goto out_proto6;
        }
        INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
+       INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
 
        rc = smc_ib_register_client();
        if (rc) {
@@ -1464,6 +1476,8 @@ static int __init smc_init(void)
 
 out_sock:
        sock_unregister(PF_SMC);
+out_proto6:
+       proto_unregister(&smc_proto6);
 out_proto:
        proto_unregister(&smc_proto);
 out_pnet:
@@ -1482,11 +1496,13 @@ static void __exit smc_exit(void)
        spin_unlock_bh(&smc_lgr_list.lock);
        list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
                list_del_init(&lgr->list);
+               cancel_delayed_work_sync(&lgr->free_work);
                smc_lgr_free(lgr); /* free link group */
        }
        static_branch_disable(&tcp_have_smc);
        smc_ib_unregister_client();
        sock_unregister(PF_SMC);
+       proto_unregister(&smc_proto6);
        proto_unregister(&smc_proto);
        smc_pnet_exit();
 }
index 9518986c97b1846fcaac148b7b9f9a61bf1473f2..e4829a2f46baf8fb7c129ba4ffeca24c5189134e 100644 (file)
 
 #include "smc_ib.h"
 
-#define SMCPROTO_SMC           0       /* SMC protocol */
+#define SMCPROTO_SMC           0       /* SMC protocol, IPv4 */
+#define SMCPROTO_SMC6          1       /* SMC protocol, IPv6 */
 
 #define SMC_MAX_PORTS          2       /* Max # of ports */
 
 extern struct proto smc_proto;
+extern struct proto smc_proto6;
 
 #ifdef ATOMIC64_INIT
 #define KERNEL_HAS_ATOMIC64
@@ -172,7 +174,6 @@ struct smc_sock {                           /* smc sock container */
        struct sock             sk;
        struct socket           *clcsock;       /* internal tcp socket */
        struct smc_connection   conn;           /* smc connection */
-       struct sockaddr         *addr;          /* inet connect address */
        struct smc_sock         *listen_smc;    /* listen parent */
        struct work_struct      tcp_listen_work;/* handle tcp socket accepts */
        struct work_struct      smc_listen_work;/* prepare new accept socket */
@@ -263,10 +264,8 @@ static inline bool using_ipsec(struct smc_sock *smc)
 
 struct smc_clc_msg_local;
 
-int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
-                        u8 *prefix_len);
 void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
                    struct smc_ib_device *smcibdev, u8 ibport,
                    struct smc_clc_msg_local *lcl, int srv_first_contact);
 struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
index 3cd086e5bd28c10eff887d94b51358757db094be..b42395d24cba50b0e30c39ab6275cdaa1eca3235 100644 (file)
@@ -269,7 +269,7 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
 
        if (wc->byte_len < offsetof(struct smc_cdc_msg, reserved))
                return; /* short message */
-       if (cdc->len != sizeof(*cdc))
+       if (cdc->len != SMC_WR_TX_SIZE)
                return; /* invalid message */
        smc_cdc_msg_recv(cdc, link, wc->wr_id);
 }
index 8ac51583a063ca27449c52b296e0b3372d635261..64fbc3230e6c456c355e8dd24e9394301fff18e3 100644 (file)
@@ -5,15 +5,17 @@
  *  CLC (connection layer control) handshake over initial TCP socket to
  *  prepare for RDMA traffic
  *
- *  Copyright IBM Corp. 2016
+ *  Copyright IBM Corp. 2016, 2018
  *
  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
  */
 
 #include <linux/in.h>
+#include <linux/inetdevice.h>
 #include <linux/if_ether.h>
 #include <linux/sched/signal.h>
 
+#include <net/addrconf.h>
 #include <net/sock.h>
 #include <net/tcp.h>
 
@@ -22,6 +24,9 @@
 #include "smc_clc.h"
 #include "smc_ib.h"
 
+/* eye catcher "SMCR" EBCDIC for CLC messages */
+static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
+
 /* check if received message has a correct header length and contains valid
  * heading and trailing eyecatchers
  */
@@ -70,6 +75,172 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
        return true;
 }
 
+/* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */
+static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4,
+                                struct smc_clc_msg_proposal_prefix *prop)
+{
+       struct in_device *in_dev = __in_dev_get_rcu(dst->dev);
+
+       if (!in_dev)
+               return -ENODEV;
+       for_ifa(in_dev) {
+               if (!inet_ifa_match(ipv4, ifa))
+                       continue;
+               prop->prefix_len = inet_mask_len(ifa->ifa_mask);
+               prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask;
+               /* prop->ipv6_prefixes_cnt = 0; already done by memset before */
+               return 0;
+       } endfor_ifa(in_dev);
+       return -ENOENT;
+}
+
+/* fill CLC proposal msg with ipv6 prefixes from device */
+static int smc_clc_prfx_set6_rcu(struct dst_entry *dst,
+                                struct smc_clc_msg_proposal_prefix *prop,
+                                struct smc_clc_ipv6_prefix *ipv6_prfx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+       struct inet6_dev *in6_dev = __in6_dev_get(dst->dev);
+       struct inet6_ifaddr *ifa;
+       int cnt = 0;
+
+       if (!in6_dev)
+               return -ENODEV;
+       /* use a maximum of 8 IPv6 prefixes from device */
+       list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
+               if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
+                       continue;
+               ipv6_addr_prefix(&ipv6_prfx[cnt].prefix,
+                                &ifa->addr, ifa->prefix_len);
+               ipv6_prfx[cnt].prefix_len = ifa->prefix_len;
+               cnt++;
+               if (cnt == SMC_CLC_MAX_V6_PREFIX)
+                       break;
+       }
+       prop->ipv6_prefixes_cnt = cnt;
+       if (cnt)
+               return 0;
+#endif
+       return -ENOENT;
+}
+
+/* retrieve and set prefixes in CLC proposal msg */
+static int smc_clc_prfx_set(struct socket *clcsock,
+                           struct smc_clc_msg_proposal_prefix *prop,
+                           struct smc_clc_ipv6_prefix *ipv6_prfx)
+{
+       struct dst_entry *dst = sk_dst_get(clcsock->sk);
+       struct sockaddr_storage addrs;
+       struct sockaddr_in6 *addr6;
+       struct sockaddr_in *addr;
+       int rc = -ENOENT;
+
+       memset(prop, 0, sizeof(*prop));
+       if (!dst) {
+               rc = -ENOTCONN;
+               goto out;
+       }
+       if (!dst->dev) {
+               rc = -ENODEV;
+               goto out_rel;
+       }
+       /* get address to which the internal TCP socket is bound */
+       kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
+       /* analyze IP specific data of net_device belonging to TCP socket */
+       addr6 = (struct sockaddr_in6 *)&addrs;
+       rcu_read_lock();
+       if (addrs.ss_family == PF_INET) {
+               /* IPv4 */
+               addr = (struct sockaddr_in *)&addrs;
+               rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop);
+       } else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) {
+               /* mapped IPv4 address - peer is IPv4 only */
+               rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3],
+                                          prop);
+       } else {
+               /* IPv6 */
+               rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx);
+       }
+       rcu_read_unlock();
+out_rel:
+       dst_release(dst);
+out:
+       return rc;
+}
+
+/* match ipv4 addrs of dev against addr in CLC proposal */
+static int smc_clc_prfx_match4_rcu(struct net_device *dev,
+                                  struct smc_clc_msg_proposal_prefix *prop)
+{
+       struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+       if (!in_dev)
+               return -ENODEV;
+       for_ifa(in_dev) {
+               if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) &&
+                   inet_ifa_match(prop->outgoing_subnet, ifa))
+                       return 0;
+       } endfor_ifa(in_dev);
+
+       return -ENOENT;
+}
+
+/* match ipv6 addrs of dev against addrs in CLC proposal */
+static int smc_clc_prfx_match6_rcu(struct net_device *dev,
+                                  struct smc_clc_msg_proposal_prefix *prop)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+       struct inet6_dev *in6_dev = __in6_dev_get(dev);
+       struct smc_clc_ipv6_prefix *ipv6_prfx;
+       struct inet6_ifaddr *ifa;
+       int i, max;
+
+       if (!in6_dev)
+               return -ENODEV;
+       /* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */
+       ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop));
+       max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX);
+       list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
+               if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
+                       continue;
+               for (i = 0; i < max; i++) {
+                       if (ifa->prefix_len == ipv6_prfx[i].prefix_len &&
+                           ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix,
+                                             ifa->prefix_len))
+                               return 0;
+               }
+       }
+#endif
+       return -ENOENT;
+}
+
+/* check if proposed prefixes match one of our device prefixes */
+int smc_clc_prfx_match(struct socket *clcsock,
+                      struct smc_clc_msg_proposal_prefix *prop)
+{
+       struct dst_entry *dst = sk_dst_get(clcsock->sk);
+       int rc;
+
+       if (!dst) {
+               rc = -ENOTCONN;
+               goto out;
+       }
+       if (!dst->dev) {
+               rc = -ENODEV;
+               goto out_rel;
+       }
+       rcu_read_lock();
+       if (!prop->ipv6_prefixes_cnt)
+               rc = smc_clc_prfx_match4_rcu(dst->dev, prop);
+       else
+               rc = smc_clc_prfx_match6_rcu(dst->dev, prop);
+       rcu_read_unlock();
+out_rel:
+       dst_release(dst);
+out:
+       return rc;
+}
+
 /* Wait for data on the tcp-socket, analyze received data
  * Returns:
  * 0 if success and it was not a decline that we received.
@@ -189,16 +360,24 @@ int smc_clc_send_proposal(struct smc_sock *smc,
                          struct smc_ib_device *smcibdev,
                          u8 ibport)
 {
+       struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
        struct smc_clc_msg_proposal_prefix pclc_prfx;
        struct smc_clc_msg_proposal pclc;
        struct smc_clc_msg_trail trl;
+       int len, i, plen, rc;
        int reason_code = 0;
-       struct kvec vec[3];
+       struct kvec vec[4];
        struct msghdr msg;
-       int len, plen, rc;
+
+       /* retrieve ip prefixes for CLC proposal msg */
+       rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx, ipv6_prfx);
+       if (rc)
+               return SMC_CLC_DECL_CNFERR; /* configuration error */
 
        /* send SMC Proposal CLC message */
-       plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
+       plen = sizeof(pclc) + sizeof(pclc_prfx) +
+              (pclc_prfx.ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) +
+              sizeof(trl);
        memset(&pclc, 0, sizeof(pclc));
        memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
        pclc.hdr.type = SMC_CLC_PROPOSAL;
@@ -209,23 +388,22 @@ int smc_clc_send_proposal(struct smc_sock *smc,
        memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
        pclc.iparea_offset = htons(0);
 
-       memset(&pclc_prfx, 0, sizeof(pclc_prfx));
-       /* determine subnet and mask from internal TCP socket */
-       rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
-                                 &pclc_prfx.prefix_len);
-       if (rc)
-               return SMC_CLC_DECL_CNFERR; /* configuration error */
-       pclc_prfx.ipv6_prefixes_cnt = 0;
        memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
        memset(&msg, 0, sizeof(msg));
-       vec[0].iov_base = &pclc;
-       vec[0].iov_len = sizeof(pclc);
-       vec[1].iov_base = &pclc_prfx;
-       vec[1].iov_len = sizeof(pclc_prfx);
-       vec[2].iov_base = &trl;
-       vec[2].iov_len = sizeof(trl);
+       i = 0;
+       vec[i].iov_base = &pclc;
+       vec[i++].iov_len = sizeof(pclc);
+       vec[i].iov_base = &pclc_prfx;
+       vec[i++].iov_len = sizeof(pclc_prfx);
+       if (pclc_prfx.ipv6_prefixes_cnt > 0) {
+               vec[i].iov_base = &ipv6_prfx[0];
+               vec[i++].iov_len = pclc_prfx.ipv6_prefixes_cnt *
+                                  sizeof(ipv6_prfx[0]);
+       }
+       vec[i].iov_base = &trl;
+       vec[i++].iov_len = sizeof(trl);
        /* due to the few bytes needed for clc-handshake this cannot block */
-       len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
+       len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen);
        if (len < sizeof(pclc)) {
                if (len >= 0) {
                        reason_code = -ENETUNREACH;
index c145a0f36a68caafc01c28620df246d29f0ef1ab..63bf1dc2c1f9731d97374e68ed7f804b3bfaba2f 100644 (file)
@@ -22,9 +22,6 @@
 #define SMC_CLC_CONFIRM                0x03
 #define SMC_CLC_DECLINE                0x04
 
-/* eye catcher "SMCR" EBCDIC for CLC messages */
-static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
-
 #define SMC_CLC_V1             0x1             /* SMC version                */
 #define CLC_WAIT_TIME          (6 * HZ)        /* max. wait time on clcsock  */
 #define SMC_CLC_DECL_MEM       0x01010000  /* insufficient memory resources  */
@@ -36,6 +33,7 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
 #define SMC_CLC_DECL_INTERR    0x99990000  /* internal error                 */
 #define SMC_CLC_DECL_TCL       0x02040000  /* timeout w4 QP confirm          */
 #define SMC_CLC_DECL_SEND      0x07000000  /* sending problem                */
+#define SMC_CLC_DECL_RMBE_EC   0x08000000  /* peer has eyecatcher in RMBE    */
 
 struct smc_clc_msg_hdr {       /* header1 of clc messages */
        u8 eyecatcher[4];       /* eye catcher */
@@ -62,10 +60,15 @@ struct smc_clc_msg_local {  /* header2 of clc messages */
        u8 mac[6];              /* mac of ib_device port */
 };
 
+#define SMC_CLC_MAX_V6_PREFIX  8
+
+/* Struct would be 4 byte aligned, but it is used in an array that is sent
+ * to peers and must conform to RFC7609, hence we need to use packed here.
+ */
 struct smc_clc_ipv6_prefix {
-       u8 prefix[4];
+       struct in6_addr prefix;
        u8 prefix_len;
-} __packed;
+} __packed;                    /* format defined in RFC7609 */
 
 struct smc_clc_msg_proposal_prefix {   /* prefix part of clc proposal message*/
        __be32 outgoing_subnet; /* subnet mask */
@@ -81,9 +84,11 @@ struct smc_clc_msg_proposal {        /* clc proposal message sent by Linux */
 } __aligned(4);
 
 #define SMC_CLC_PROPOSAL_MAX_OFFSET    0x28
-#define SMC_CLC_PROPOSAL_MAX_PREFIX    (8 * sizeof(struct smc_clc_ipv6_prefix))
+#define SMC_CLC_PROPOSAL_MAX_PREFIX    (SMC_CLC_MAX_V6_PREFIX * \
+                                        sizeof(struct smc_clc_ipv6_prefix))
 #define SMC_CLC_MAX_LEN                (sizeof(struct smc_clc_msg_proposal) + \
                                 SMC_CLC_PROPOSAL_MAX_OFFSET + \
+                                sizeof(struct smc_clc_msg_proposal_prefix) + \
                                 SMC_CLC_PROPOSAL_MAX_PREFIX + \
                                 sizeof(struct smc_clc_msg_trail))
 
@@ -124,9 +129,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
               ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
 }
 
-struct smc_sock;
-struct smc_ib_device;
-
+int smc_clc_prfx_match(struct socket *clcsock,
+                      struct smc_clc_msg_proposal_prefix *prop);
 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
                     u8 expected_type);
 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
index e339c0186dcfc2990d50c0b733e050a10f24b661..fa41d988174146f6888d29db743b074d7b1ee1db 100644 (file)
@@ -30,27 +30,6 @@ static void smc_close_cleanup_listen(struct sock *parent)
                smc_close_non_accepted(sk);
 }
 
-static void smc_close_wait_listen_clcsock(struct smc_sock *smc)
-{
-       DEFINE_WAIT_FUNC(wait, woken_wake_function);
-       struct sock *sk = &smc->sk;
-       signed long timeout;
-
-       timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME;
-       add_wait_queue(sk_sleep(sk), &wait);
-       do {
-               release_sock(sk);
-               if (smc->clcsock)
-                       timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE,
-                                            timeout);
-               sched_annotate_sleep();
-               lock_sock(sk);
-               if (!smc->clcsock)
-                       break;
-       } while (timeout);
-       remove_wait_queue(sk_sleep(sk), &wait);
-}
-
 /* wait for sndbuf data being transmitted */
 static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
 {
@@ -204,9 +183,11 @@ int smc_close_active(struct smc_sock *smc)
                        rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
                        /* wake up kernel_accept of smc_tcp_listen_worker */
                        smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
-                       smc_close_wait_listen_clcsock(smc);
                }
                smc_close_cleanup_listen(sk);
+               release_sock(sk);
+               flush_work(&smc->tcp_listen_work);
+               lock_sock(sk);
                break;
        case SMC_ACTIVE:
                smc_close_stream_wait(smc, timeout);
index 2424c7100aaf63cf19e9d63aaa110f25c6aefe3a..f44f6803f7ff2c8585caf555b7b4aea4168b680b 100644 (file)
 
 static u32 smc_lgr_num;                        /* unique link group number */
 
+static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
+{
+       /* client link group creation always follows the server link group
+        * creation. For client use a somewhat higher removal delay time,
+        * otherwise there is a risk of out-of-sync link groups.
+        */
+       mod_delayed_work(system_wq, &lgr->free_work,
+                        lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
+                                                SMC_LGR_FREE_DELAY_SERV);
+}
+
 /* Register connection's alert token in our lookup structure.
  * To use rbtrees we have to implement our own insert core.
  * Requires @conns_lock
@@ -111,13 +122,7 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
        write_unlock_bh(&lgr->conns_lock);
        if (!reduced || lgr->conns_num)
                return;
-       /* client link group creation always follows the server link group
-        * creation. For client use a somewhat higher removal delay time,
-        * otherwise there is a risk of out-of-sync link groups.
-        */
-       mod_delayed_work(system_wq, &lgr->free_work,
-                        lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
-                                                SMC_LGR_FREE_DELAY_SERV);
+       smc_lgr_schedule_free_work(lgr);
 }
 
 static void smc_lgr_free_work(struct work_struct *work)
@@ -140,11 +145,12 @@ static void smc_lgr_free_work(struct work_struct *work)
        list_del_init(&lgr->list); /* remove from smc_lgr_list */
 free:
        spin_unlock_bh(&smc_lgr_list.lock);
-       smc_lgr_free(lgr);
+       if (!delayed_work_pending(&lgr->free_work))
+               smc_lgr_free(lgr);
 }
 
 /* create a new SMC link group */
-static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
+static int smc_lgr_create(struct smc_sock *smc,
                          struct smc_ib_device *smcibdev, u8 ibport,
                          char *peer_systemid, unsigned short vlan_id)
 {
@@ -161,7 +167,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
        }
        lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
        lgr->sync_err = false;
-       lgr->daddr = peer_in_addr;
        memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
        lgr->vlan_id = vlan_id;
        rwlock_init(&lgr->sndbufs_lock);
@@ -177,6 +182,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
 
        lnk = &lgr->lnk[SMC_SINGLE_LINK];
        /* initialize link */
+       lnk->state = SMC_LNK_ACTIVATING;
+       lnk->link_id = SMC_SINGLE_LINK;
        lnk->smcibdev = smcibdev;
        lnk->ibport = ibport;
        lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
@@ -198,6 +205,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
                goto destroy_qp;
        init_completion(&lnk->llc_confirm);
        init_completion(&lnk->llc_confirm_resp);
+       init_completion(&lnk->llc_add);
+       init_completion(&lnk->llc_add_resp);
 
        smc->conn.lgr = lgr;
        rwlock_init(&lgr->conns_lock);
@@ -306,6 +315,15 @@ void smc_lgr_free(struct smc_link_group *lgr)
        kfree(lgr);
 }
 
+void smc_lgr_forget(struct smc_link_group *lgr)
+{
+       spin_lock_bh(&smc_lgr_list.lock);
+       /* do not use this link group for new connections */
+       if (!list_empty(&lgr->list))
+               list_del_init(&lgr->list);
+       spin_unlock_bh(&smc_lgr_list.lock);
+}
+
 /* terminate linkgroup abnormally */
 void smc_lgr_terminate(struct smc_link_group *lgr)
 {
@@ -313,15 +331,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
        struct smc_sock *smc;
        struct rb_node *node;
 
-       spin_lock_bh(&smc_lgr_list.lock);
-       if (list_empty(&lgr->list)) {
-               /* termination already triggered */
-               spin_unlock_bh(&smc_lgr_list.lock);
-               return;
-       }
-       /* do not use this link group for new connections */
-       list_del_init(&lgr->list);
-       spin_unlock_bh(&smc_lgr_list.lock);
+       smc_lgr_forget(lgr);
 
        write_lock_bh(&lgr->conns_lock);
        node = rb_first(&lgr->conns_all);
@@ -339,6 +349,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
        }
        write_unlock_bh(&lgr->conns_lock);
        wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
+       smc_lgr_schedule_free_work(lgr);
 }
 
 /* Determine vlan of internal TCP socket.
@@ -400,7 +411,7 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
 }
 
 /* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
                    struct smc_ib_device *smcibdev, u8 ibport,
                    struct smc_clc_msg_local *lcl, int srv_first_contact)
 {
@@ -457,7 +468,7 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
 
 create:
        if (local_contact == SMC_FIRST_CONTACT) {
-               rc = smc_lgr_create(smc, peer_in_addr, smcibdev, ibport,
+               rc = smc_lgr_create(smc, smcibdev, ibport,
                                    lcl->id_for_peer, vlan_id);
                if (rc)
                        goto out;
@@ -465,7 +476,7 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
                rc = smc_link_determine_gid(conn->lgr);
        }
        conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
-       conn->local_tx_ctrl.len = sizeof(struct smc_cdc_msg);
+       conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
 #ifndef KERNEL_HAS_ATOMIC64
        spin_lock_init(&conn->acurs_lock);
 #endif
@@ -698,27 +709,55 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
        return -ENOSPC;
 }
 
-/* save rkey and dma_addr received from peer during clc handshake */
-int smc_rmb_rtoken_handling(struct smc_connection *conn,
-                           struct smc_clc_msg_accept_confirm *clc)
+/* add a new rtoken from peer */
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
 {
-       u64 dma_addr = be64_to_cpu(clc->rmb_dma_addr);
-       struct smc_link_group *lgr = conn->lgr;
-       u32 rkey = ntohl(clc->rmb_rkey);
+       u64 dma_addr = be64_to_cpu(nw_vaddr);
+       u32 rkey = ntohl(nw_rkey);
        int i;
 
        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
                    (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
                    test_bit(i, lgr->rtokens_used_mask)) {
-                       conn->rtoken_idx = i;
+                       /* already in list */
+                       return i;
+               }
+       }
+       i = smc_rmb_reserve_rtoken_idx(lgr);
+       if (i < 0)
+               return i;
+       lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
+       lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
+       return i;
+}
+
+/* delete an rtoken */
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
+{
+       u32 rkey = ntohl(nw_rkey);
+       int i;
+
+       for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+               if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
+                   test_bit(i, lgr->rtokens_used_mask)) {
+                       lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
+                       lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
+
+                       clear_bit(i, lgr->rtokens_used_mask);
                        return 0;
                }
        }
-       conn->rtoken_idx = smc_rmb_reserve_rtoken_idx(lgr);
+       return -ENOENT;
+}
+
+/* save rkey and dma_addr received from peer during clc handshake */
+int smc_rmb_rtoken_handling(struct smc_connection *conn,
+                           struct smc_clc_msg_accept_confirm *clc)
+{
+       conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
+                                         clc->rmb_rkey);
        if (conn->rtoken_idx < 0)
                return conn->rtoken_idx;
-       lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey = rkey;
-       lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr = dma_addr;
        return 0;
 }
index fe691bf9af91db175cce87be8a4aa710d143178e..07e2a393e6d98b672d4160ff1486c0bd6e43df4b 100644 (file)
@@ -32,6 +32,12 @@ enum smc_lgr_role {          /* possible roles of a link group */
        SMC_SERV        /* server */
 };
 
+enum smc_link_state {                  /* possible states of a link */
+       SMC_LNK_INACTIVE,       /* link is inactive */
+       SMC_LNK_ACTIVATING,     /* link is being activated */
+       SMC_LNK_ACTIVE          /* link is active */
+};
+
 #define SMC_WR_BUF_SIZE                48      /* size of work request buffer */
 
 struct smc_wr_buf {
@@ -87,8 +93,14 @@ struct smc_link {
        u8                      peer_mac[ETH_ALEN];     /* = gid[8:10||13:15] */
        u8                      peer_gid[sizeof(union ib_gid)]; /* gid of peer*/
        u8                      link_id;        /* unique # within link group */
+
+       enum smc_link_state     state;          /* state of link */
        struct completion       llc_confirm;    /* wait for rx of conf link */
        struct completion       llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
+       int                     llc_confirm_rc; /* rc from confirm link msg */
+       int                     llc_confirm_resp_rc; /* rc from conf_resp msg */
+       struct completion       llc_add;        /* wait for rx of add link */
+       struct completion       llc_add_resp;   /* wait for rx of add link rsp*/
 };
 
 /* For now we just allow one parallel link per link group. The SMC protocol
@@ -124,7 +136,6 @@ struct smc_rtoken {                         /* address/key of remote RMB */
 struct smc_link_group {
        struct list_head        list;
        enum smc_lgr_role       role;           /* client or server */
-       __be32                  daddr;          /* destination ip address */
        struct smc_link         lnk[SMC_LINKS_PER_LGR_MAX];     /* smc link */
        char                    peer_systemid[SMC_SYSTEMID_LEN];
                                                /* unique system_id of peer */
@@ -186,10 +197,13 @@ struct smc_sock;
 struct smc_clc_msg_accept_confirm;
 
 void smc_lgr_free(struct smc_link_group *lgr);
+void smc_lgr_forget(struct smc_link_group *lgr);
 void smc_lgr_terminate(struct smc_link_group *lgr);
 int smc_buf_create(struct smc_sock *smc);
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
                            struct smc_clc_msg_accept_confirm *clc);
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey);
 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
index 2a8957bd6d3890debe7e3c58aec0cede68d104ee..26df554f7588d665b704438a014c693e18fc69ac 100644 (file)
@@ -23,6 +23,8 @@
 #include "smc_wr.h"
 #include "smc.h"
 
+#define SMC_MAX_CQE 32766      /* max. # of completion queue elements */
+
 #define SMC_QP_MIN_RNR_TIMER           5
 #define SMC_QP_TIMEOUT                 15 /* 4096 * 2 ** timeout usec */
 #define SMC_QP_RETRY_CNT                       7 /* 7: infinite */
@@ -438,9 +440,15 @@ int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
 long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
 {
        struct ib_cq_init_attr cqattr = {
-               .cqe = SMC_WR_MAX_CQE, .comp_vector = 0 };
+               .cqe = SMC_MAX_CQE, .comp_vector = 0 };
+       int cqe_size_order, smc_order;
        long rc;
 
+       /* the calculated number of cq entries fits to mlx5 cq allocation */
+       cqe_size_order = cache_line_size() == 128 ? 7 : 6;
+       smc_order = MAX_ORDER - cqe_size_order - 1;
+       if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE)
+               cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2;
        smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev,
                                              smc_wr_tx_cq_handler, NULL,
                                              smcibdev, &cqattr);
index 92fe4cc8c82c2f0e860c40e40e61e1fa1ccea427..ea4b21981b4bd13d510bbc452398e868db36cfbc 100644 (file)
@@ -4,9 +4,6 @@
  *
  *  Link Layer Control (LLC)
  *
- *  For now, we only support the necessary "confirm link" functionality
- *  which happens for the first RoCE link after successful CLC handshake.
- *
  *  Copyright IBM Corp. 2016
  *
  *  Author(s):  Klaus Wacker <Klaus.Wacker@de.ibm.com>
 #include "smc_clc.h"
 #include "smc_llc.h"
 
+#define SMC_LLC_DATA_LEN               40
+
+struct smc_llc_hdr {
+       struct smc_wr_rx_hdr common;    /* common.type holds the LLC msg type */
+       u8 length;      /* 44; rx drops msgs whose length differs from
+                        * sizeof(union smc_llc_msg)
+                        */
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u8 reserved:4,
+          add_link_rej_rsn:4;  /* reject reason set in ADD LINK responses */
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u8 add_link_rej_rsn:4,  /* reject reason set in ADD LINK responses */
+          reserved:4;
+#endif
+       u8 flags;       /* SMC_LLC_FLAG_* bits, e.g. SMC_LLC_FLAG_RESP */
+};
+
+#define SMC_LLC_FLAG_NO_RMBE_EYEC      0x03
+
+struct smc_llc_msg_confirm_link {      /* type 0x01 */
+       struct smc_llc_hdr hd;
+       u8 sender_mac[ETH_ALEN];
+       u8 sender_gid[SMC_GID_SIZE];
+       u8 sender_qp_num[3];
+       u8 link_num;
+       u8 link_uid[SMC_LGR_ID_SIZE];
+       u8 max_links;
+       u8 reserved[9];
+};
+
+#define SMC_LLC_FLAG_ADD_LNK_REJ       0x40
+#define SMC_LLC_REJ_RSN_NO_ALT_PATH    1
+
+#define SMC_LLC_ADD_LNK_MAX_LINKS      2
+
+struct smc_llc_msg_add_link {          /* type 0x02 */
+       struct smc_llc_hdr hd;
+       u8 sender_mac[ETH_ALEN];
+       u8 reserved2[2];
+       u8 sender_gid[SMC_GID_SIZE];
+       u8 sender_qp_num[3];
+       u8 link_num;
+       u8 flags2;      /* QP mtu */
+       u8 initial_psn[3];
+       u8 reserved[8];
+};
+
+#define SMC_LLC_FLAG_DEL_LINK_ALL      0x40
+#define SMC_LLC_FLAG_DEL_LINK_ORDERLY  0x20
+
+struct smc_llc_msg_del_link {          /* type 0x04 */
+       struct smc_llc_hdr hd;
+       u8 link_num;
+       __be32 reason;
+       u8 reserved[35];
+} __packed;                    /* format defined in RFC7609 */
+
+struct smc_llc_msg_test_link {         /* type 0x07 */
+       struct smc_llc_hdr hd;
+       u8 user_data[16];
+       u8 reserved[24];
+};
+
+struct smc_rmb_rtoken {
+       union {
+               u8 num_rkeys;   /* first rtoken byte of CONFIRM LINK msg */
+                               /* is actually the num of rtokens, first */
+                               /* rtoken is always for the current link */
+               u8 link_id;     /* link id of the rtoken */
+       };
+       __be32 rmb_key;
+       __be64 rmb_vaddr;
+} __packed;                    /* format defined in RFC7609 */
+
+#define SMC_LLC_RKEYS_PER_MSG  3
+
+struct smc_llc_msg_confirm_rkey {      /* type 0x06 */
+       struct smc_llc_hdr hd;
+       struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+       u8 reserved;
+};
+
+struct smc_llc_msg_confirm_rkey_cont { /* type 0x08 */
+       struct smc_llc_hdr hd;
+       u8 num_rkeys;
+       struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+};
+
+#define SMC_LLC_DEL_RKEY_MAX   8
+#define SMC_LLC_FLAG_RKEY_NEG  0x20
+
+struct smc_llc_msg_delete_rkey {       /* type 0x09 */
+       struct smc_llc_hdr hd;
+       u8 num_rkeys;   /* number of entries used in rkey[] */
+       u8 err_mask;    /* response: bit (7 - i) set if rkey[i] failed */
+       u8 reserved[2];
+       /* sized by the same bound the rx handler clamps num_rkeys to */
+       __be32 rkey[SMC_LLC_DEL_RKEY_MAX];
+       u8 reserved2[4];
+};
+
+union smc_llc_msg {
+       struct smc_llc_msg_confirm_link confirm_link;
+       struct smc_llc_msg_add_link add_link;
+       struct smc_llc_msg_del_link delete_link;
+
+       struct smc_llc_msg_confirm_rkey confirm_rkey;
+       struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont;
+       struct smc_llc_msg_delete_rkey delete_rkey;
+
+       struct smc_llc_msg_test_link test_link;
+       struct {
+               struct smc_llc_hdr hdr;
+               u8 data[SMC_LLC_DATA_LEN];
+       } raw;
+};
+
+#define SMC_LLC_FLAG_RESP              0x80
+
 /********************************** send *************************************/
 
 struct smc_llc_tx_pend {
@@ -87,14 +200,112 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
        memset(confllc, 0, sizeof(*confllc));
        confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
        confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
+       confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
        if (reqresp == SMC_LLC_RESP)
                confllc->hd.flags |= SMC_LLC_FLAG_RESP;
        memcpy(confllc->sender_mac, mac, ETH_ALEN);
        memcpy(confllc->sender_gid, gid, SMC_GID_SIZE);
        hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
-       /* confllc->link_num = SMC_SINGLE_LINK; already done by memset above */
+       confllc->link_num = link->link_id;
        memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
-       confllc->max_links = SMC_LINKS_PER_LGR_MAX;
+       confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */
+       /* send llc message */
+       rc = smc_wr_tx_send(link, pend);
+       return rc;
+}
+
+/* send ADD LINK request or response
+ *
+ * Builds an ADD LINK message in the buffer handed out by
+ * smc_llc_add_pending_send(), carrying the given mac and gid, and posts
+ * it with smc_wr_tx_send(). Fields not set here (QP number, link number,
+ * QP mtu, initial PSN) stay zero from the memset.
+ * Returns the non-zero rc of smc_llc_add_pending_send() on failure,
+ * otherwise the rc of smc_wr_tx_send().
+ */
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
+                         union ib_gid *gid,
+                         enum smc_llc_reqresp reqresp)
+{
+       struct smc_llc_msg_add_link *addllc;
+       struct smc_wr_tx_pend_priv *pend;
+       struct smc_wr_buf *wr_buf;
+       int rc;
+
+       rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+       if (rc)
+               return rc;
+       addllc = (struct smc_llc_msg_add_link *)wr_buf;
+       memset(addllc, 0, sizeof(*addllc));
+       addllc->hd.common.type = SMC_LLC_ADD_LINK;
+       addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+       if (reqresp == SMC_LLC_RESP) {
+               addllc->hd.flags |= SMC_LLC_FLAG_RESP;
+               /* always reject more links for now */
+               addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
+               addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
+       }
+       memcpy(addllc->sender_mac, mac, ETH_ALEN);
+       memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
+       /* send llc message */
+       rc = smc_wr_tx_send(link, pend);
+       return rc;
+}
+
+/* send DELETE LINK request or response
+ *
+ * Builds a DELETE LINK message for link->link_id in the buffer handed out
+ * by smc_llc_add_pending_send() and posts it with smc_wr_tx_send(). The
+ * message always carries the "all links" and "orderly" flags;
+ * SMC_LLC_FLAG_RESP is added for a response.
+ * Returns the non-zero rc of smc_llc_add_pending_send() on failure,
+ * otherwise the rc of smc_wr_tx_send().
+ */
+int smc_llc_send_delete_link(struct smc_link *link,
+                            enum smc_llc_reqresp reqresp)
+{
+       struct smc_llc_msg_del_link *delllc;
+       struct smc_wr_tx_pend_priv *pend;
+       struct smc_wr_buf *wr_buf;
+       int rc;
+
+       rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+       if (rc)
+               return rc;
+       delllc = (struct smc_llc_msg_del_link *)wr_buf;
+       memset(delllc, 0, sizeof(*delllc));
+       delllc->hd.common.type = SMC_LLC_DELETE_LINK;
+       /* length of the message actually built here, not of ADD LINK */
+       delllc->hd.length = sizeof(struct smc_llc_msg_del_link);
+       if (reqresp == SMC_LLC_RESP)
+               delllc->hd.flags |= SMC_LLC_FLAG_RESP;
+       /* DEL_LINK_ALL because only 1 link supported */
+       delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
+       delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+       delllc->link_num = link->link_id;
+       /* send llc message */
+       rc = smc_wr_tx_send(link, pend);
+       return rc;
+}
+
+/* send LLC test link request or response
+ *
+ * Copies the 16 bytes of user_data into a TEST LINK message built in the
+ * buffer handed out by smc_llc_add_pending_send() and posts it with
+ * smc_wr_tx_send(); SMC_LLC_FLAG_RESP is set for a response.
+ * Returns the non-zero rc of smc_llc_add_pending_send() on failure,
+ * otherwise the rc of smc_wr_tx_send().
+ */
+int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16],
+                          enum smc_llc_reqresp reqresp)
+{
+       struct smc_llc_msg_test_link *testllc;
+       struct smc_wr_tx_pend_priv *pend;
+       struct smc_wr_buf *wr_buf;
+       int rc;
+
+       rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+       if (rc)
+               return rc;
+       testllc = (struct smc_llc_msg_test_link *)wr_buf;
+       memset(testllc, 0, sizeof(*testllc));
+       testllc->hd.common.type = SMC_LLC_TEST_LINK;
+       testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
+       if (reqresp == SMC_LLC_RESP)
+               testllc->hd.flags |= SMC_LLC_FLAG_RESP;
+       memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
+       /* send llc message */
+       rc = smc_wr_tx_send(link, pend);
+       return rc;
+}
+
+/* send a prepared message */
+static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
+{
+       struct smc_wr_tx_pend_priv *pend;
+       struct smc_wr_buf *wr_buf;
+       int rc;
+
+       rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+       if (rc)
+               return rc;
+       memcpy(wr_buf, llcbuf, llclen);
        /* send llc message */
        rc = smc_wr_tx_send(link, pend);
        return rc;
@@ -106,19 +317,156 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,
                                    struct smc_llc_msg_confirm_link *llc)
 {
        struct smc_link_group *lgr;
+       int conf_rc;
 
        lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+       /* RMBE eyecatchers are not supported */
+       if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
+               conf_rc = 0;
+       else
+               conf_rc = ENOTSUPP;
+
        if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
-               if (lgr->role == SMC_SERV)
+               if (lgr->role == SMC_SERV &&
+                   link->state == SMC_LNK_ACTIVATING) {
+                       link->llc_confirm_resp_rc = conf_rc;
                        complete(&link->llc_confirm_resp);
+               }
        } else {
-               if (lgr->role == SMC_CLNT) {
+               if (lgr->role == SMC_CLNT &&
+                   link->state == SMC_LNK_ACTIVATING) {
+                       link->llc_confirm_rc = conf_rc;
                        link->link_id = llc->link_num;
                        complete(&link->llc_confirm);
                }
        }
 }
 
+static void smc_llc_rx_add_link(struct smc_link *link,
+                               struct smc_llc_msg_add_link *llc)
+{      /* rx ADD LINK: signal a waiter while activating, else answer peer */
+       struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+                                                 lnk[SMC_SINGLE_LINK]);
+
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               if (link->state == SMC_LNK_ACTIVATING)
+                       complete(&link->llc_add_resp);
+       } else {
+               if (link->state == SMC_LNK_ACTIVATING) {
+                       complete(&link->llc_add);
+                       return;
+               }
+
+               /* link is not activating: reply directly, send rc is ignored */
+               if (lgr->role == SMC_SERV) {
+                       /* NOTE(review): server answers a request with
+                        * another REQ rather than a RESP - confirm intended
+                        */
+                       smc_llc_send_add_link(link,
+                                       link->smcibdev->mac[link->ibport - 1],
+                                       &link->smcibdev->gid[link->ibport - 1],
+                                       SMC_LLC_REQ);
+
+               } else {
+                       /* smc_llc_send_add_link() marks every RESP as rejected */
+                       smc_llc_send_add_link(link,
+                                       link->smcibdev->mac[link->ibport - 1],
+                                       &link->smcibdev->gid[link->ibport - 1],
+                                       SMC_LLC_RESP);
+               }
+       }
+}
+
+static void smc_llc_rx_delete_link(struct smc_link *link,
+                                  struct smc_llc_msg_del_link *llc)
+{      /* rx DELETE LINK: tear down the link group, role decides the order */
+       struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+                                                 lnk[SMC_SINGLE_LINK]);
+
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               /* response: only the server acts on it; client ignores it */
+               if (lgr->role == SMC_SERV)
+                       smc_lgr_terminate(lgr);
+       } else {
+               if (lgr->role == SMC_SERV) {
+                       /* server: forget the group and send its own request */
+                       smc_lgr_forget(lgr);
+                       smc_llc_send_delete_link(link, SMC_LLC_REQ);
+               } else {
+                       /* client: send the response first, then terminate */
+                       smc_llc_send_delete_link(link, SMC_LLC_RESP);
+                       smc_lgr_terminate(lgr);
+               }
+       }
+}
+
+static void smc_llc_rx_test_link(struct smc_link *link,
+                                struct smc_llc_msg_test_link *llc)
+{      /* rx TEST LINK: a request is answered, a response is ignored */
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               /* unused as long as we don't send this type of msg */
+       } else {
+               /* echo the peer's user_data back; the send rc is ignored */
+               smc_llc_send_test_link(link, llc->user_data, SMC_LLC_RESP);
+       }
+}
+
+static void smc_llc_rx_confirm_rkey(struct smc_link *link,
+                                   struct smc_llc_msg_confirm_rkey *llc)
+{      /* rx CONFIRM RKEY: register the first rtoken and send a response */
+       struct smc_link_group *lgr;
+       int rc;
+
+       lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               /* unused as long as we don't send this type of msg */
+       } else {
+               rc = smc_rtoken_add(lgr,
+                                   llc->rtoken[0].rmb_vaddr,
+                                   llc->rtoken[0].rmb_key);
+
+               /* ignore rtokens for other links, we have only one link */
+
+               /* the received message is modified in place and sent back
+                * as the response; a negative rc is reported to the peer
+                * through SMC_LLC_FLAG_RKEY_NEG
+                */
+               llc->hd.flags |= SMC_LLC_FLAG_RESP;
+               if (rc < 0)
+                       llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+               smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+       }
+}
+
+static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
+                                     struct smc_llc_msg_confirm_rkey_cont *llc)
+{      /* rx CONFIRM RKEY CONT: acknowledged without storing any rtoken */
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               /* unused as long as we don't send this type of msg */
+       } else {
+               /* ignore rtokens for other links, we have only one link;
+                * the received message is flagged as response in place and
+                * sent back, the send rc is ignored
+                */
+               llc->hd.flags |= SMC_LLC_FLAG_RESP;
+               smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+       }
+}
+
+static void smc_llc_rx_delete_rkey(struct smc_link *link,
+                                  struct smc_llc_msg_delete_rkey *llc)
+{      /* rx DELETE RKEY: drop the listed rkeys, report failures in the reply */
+       struct smc_link_group *lgr;
+       u8 err_mask = 0;
+       int i, max;
+
+       lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+       if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+               /* unused as long as we don't send this type of msg */
+       } else {
+               max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
+               for (i = 0; i < max; i++) { /* max is clamped to DEL_RKEY_MAX */
+                       if (smc_rtoken_delete(lgr, llc->rkey[i]))
+                               err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
+               }       /* err_mask: bit 7 <-> rkey[0] ... bit 0 <-> rkey[7] */
+
+               if (err_mask) {
+                       llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+                       llc->err_mask = err_mask;
+               }
+
+               /* received message is modified in place and sent back */
+               llc->hd.flags |= SMC_LLC_FLAG_RESP;
+               smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+       }
+}
+
 static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
 {
        struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
@@ -128,8 +476,30 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
                return; /* short message */
        if (llc->raw.hdr.length != sizeof(*llc))
                return; /* invalid message */
-       if (llc->raw.hdr.common.type == SMC_LLC_CONFIRM_LINK)
+
+       switch (llc->raw.hdr.common.type) {
+       case SMC_LLC_TEST_LINK:
+               smc_llc_rx_test_link(link, &llc->test_link);
+               break;
+       case SMC_LLC_CONFIRM_LINK:
                smc_llc_rx_confirm_link(link, &llc->confirm_link);
+               break;
+       case SMC_LLC_ADD_LINK:
+               smc_llc_rx_add_link(link, &llc->add_link);
+               break;
+       case SMC_LLC_DELETE_LINK:
+               smc_llc_rx_delete_link(link, &llc->delete_link);
+               break;
+       case SMC_LLC_CONFIRM_RKEY:
+               smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey);
+               break;
+       case SMC_LLC_CONFIRM_RKEY_CONT:
+               smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont);
+               break;
+       case SMC_LLC_DELETE_RKEY:
+               smc_llc_rx_delete_rkey(link, &llc->delete_rkey);
+               break;
+       }
 }
 
 /***************************** init, exit, misc ******************************/
@@ -139,6 +509,30 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
                .handler        = smc_llc_rx_handler,
                .type           = SMC_LLC_CONFIRM_LINK
        },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_TEST_LINK
+       },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_ADD_LINK
+       },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_DELETE_LINK
+       },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_CONFIRM_RKEY
+       },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_CONFIRM_RKEY_CONT
+       },
+       {
+               .handler        = smc_llc_rx_handler,
+               .type           = SMC_LLC_DELETE_RKEY
+       },
        {
                .handler        = NULL,
        }
index 51b27ce90dbd25dfbbfb23a57e3253021f971bd9..e4a7d5e234d5d61cee281af9ac7e396df34d87f5 100644 (file)
@@ -18,6 +18,7 @@
 #define SMC_LLC_FLAG_RESP              0x80
 
 #define SMC_LLC_WAIT_FIRST_TIME                (5 * HZ)
+#define SMC_LLC_WAIT_TIME              (2 * HZ)
 
 enum smc_llc_reqresp {
        SMC_LLC_REQ,
@@ -26,39 +27,23 @@ enum smc_llc_reqresp {
 
 enum smc_llc_msg_type {
        SMC_LLC_CONFIRM_LINK            = 0x01,
-};
-
-#define SMC_LLC_DATA_LEN               40
-
-struct smc_llc_hdr {
-       struct smc_wr_rx_hdr common;
-       u8 length;      /* 44 */
-       u8 reserved;
-       u8 flags;
-};
-
-struct smc_llc_msg_confirm_link {      /* type 0x01 */
-       struct smc_llc_hdr hd;
-       u8 sender_mac[ETH_ALEN];
-       u8 sender_gid[SMC_GID_SIZE];
-       u8 sender_qp_num[3];
-       u8 link_num;
-       u8 link_uid[SMC_LGR_ID_SIZE];
-       u8 max_links;
-       u8 reserved[9];
-};
-
-union smc_llc_msg {
-       struct smc_llc_msg_confirm_link confirm_link;
-       struct {
-               struct smc_llc_hdr hdr;
-               u8 data[SMC_LLC_DATA_LEN];
-       } raw;
+       SMC_LLC_ADD_LINK                = 0x02,
+       SMC_LLC_DELETE_LINK             = 0x04,
+       SMC_LLC_CONFIRM_RKEY            = 0x06,
+       SMC_LLC_TEST_LINK               = 0x07,
+       SMC_LLC_CONFIRM_RKEY_CONT       = 0x08,
+       SMC_LLC_DELETE_RKEY             = 0x09,
 };
 
 /* transmit */
 int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid,
                              enum smc_llc_reqresp reqresp);
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid,
+                         enum smc_llc_reqresp reqresp);
+int smc_llc_send_delete_link(struct smc_link *link,
+                            enum smc_llc_reqresp reqresp);
+int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16],
+                          enum smc_llc_reqresp reqresp);
 int smc_llc_init(void) __init;
 
 #endif /* SMC_LLC_H */
index ef0c3494c9cb57249444337906fd3cb6f5e62a91..210bec3c3ebeca5727197cb8066f996c667a2aff 100644 (file)
@@ -19,7 +19,6 @@
 #include "smc.h"
 #include "smc_core.h"
 
-#define SMC_WR_MAX_CQE 32768   /* max. # of completion queue elements */
 #define SMC_WR_BUF_CNT 16      /* # of ctrl buffers per link */
 
 #define SMC_WR_TX_WAIT_FREE_SLOT_TIME  (10 * HZ)
index ab58e57c09caeaf0e26e03aefc83594ac7d61938..3d1948d27a25585b3377e4f4595bf77297a77e63 100644 (file)
@@ -233,7 +233,7 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
        return __put_user(klen, ulen);
 }
 
-static struct kmem_cache *sock_inode_cachep __read_mostly;
+static struct kmem_cache *sock_inode_cachep __ro_after_init;
 
 static struct inode *sock_alloc_inode(struct super_block *sb)
 {
@@ -2289,10 +2289,12 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
        if (!sock)
                return err;
 
-       err = sock_error(sock->sk);
-       if (err) {
-               datagrams = err;
-               goto out_put;
+       if (likely(!(flags & MSG_ERRQUEUE))) {
+               err = sock_error(sock->sk);
+               if (err) {
+                       datagrams = err;
+                       goto out_put;
+               }
        }
 
        entry = mmsg;
@@ -2588,6 +2590,11 @@ void sock_unregister(int family)
 }
 EXPORT_SYMBOL(sock_unregister);
 
+/**
+ *     sock_is_registered - check whether a protocol family is registered
+ *     @family: protocol family number to look up
+ *
+ *     Returns true if @family lies in [0, NPROTO) and net_families[] holds
+ *     an entry for it, false otherwise. @family is a signed int, so the
+ *     lower bound is checked as well before it is used as an array index.
+ */
+bool sock_is_registered(int family)
+{
+       return family >= 0 && family < NPROTO &&
+              rcu_access_pointer(net_families[family]);
+}
+
 static int __init sock_init(void)
 {
        int err;
index c25a3a149dc4e6d50b20f2ba3ffc5a77d213fdde..e450212121d27f7e6ebddb893ba0856ca2376464 100644 (file)
@@ -34,3 +34,11 @@ config TIPC_MEDIA_UDP
          Saying Y here will enable support for running TIPC over IP/UDP
        bool
        default y
+
+config TIPC_DIAG
+       tristate "TIPC: socket monitoring interface"
+       depends on TIPC
+       default y
+       ---help---
+       Support for TIPC socket monitoring interface used by ss tool.
+       If unsure, say Y.
index 1edb7192aa2f6e2a124bbb825fca924cf2e43484..aca168f2abb182561c030ae9d9b52c04fca21112 100644 (file)
@@ -14,3 +14,8 @@ tipc-y        += addr.o bcast.o bearer.o \
 tipc-$(CONFIG_TIPC_MEDIA_UDP)  += udp_media.o
 tipc-$(CONFIG_TIPC_MEDIA_IB)   += ib_media.o
 tipc-$(CONFIG_SYSCTL)          += sysctl.o
+
+
+obj-$(CONFIG_TIPC_DIAG)        += diag.o
+
+tipc_diag-y    := diag.o
index 48fd3b5a73fbaf934178c444cbba07aa1a0f5b8c..b88d48d009130985db69993bc115c8cae80a71a9 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * net/tipc/addr.c: TIPC address utility routines
  *
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2006, 2018, Ericsson AB
  * Copyright (c) 2004-2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <linux/kernel.h>
 #include "addr.h"
 #include "core.h"
 
-/**
- * in_own_cluster - test for cluster inclusion; <0.0.0> always matches
- */
-int in_own_cluster(struct net *net, u32 addr)
-{
-       return in_own_cluster_exact(net, addr) || !addr;
-}
-
-int in_own_cluster_exact(struct net *net, u32 addr)
+/**
+ * tipc_in_scope - test whether @addr is covered by lookup @domain
+ * @legacy_format: if true, also accept <Z.C.0> and <Z.0.0> style domains
+ * @domain: domain to match against; 0 matches every address
+ * @addr: address to test
+ *
+ * Returns true if @domain is 0 or equal to @addr. With @legacy_format set,
+ * additionally returns true if @domain equals @addr masked down to its
+ * zone/cluster part or to its zone part. Returns false otherwise.
+ */
+bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
-
-       return !((addr ^ tn->own_addr) >> 12);
+       if (!domain || (domain == addr))
+               return true;
+       if (!legacy_format)
+               return false;
+       if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */
+               return true;
+       if (domain == (addr & TIPC_ZONE_MASK)) /* domain <Z.0.0> */
+               return true;
+       return false;
 }
 
-/**
- * in_own_node - test for node inclusion; <0.0.0> always matches
- */
-int in_own_node(struct net *net, u32 addr)
+void tipc_set_node_id(struct net *net, u8 *id)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
+       struct tipc_net *tn = tipc_net(net);
+       u32 *tmp = (u32 *)id;
 
-       return (addr == tn->own_addr) || !addr;
+       memcpy(tn->node_id, id, NODE_ID_LEN);
+       tipc_nodeid2string(tn->node_id_string, id);
+       tn->trial_addr = tmp[0] ^ tmp[1] ^ tmp[2] ^ tmp[3];
+       pr_info("Own node identity %s, cluster identity %u\n",
+               tipc_own_id_string(net), tn->net_id);
 }
 
-/**
- * addr_domain - convert 2-bit scope value to equivalent message lookup domain
- *
- * Needed when address of a named message must be looked up a second time
- * after a network hop.
- */
-u32 addr_domain(struct net *net, u32 sc)
+void tipc_set_node_addr(struct net *net, u32 addr)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
+       struct tipc_net *tn = tipc_net(net);
+       u8 node_id[NODE_ID_LEN] = {0,};
 
-       if (likely(sc == TIPC_NODE_SCOPE))
-               return tn->own_addr;
-       if (sc == TIPC_CLUSTER_SCOPE)
-               return tipc_cluster_mask(tn->own_addr);
-       return tipc_zone_mask(tn->own_addr);
+       tn->node_addr = addr;
+       if (!tipc_own_id(net)) {
+               sprintf(node_id, "%x", addr);
+               tipc_set_node_id(net, node_id);
+       }
+       tn->trial_addr = addr;
+       pr_info("32-bit node address hash set to %x\n", addr);
 }
 
-/**
- * tipc_addr_domain_valid - validates a network domain address
- *
- * Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>,
- * where Z, C, and N are non-zero.
- *
- * Returns 1 if domain address is valid, otherwise 0
- */
-int tipc_addr_domain_valid(u32 addr)
+char *tipc_nodeid2string(char *str, u8 *id)
 {
-       u32 n = tipc_node(addr);
-       u32 c = tipc_cluster(addr);
-       u32 z = tipc_zone(addr);
-
-       if (n && (!z || !c))
-               return 0;
-       if (c && !z)
-               return 0;
-       return 1;
-}
+       int i;
+       u8 c;
 
-/**
- * tipc_addr_node_valid - validates a proposed network address for this node
- *
- * Accepts <Z.C.N>, where Z, C, and N are non-zero.
- *
- * Returns 1 if address can be used, otherwise 0
- */
-int tipc_addr_node_valid(u32 addr)
-{
-       return tipc_addr_domain_valid(addr) && tipc_node(addr);
-}
+       /* Already a string ? */
+       for (i = 0; i < NODE_ID_LEN; i++) {
+               c = id[i];
+               if (c >= '0' && c <= '9')
+                       continue;
+               if (c >= 'A' && c <= 'Z')
+                       continue;
+               if (c >= 'a' && c <= 'z')
+                       continue;
+               if (c == '.')
+                       continue;
+               if (c == ':')
+                       continue;
+               if (c == '_')
+                       continue;
+               if (c == '-')
+                       continue;
+               if (c == '@')
+                       continue;
+               if (c != 0)
+                       break;
+       }
+       if (i == NODE_ID_LEN) {
+               memcpy(str, id, NODE_ID_LEN);
+               str[NODE_ID_LEN] = 0;
+               return str;
+       }
 
-int tipc_in_scope(u32 domain, u32 addr)
-{
-       if (!domain || (domain == addr))
-               return 1;
-       if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */
-               return 1;
-       if (domain == tipc_zone_mask(addr)) /* domain <Z.0.0> */
-               return 1;
-       return 0;
-}
+       /* Translate to hex string */
+       for (i = 0; i < NODE_ID_LEN; i++)
+               sprintf(&str[2 * i], "%02x", id[i]);
 
-/**
- * tipc_addr_scope - convert message lookup domain to a 2-bit scope value
- */
-int tipc_addr_scope(u32 domain)
-{
-       if (likely(!domain))
-               return TIPC_ZONE_SCOPE;
-       if (tipc_node(domain))
-               return TIPC_NODE_SCOPE;
-       if (tipc_cluster(domain))
-               return TIPC_CLUSTER_SCOPE;
-       return TIPC_ZONE_SCOPE;
-}
+       /* Strip off trailing zeroes */
+       for (i = NODE_ID_STR_LEN - 2; str[i] == '0'; i--)
+               str[i] = 0;
 
-char *tipc_addr_string_fill(char *string, u32 addr)
-{
-       snprintf(string, 16, "<%u.%u.%u>",
-                tipc_zone(addr), tipc_cluster(addr), tipc_node(addr));
-       return string;
+       return str;
 }
index bebb347803ce8d1d1eab6fafae533833512d9ef1..31bee0ea7b3e4c778a6d83a551245c1b9270f9f4 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * net/tipc/addr.h: Include file for TIPC address utility routines
  *
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2006, 2018, Ericsson AB
  * Copyright (c) 2004-2005, Wind River Systems
  * All rights reserved.
  *
 
 static inline u32 tipc_own_addr(struct net *net)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
+       return tipc_net(net)->node_addr;
+}
+
+static inline u8 *tipc_own_id(struct net *net)
+{
+       struct tipc_net *tn = tipc_net(net);
 
-       return tn->own_addr;
+       if (!strlen(tn->node_id_string))
+               return NULL;
+       return tn->node_id;
 }
 
-static inline u32 tipc_zone_mask(u32 addr)
+static inline char *tipc_own_id_string(struct net *net)
 {
-       return addr & TIPC_ZONE_MASK;
+       return tipc_net(net)->node_id_string;
 }
 
 static inline u32 tipc_cluster_mask(u32 addr)
@@ -60,15 +67,25 @@ static inline u32 tipc_cluster_mask(u32 addr)
        return addr & TIPC_ZONE_CLUSTER_MASK;
 }
 
-u32 tipc_own_addr(struct net *net);
-int in_own_cluster(struct net *net, u32 addr);
-int in_own_cluster_exact(struct net *net, u32 addr);
-int in_own_node(struct net *net, u32 addr);
-u32 addr_domain(struct net *net, u32 sc);
-int tipc_addr_domain_valid(u32);
-int tipc_addr_node_valid(u32 addr);
-int tipc_in_scope(u32 domain, u32 addr);
-int tipc_addr_scope(u32 domain);
-char *tipc_addr_string_fill(char *string, u32 addr);
+static inline int tipc_node2scope(u32 node)
+{      /* non-zero node -> TIPC_NODE_SCOPE, zero -> TIPC_CLUSTER_SCOPE */
+       return node ? TIPC_NODE_SCOPE : TIPC_CLUSTER_SCOPE;
+}
+
+static inline int tipc_scope2node(struct net *net, int sc)
+{      /* own node address for TIPC_NODE_SCOPE, 0 for any other scope */
+       return sc != TIPC_NODE_SCOPE ? 0 : tipc_own_addr(net);
+}
+
+static inline int in_own_node(struct net *net, u32 addr)
+{      /* non-zero if addr is this node's own address; addr 0 always matches */
+       return addr == tipc_own_addr(net) || !addr;
+}
+
+bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr);
+void tipc_set_node_id(struct net *net, u8 *id);
+void tipc_set_node_addr(struct net *net, u32 addr);
+char *tipc_nodeid2string(char *str, u8 *id);
+u32 tipc_node_id2hash(u8 *id128);
 
 #endif
index 37892b3909afc110990c11f5a12197ad506b3a02..f3711176be4559e5707d4c75d130a7907e35afe2 100644 (file)
@@ -574,5 +574,5 @@ void tipc_nlist_purge(struct tipc_nlist *nl)
 {
        tipc_dest_list_purge(&nl->list);
        nl->remote = 0;
-       nl->local = 0;
+       nl->local = false;
 }
index f3d2e83313e1d0c687a61cd67e50e84952e24a80..f7d47c89d6581d1ac9836e1669ff67b4faf017c9 100644 (file)
@@ -210,7 +210,7 @@ void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest)
        rcu_read_lock();
        b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
        if (b)
-               tipc_disc_add_dest(b->link_req);
+               tipc_disc_add_dest(b->disc);
        rcu_read_unlock();
 }
 
@@ -222,7 +222,7 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
        rcu_read_lock();
        b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
        if (b)
-               tipc_disc_remove_dest(b->link_req);
+               tipc_disc_remove_dest(b->disc);
        rcu_read_unlock();
 }
 
@@ -230,88 +230,73 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
  * tipc_enable_bearer - enable bearer with the given name
  */
 static int tipc_enable_bearer(struct net *net, const char *name,
-                             u32 disc_domain, u32 priority,
+                             u32 disc_domain, u32 prio,
                              struct nlattr *attr[])
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
+       struct tipc_net *tn = tipc_net(net);
+       struct tipc_bearer_names b_names;
+       int with_this_prio = 1;
        struct tipc_bearer *b;
        struct tipc_media *m;
-       struct tipc_bearer_names b_names;
        struct sk_buff *skb;
-       char addr_string[16];
-       u32 bearer_id;
-       u32 with_this_prio;
-       u32 i;
+       int bearer_id = MAX_BEARERS;
+       int i = MAX_BEARERS;
        int res = -EINVAL;
+       char *errstr = "";
 
-       if (!tn->own_addr) {
-               pr_warn("Bearer <%s> rejected, not supported in standalone mode\n",
-                       name);
-               return -ENOPROTOOPT;
-       }
        if (!bearer_name_validate(name, &b_names)) {
-               pr_warn("Bearer <%s> rejected, illegal name\n", name);
-               return -EINVAL;
-       }
-       if (tipc_addr_domain_valid(disc_domain) &&
-           (disc_domain != tn->own_addr)) {
-               if (tipc_in_scope(disc_domain, tn->own_addr)) {
-                       disc_domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK;
-                       res = 0;   /* accept any node in own cluster */
-               } else if (in_own_cluster_exact(net, disc_domain))
-                       res = 0;   /* accept specified node in own cluster */
+               errstr = "illegal name";
+               goto rejected;
        }
-       if (res) {
-               pr_warn("Bearer <%s> rejected, illegal discovery domain\n",
-                       name);
-               return -EINVAL;
-       }
-       if ((priority > TIPC_MAX_LINK_PRI) &&
-           (priority != TIPC_MEDIA_LINK_PRI)) {
-               pr_warn("Bearer <%s> rejected, illegal priority\n", name);
-               return -EINVAL;
+
+       if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
+               errstr = "illegal priority";
+               goto rejected;
        }
 
        m = tipc_media_find(b_names.media_name);
        if (!m) {
-               pr_warn("Bearer <%s> rejected, media <%s> not registered\n",
-                       name, b_names.media_name);
-               return -EINVAL;
+               errstr = "media not registered";
+               goto rejected;
        }
 
-       if (priority == TIPC_MEDIA_LINK_PRI)
-               priority = m->priority;
+       if (prio == TIPC_MEDIA_LINK_PRI)
+               prio = m->priority;
 
-restart:
-       bearer_id = MAX_BEARERS;
-       with_this_prio = 1;
-       for (i = MAX_BEARERS; i-- != 0; ) {
-               b = rtnl_dereference(tn->bearer_list[i]);
-               if (!b) {
-                       bearer_id = i;
-                       continue;
-               }
+       /* Check new bearer vs existing ones and find free bearer id if any.
+        * Every slot is visited: stopping at the first free slot would skip
+        * the name and priority checks for bearers in the remaining slots.
+        */
+       while (i-- != 0) {
+               b = rtnl_dereference(tn->bearer_list[i]);
+               if (!b) {
+                       bearer_id = i;
+                       continue;
+               }
                if (!strcmp(name, b->name)) {
-                       pr_warn("Bearer <%s> rejected, already enabled\n",
-                               name);
-                       return -EINVAL;
+                       errstr = "already enabled";
+                       goto rejected;
                }
-               if ((b->priority == priority) &&
-                   (++with_this_prio > 2)) {
-                       if (priority-- == 0) {
-                               pr_warn("Bearer <%s> rejected, duplicate priority\n",
-                                       name);
-                               return -EINVAL;
-                       }
-                       pr_warn("Bearer <%s> priority adjustment required %u->%u\n",
-                               name, priority + 1, priority);
-                       goto restart;
+               if (b->priority != prio)
+                       continue;
+               if (++with_this_prio <= 2)
+                       continue;
+               pr_warn("Bearer <%s>: already 2 bearers with priority %u\n",
+                       name, prio);
+               if (prio == TIPC_MIN_LINK_PRI) {
+                       errstr = "cannot adjust to lower";
+                       goto rejected;
                }
+               pr_warn("Bearer <%s>: trying with adjusted priority\n", name);
+               prio--;
+               /* Restart the scan from the top with the lowered priority */
+               bearer_id = MAX_BEARERS;
+               i = MAX_BEARERS;
+               with_this_prio = 1;
        }
+
        if (bearer_id >= MAX_BEARERS) {
-               pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n",
-                       name, MAX_BEARERS);
-               return -EINVAL;
+               errstr = "max 3 bearers permitted";
+               goto rejected;
        }
 
        b = kzalloc(sizeof(*b), GFP_ATOMIC);
@@ -322,10 +301,9 @@ static int tipc_enable_bearer(struct net *net, const char *name,
        b->media = m;
        res = m->enable_media(net, b, attr);
        if (res) {
-               pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
-                       name, -res);
                kfree(b);
-               return -EINVAL;
+               errstr = "failed to enable media";
+               goto rejected;
        }
 
        b->identity = bearer_id;
@@ -333,15 +311,15 @@ static int tipc_enable_bearer(struct net *net, const char *name,
        b->window = m->window;
        b->domain = disc_domain;
        b->net_plane = bearer_id + 'A';
-       b->priority = priority;
+       b->priority = prio;
        test_and_set_bit_lock(0, &b->up);
 
        res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
        if (res) {
                bearer_disable(net, b);
-               pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
-                       name);
-               return -EINVAL;
+               /* No kfree(b): bearer_disable() frees it via kfree_rcu() */
+               errstr = "failed to create discoverer";
+               goto rejected;
        }
 
        rcu_assign_pointer(tn->bearer_list[bearer_id], b);
@@ -353,9 +331,11 @@ static int tipc_enable_bearer(struct net *net, const char *name,
                return -ENOMEM;
        }
 
-       pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
-               name,
-               tipc_addr_string_fill(addr_string, disc_domain), priority);
+       pr_info("Enabled bearer <%s>, priority %u\n", name, prio);
+
+       return res;
+rejected:
+       pr_warn("Enabling of bearer <%s> rejected, %s\n", name, errstr);
        return res;
 }
 
@@ -385,8 +365,8 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b)
        tipc_node_delete_links(net, bearer_id);
        b->media->disable_media(b);
        RCU_INIT_POINTER(b->media_ptr, NULL);
-       if (b->link_req)
-               tipc_disc_delete(b->link_req);
+       if (b->disc)
+               tipc_disc_delete(b->disc);
        RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL);
        kfree_rcu(b, rcu);
        tipc_mon_delete(net, bearer_id);
@@ -395,11 +375,13 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b)
 int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
                         struct nlattr *attr[])
 {
+       char *dev_name = strchr((const char *)b->name, ':') + 1;
+       int hwaddr_len = b->media->hwaddr_len;
+       u8 node_id[NODE_ID_LEN] = {0,};
        struct net_device *dev;
-       char *driver_name = strchr((const char *)b->name, ':') + 1;
 
        /* Find device with specified name */
-       dev = dev_get_by_name(net, driver_name);
+       dev = dev_get_by_name(net, dev_name);
        if (!dev)
                return -ENODEV;
        if (tipc_mtu_bad(dev, 0)) {
@@ -407,6 +389,18 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
                return -EINVAL;
        }
 
+       /* Autoconfigure own node identity if needed */
+       if (!tipc_own_id(net) && hwaddr_len <= NODE_ID_LEN) {
+               memcpy(node_id, dev->dev_addr, hwaddr_len);
+               tipc_net_init(net, node_id, 0);
+       }
+       if (!tipc_own_id(net)) {
+               /* Drop the device reference taken by dev_get_by_name() */
+               dev_put(dev);
+               pr_warn("Failed to obtain node identity\n");
+               return -EINVAL;
+       }
+
        /* Associate TIPC bearer with L2 bearer */
        rcu_assign_pointer(b->media_ptr, dev);
        b->pt.dev = dev;
@@ -414,7 +406,7 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
        b->pt.func = tipc_l2_rcv_msg;
        dev_add_pack(&b->pt);
        memset(&b->bcast_addr, 0, sizeof(b->bcast_addr));
-       memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len);
+       memcpy(b->bcast_addr.value, dev->broadcast, hwaddr_len);
        b->bcast_addr.media_id = b->media->type_id;
        b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT;
        b->mtu = dev->mtu;
@@ -861,12 +853,10 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
        char *bearer;
        struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
        struct net *net = sock_net(skb->sk);
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
-       u32 domain;
+       u32 domain = 0;
        u32 prio;
 
        prio = TIPC_MEDIA_LINK_PRI;
-       domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK;
 
        if (!info->attrs[TIPC_NLA_BEARER])
                return -EINVAL;
index a53613d95bc9fee54bb8762d10b97ce3118d5d29..6efcee63a3819cf13b15704c870b05b7c5745990 100644 (file)
@@ -159,7 +159,7 @@ struct tipc_bearer {
        u32 tolerance;
        u32 domain;
        u32 identity;
-       struct tipc_link_req *link_req;
+       struct tipc_discoverer *disc;
        char net_plane;
        unsigned long up;
 };
index 0b982d048fb9b36c8bec876c32361f264e5dac9e..52dfc51ac4d5716ba7e3b9fe52bafbd649e79dc9 100644 (file)
@@ -56,7 +56,11 @@ static int __net_init tipc_init_net(struct net *net)
        int err;
 
        tn->net_id = 4711;
-       tn->own_addr = 0;
+       tn->node_addr = 0;
+       tn->trial_addr = 0;
+       tn->addr_trial_end = 0;
+       memset(tn->node_id, 0, sizeof(tn->node_id));
+       memset(tn->node_id_string, 0, sizeof(tn->node_id_string));
        tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
        get_random_bytes(&tn->random, sizeof(int));
        INIT_LIST_HEAD(&tn->node_list);
@@ -105,6 +109,7 @@ static struct pernet_operations tipc_net_ops = {
        .exit = tipc_exit_net,
        .id   = &tipc_net_id,
        .size = sizeof(struct tipc_net),
+       .async = true,
 };
 
 static int __init tipc_init(void)
index ff8b071654f508ecfb941df25e73632830b7c2c0..d0f64ca62d02439ff86c5b15a26f033ca5c80489 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * net/tipc/core.h: Include file for TIPC global declarations
  *
- * Copyright (c) 2005-2006, 2013 Ericsson AB
+ * Copyright (c) 2005-2006, 2013-2018 Ericsson AB
  * Copyright (c) 2005-2007, 2010-2013, Wind River Systems
  * All rights reserved.
  *
@@ -72,15 +72,22 @@ struct tipc_monitor;
 #define NODE_HTABLE_SIZE       512
 #define MAX_BEARERS             3
 #define TIPC_DEF_MON_THRESHOLD  32
+#define NODE_ID_LEN             16
+#define NODE_ID_STR_LEN        (NODE_ID_LEN * 2 + 1)
 
 extern unsigned int tipc_net_id __read_mostly;
 extern int sysctl_tipc_rmem[3] __read_mostly;
 extern int sysctl_tipc_named_timeout __read_mostly;
 
 struct tipc_net {
-       u32 own_addr;
+       u8  node_id[NODE_ID_LEN];
+       u32 node_addr;
+       u32 trial_addr;
+       unsigned long addr_trial_end;
+       char node_id_string[NODE_ID_STR_LEN];
        int net_id;
        int random;
+       bool legacy_addr_format;
 
        /* Node table and node list */
        spinlock_t node_list_lock;
@@ -131,6 +138,11 @@ static inline struct list_head *tipc_nodes(struct net *net)
        return &tipc_net(net)->node_list;
 }
 
+/* Return the nametbl pointer kept in the TIPC per-namespace state of @net */
+static inline struct name_table *tipc_name_table(struct net *net)
+{
+       return tipc_net(net)->nametbl;
+}
+
 static inline struct tipc_topsrv *tipc_topsrv(struct net *net)
 {
        return tipc_net(net)->topsrv;
diff --git a/net/tipc/diag.c b/net/tipc/diag.c
new file mode 100644 (file)
index 0000000..46d9cd6
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ * net/tipc/diag.c: TIPC socket diag
+ *
+ * Copyright (c) 2018, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "socket.h"
+#include <linux/sock_diag.h>
+#include <linux/tipc_sockets_diag.h>
+
+/* Fetch the socket cookie through sock_diag_save_cookie() and return the
+ * two 32-bit words reinterpreted in place as a single u64.
+ */
+static u64 __tipc_diag_gen_cookie(struct sock *sk)
+{
+       u32 res[2];
+
+       sock_diag_save_cookie(sk, res);
+       return *((u64 *)res);
+}
+
+/* Append one SOCK_DIAG_BY_FAMILY answer describing @tsk to @skb.
+ * The state filter comes from the tipc_sock_diag_req carried in the
+ * request header (cb->nlh).
+ * Returns 0 on success, -EMSGSIZE if the netlink header does not fit,
+ * or the error returned by tipc_sk_fill_sock_diag().
+ */
+static int __tipc_add_sock_diag(struct sk_buff *skb,
+                               struct netlink_callback *cb,
+                               struct tipc_sock *tsk)
+{
+       struct tipc_sock_diag_req *req = nlmsg_data(cb->nlh);
+       struct nlmsghdr *nlh;
+       int err;
+
+       nlh = nlmsg_put_answer(skb, cb, SOCK_DIAG_BY_FAMILY, 0,
+                              NLM_F_MULTI);
+       if (!nlh)
+               return -EMSGSIZE;
+
+       err = tipc_sk_fill_sock_diag(skb, tsk, req->tidiag_states,
+                                    __tipc_diag_gen_cookie);
+       /* NOTE(review): the header added above stays in skb on this path;
+        * confirm whether nlmsg_cancel() is needed here.
+        */
+       if (err)
+               return err;
+
+       nlmsg_end(skb, nlh);
+       return 0;
+}
+
+/* Netlink dump callback: hand the walk over to tipc_nl_sk_walk(), which is
+ * given __tipc_add_sock_diag() as its per-socket callback.
+ */
+static int tipc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       return tipc_nl_sk_walk(skb, cb, __tipc_add_sock_diag);
+}
+
+/* sock_diag .dump handler for AF_TIPC.
+ * Returns -EINVAL if the request payload is shorter than
+ * struct tipc_sock_diag_req, -EOPNOTSUPP if NLM_F_DUMP is not set,
+ * and 0 once a dump has been requested.
+ */
+static int tipc_sock_diag_handler_dump(struct sk_buff *skb,
+                                      struct nlmsghdr *h)
+{
+       int hdrlen = sizeof(struct tipc_sock_diag_req);
+       struct net *net = sock_net(skb->sk);
+
+       if (nlmsg_len(h) < hdrlen)
+               return -EINVAL;
+
+       if (h->nlmsg_flags & NLM_F_DUMP) {
+               struct netlink_dump_control c = {
+                       .dump = tipc_diag_dump,
+               };
+               /* NOTE(review): the result of netlink_dump_start() is
+                * discarded and 0 is returned regardless; confirm intended.
+                */
+               netlink_dump_start(net->diag_nlsk, skb, h, &c);
+               return 0;
+       }
+       return -EOPNOTSUPP;
+}
+
+/* Handler registered with sock_diag for the AF_TIPC family */
+static const struct sock_diag_handler tipc_sock_diag_handler = {
+       .family = AF_TIPC,
+       .dump = tipc_sock_diag_handler_dump,
+};
+
+/* Module init: register the AF_TIPC sock_diag handler */
+static int __init tipc_diag_init(void)
+{
+       return sock_diag_register(&tipc_sock_diag_handler);
+}
+
+/* Module exit: unregister the AF_TIPC sock_diag handler */
+static void __exit tipc_diag_exit(void)
+{
+       sock_diag_unregister(&tipc_sock_diag_handler);
+}
+
+module_init(tipc_diag_init);
+module_exit(tipc_diag_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_TIPC);
index 92e4828c6b09d5bddcfbbec634a5dba6dfbc08de..9f666e0650e23c0d4275ae219c23c5e301df5ac4 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * net/tipc/discover.c
  *
- * Copyright (c) 2003-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2003-2006, 2014-2018, Ericsson AB
  * Copyright (c) 2005-2006, 2010-2011, Wind River Systems
  * All rights reserved.
  *
 #include "discover.h"
 
 /* min delay during bearer start up */
-#define TIPC_LINK_REQ_INIT     msecs_to_jiffies(125)
+#define TIPC_DISC_INIT msecs_to_jiffies(125)
 /* max delay if bearer has no links */
-#define TIPC_LINK_REQ_FAST     msecs_to_jiffies(1000)
+#define TIPC_DISC_FAST msecs_to_jiffies(1000)
 /* max delay if bearer has links */
-#define TIPC_LINK_REQ_SLOW     msecs_to_jiffies(60000)
+#define TIPC_DISC_SLOW msecs_to_jiffies(60000)
 /* indicates no timer in use */
-#define TIPC_LINK_REQ_INACTIVE 0xffffffff
+#define TIPC_DISC_INACTIVE     0xffffffff
 
 /**
- * struct tipc_link_req - information about an ongoing link setup request
+ * struct tipc_discoverer - information about an ongoing link setup request
  * @bearer_id: identity of bearer issuing requests
  * @net: network namespace instance
  * @dest: destination address for request messages
  * @domain: network domain to which links can be established
  * @num_nodes: number of nodes currently discovered (i.e. with an active link)
  * @lock: spinlock for controlling access to requests
- * @buf: request message to be (repeatedly) sent
+ * @skb: request message to be (repeatedly) sent
  * @timer: timer governing period between requests
  * @timer_intv: current interval between requests (in ms)
  */
-struct tipc_link_req {
+struct tipc_discoverer {
        u32 bearer_id;
        struct tipc_media_addr dest;
        struct net *net;
        u32 domain;
        int num_nodes;
        spinlock_t lock;
-       struct sk_buff *buf;
+       struct sk_buff *skb;
        struct timer_list timer;
        unsigned long timer_intv;
 };
@@ -77,22 +77,42 @@ struct tipc_link_req {
  * @type: message type (request or response)
  * @b: ptr to bearer issuing message
  */
-static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type,
-                              struct tipc_bearer *b)
+static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb,
+                              u32 mtyp,  struct tipc_bearer *b)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
-       struct tipc_msg *msg;
+       struct tipc_net *tn = tipc_net(net);
        u32 dest_domain = b->domain;
+       struct tipc_msg *hdr;
 
-       msg = buf_msg(buf);
-       tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type,
+       hdr = buf_msg(skb);
+       tipc_msg_init(tn->trial_addr, hdr, LINK_CONFIG, mtyp,
                      MAX_H_SIZE, dest_domain);
-       msg_set_non_seq(msg, 1);
-       msg_set_node_sig(msg, tn->random);
-       msg_set_node_capabilities(msg, TIPC_NODE_CAPABILITIES);
-       msg_set_dest_domain(msg, dest_domain);
-       msg_set_bc_netid(msg, tn->net_id);
-       b->media->addr2msg(msg_media_addr(msg), &b->addr);
+       msg_set_size(hdr, MAX_H_SIZE + NODE_ID_LEN);
+       msg_set_non_seq(hdr, 1);
+       msg_set_node_sig(hdr, tn->random);
+       msg_set_node_capabilities(hdr, TIPC_NODE_CAPABILITIES);
+       msg_set_dest_domain(hdr, dest_domain);
+       msg_set_bc_netid(hdr, tn->net_id);
+       b->media->addr2msg(msg_media_addr(hdr), &b->addr);
+       msg_set_node_id(hdr, tipc_own_id(net));
+}
+
+/* tipc_disc_msg_xmit - build and send a one-off discovery message
+ * Allocates a buffer, initializes it with tipc_disc_init_msg(), sets the
+ * suggested node address and destination domain, and transmits it to
+ * @maddr on bearer @b. Silently does nothing if no buffer can be allocated.
+ * Note: @src is not used by this function.
+ */
+static void tipc_disc_msg_xmit(struct net *net, u32 mtyp, u32 dst,
+                              u32 src, u32 sugg_addr,
+                              struct tipc_media_addr *maddr,
+                              struct tipc_bearer *b)
+{
+       struct tipc_msg *hdr;
+       struct sk_buff *skb;
+
+       skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC);
+       if (!skb)
+               return;
+       hdr = buf_msg(skb);
+       tipc_disc_init_msg(net, skb, mtyp, b);
+       msg_set_sugg_node_addr(hdr, sugg_addr);
+       /* Overrides the destination domain set by tipc_disc_init_msg() */
+       msg_set_dest_domain(hdr, dst);
+       tipc_bearer_xmit_skb(net, b->identity, skb, maddr);
+}
 
 /**
@@ -104,161 +124,207 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type,
 static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr,
                            struct tipc_media_addr *media_addr)
 {
-       char node_addr_str[16];
        char media_addr_str[64];
 
-       tipc_addr_string_fill(node_addr_str, node_addr);
        tipc_media_addr_printf(media_addr_str, sizeof(media_addr_str),
                               media_addr);
-       pr_warn("Duplicate %s using %s seen on <%s>\n", node_addr_str,
+       pr_warn("Duplicate %x using %s seen on <%s>\n", node_addr,
                media_addr_str, b->name);
 }
 
+/* tipc_disc_addr_trial_msg - handle an address uniqueness trial from peer
+ * Returns true if the caller should stop processing the message (it was a
+ * DSC_TRIAL_FAIL_MSG or a DSC_TRIAL_MSG), false for any other message type.
+ */
+static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
+                                    struct tipc_media_addr *maddr,
+                                    struct tipc_bearer *b,
+                                    u32 dst, u32 src,
+                                    u32 sugg_addr,
+                                    u8 *peer_id,
+                                    int mtyp)
+{
+       struct net *net = d->net;
+       struct tipc_net *tn = tipc_net(net);
+       bool trial = time_before(jiffies, tn->addr_trial_end);
+       u32 self = tipc_own_addr(net);
+
+       if (mtyp == DSC_TRIAL_FAIL_MSG) {
+               if (!trial)
+                       return true;
+
+               /* Ignore if somebody else already gave new suggestion */
+               if (dst != tn->trial_addr)
+                       return true;
+
+               /* Otherwise update trial address and restart trial period */
+               tn->trial_addr = sugg_addr;
+               msg_set_prevnode(buf_msg(d->skb), sugg_addr);
+               tn->addr_trial_end = jiffies + msecs_to_jiffies(1000);
+               return true;
+       }
+
+       /* Apply trial address if we just left trial period */
+       if (!trial && !self) {
+               tipc_net_finalize(net, tn->trial_addr);
+               msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
+       }
+
+       if (mtyp != DSC_TRIAL_MSG)
+               return false;
+
+       /* A non-zero result is sent back to the peer as a suggested address */
+       sugg_addr = tipc_node_try_addr(net, peer_id, src);
+       if (sugg_addr)
+               tipc_disc_msg_xmit(net, DSC_TRIAL_FAIL_MSG, src,
+                                  self, sugg_addr, maddr, b);
+       return true;
+}
+
 /**
  * tipc_disc_rcv - handle incoming discovery message (request or response)
- * @net: the applicable net namespace
- * @buf: buffer containing message
- * @bearer: bearer that message arrived on
+ * @net: applicable net namespace
+ * @skb: buffer containing message
+ * @b: bearer that message arrived on
  */
 void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
-                  struct tipc_bearer *bearer)
+                  struct tipc_bearer *b)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
-       struct tipc_media_addr maddr;
-       struct sk_buff *rskb;
+       struct tipc_net *tn = tipc_net(net);
        struct tipc_msg *hdr = buf_msg(skb);
-       u32 ddom = msg_dest_domain(hdr);
-       u32 onode = msg_prevnode(hdr);
+       u16 caps = msg_node_capabilities(hdr);
+       bool legacy = tn->legacy_addr_format;
+       u32 sugg = msg_sugg_node_addr(hdr);
+       u32 signature = msg_node_sig(hdr);
+       u8 peer_id[NODE_ID_LEN] = {0,};
+       u32 dst = msg_dest_domain(hdr);
        u32 net_id = msg_bc_netid(hdr);
+       struct tipc_media_addr maddr;
+       u32 src = msg_prevnode(hdr);
        u32 mtyp = msg_type(hdr);
-       u32 signature = msg_node_sig(hdr);
-       u16 caps = msg_node_capabilities(hdr);
-       bool respond = false;
        bool dupl_addr = false;
+       bool respond = false;
+       u32 self;
        int err;
 
-       err = bearer->media->msg2addr(bearer, &maddr, msg_media_addr(hdr));
-       kfree_skb(skb);
-       if (err)
-               return;
+       skb_linearize(skb);
+       hdr = buf_msg(skb);
 
-       /* Ensure message from node is valid and communication is permitted */
-       if (net_id != tn->net_id)
+       if (caps & TIPC_NODE_ID128)
+               memcpy(peer_id, msg_node_id(hdr), NODE_ID_LEN);
+       else
+               sprintf(peer_id, "%x", src);
+
+       err = b->media->msg2addr(b, &maddr, msg_media_addr(hdr));
+       kfree_skb(skb);
+       if (err || maddr.broadcast) {
+               pr_warn_ratelimited("Rcv corrupt discovery message\n");
                return;
-       if (maddr.broadcast)
+       }
+       /* Ignore discovery messages from own node */
+       if (!memcmp(&maddr, &b->addr, sizeof(maddr)))
                return;
-       if (!tipc_addr_domain_valid(ddom))
+       if (net_id != tn->net_id)
                return;
-       if (!tipc_addr_node_valid(onode))
+       if (tipc_disc_addr_trial_msg(b->disc, &maddr, b, dst,
+                                    src, sugg, peer_id, mtyp))
                return;
+       self = tipc_own_addr(net);
 
-       if (in_own_node(net, onode)) {
-               if (memcmp(&maddr, &bearer->addr, sizeof(maddr)))
-                       disc_dupl_alert(bearer, tn->own_addr, &maddr);
+       /* Message from somebody using this node's address */
+       if (in_own_node(net, src)) {
+               disc_dupl_alert(b, self, &maddr);
                return;
        }
-       if (!tipc_in_scope(ddom, tn->own_addr))
+       if (!tipc_in_scope(legacy, dst, self))
                return;
-       if (!tipc_in_scope(bearer->domain, onode))
+       if (!tipc_in_scope(legacy, b->domain, src))
                return;
-
-       tipc_node_check_dest(net, onode, bearer, caps, signature,
+       tipc_node_check_dest(net, src, peer_id, b, caps, signature,
                             &maddr, &respond, &dupl_addr);
        if (dupl_addr)
-               disc_dupl_alert(bearer, onode, &maddr);
-
-       /* Send response, if necessary */
-       if (respond && (mtyp == DSC_REQ_MSG)) {
-               rskb = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
-               if (!rskb)
-                       return;
-               tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer);
-               tipc_bearer_xmit_skb(net, bearer->identity, rskb, &maddr);
-       }
+               disc_dupl_alert(b, src, &maddr);
+       if (!respond)
+               return;
+       if (mtyp != DSC_REQ_MSG)
+               return;
+       tipc_disc_msg_xmit(net, DSC_RESP_MSG, src, self, 0, &maddr, b);
 }
 
-/**
- * disc_update - update frequency of periodic link setup requests
- * @req: ptr to link request structure
- *
- * Reinitiates discovery process if discovery object has no associated nodes
- * and is either not currently searching or is searching at a slow rate
+/* tipc_disc_add_dest - increment set of discovered nodes
  */
-static void disc_update(struct tipc_link_req *req)
+void tipc_disc_add_dest(struct tipc_discoverer *d)
 {
-       if (!req->num_nodes) {
-               if ((req->timer_intv == TIPC_LINK_REQ_INACTIVE) ||
-                   (req->timer_intv > TIPC_LINK_REQ_FAST)) {
-                       req->timer_intv = TIPC_LINK_REQ_INIT;
-                       mod_timer(&req->timer, jiffies + req->timer_intv);
-               }
-       }
+       spin_lock_bh(&d->lock);
+       d->num_nodes++;
+       spin_unlock_bh(&d->lock);
 }
 
-/**
- * tipc_disc_add_dest - increment set of discovered nodes
- * @req: ptr to link request structure
+/* tipc_disc_remove_dest - decrement set of discovered nodes
  */
-void tipc_disc_add_dest(struct tipc_link_req *req)
+void tipc_disc_remove_dest(struct tipc_discoverer *d)
 {
-       spin_lock_bh(&req->lock);
-       req->num_nodes++;
-       spin_unlock_bh(&req->lock);
-}
+       int intv, num;
 
-/**
- * tipc_disc_remove_dest - decrement set of discovered nodes
- * @req: ptr to link request structure
- */
-void tipc_disc_remove_dest(struct tipc_link_req *req)
-{
-       spin_lock_bh(&req->lock);
-       req->num_nodes--;
-       disc_update(req);
-       spin_unlock_bh(&req->lock);
+       spin_lock_bh(&d->lock);
+       d->num_nodes--;
+       num = d->num_nodes;
+       intv = d->timer_intv;
+       if (!num && (intv == TIPC_DISC_INACTIVE || intv > TIPC_DISC_FAST))  {
+               d->timer_intv = TIPC_DISC_INIT;
+               mod_timer(&d->timer, jiffies + d->timer_intv);
+       }
+       spin_unlock_bh(&d->lock);
 }
 
-/**
- * disc_timeout - send a periodic link setup request
- * @data: ptr to link request structure
- *
+/* tipc_disc_timeout - send a periodic link setup request
  * Called whenever a link setup request timer associated with a bearer expires.
+ * - Keep doubling time between sent request until limit is reached;
+ * - Hold at fast polling rate if we don't have any associated nodes
+ * - Otherwise hold at slow polling rate
  */
-static void disc_timeout(struct timer_list *t)
+static void tipc_disc_timeout(struct timer_list *t)
 {
-       struct tipc_link_req *req = from_timer(req, t, timer);
-       struct sk_buff *skb;
-       int max_delay;
+       struct tipc_discoverer *d = from_timer(d, t, timer);
+       struct tipc_net *tn = tipc_net(d->net);
+       u32 self = tipc_own_addr(d->net);
+       struct tipc_media_addr maddr;
+       struct sk_buff *skb = NULL;
+       struct net *net = d->net;
+       u32 bearer_id;
 
-       spin_lock_bh(&req->lock);
+       spin_lock_bh(&d->lock);
 
        /* Stop searching if only desired node has been found */
-       if (tipc_node(req->domain) && req->num_nodes) {
-               req->timer_intv = TIPC_LINK_REQ_INACTIVE;
+       if (tipc_node(d->domain) && d->num_nodes) {
+               d->timer_intv = TIPC_DISC_INACTIVE;
                goto exit;
        }
 
-       /*
-        * Send discovery message, then update discovery timer
-        *
-        * Keep doubling time between requests until limit is reached;
-        * hold at fast polling rate if don't have any associated nodes,
-        * otherwise hold at slow polling rate
-        */
-       skb = skb_clone(req->buf, GFP_ATOMIC);
-       if (skb)
-               tipc_bearer_xmit_skb(req->net, req->bearer_id, skb, &req->dest);
-       req->timer_intv *= 2;
-       if (req->num_nodes)
-               max_delay = TIPC_LINK_REQ_SLOW;
-       else
-               max_delay = TIPC_LINK_REQ_FAST;
-       if (req->timer_intv > max_delay)
-               req->timer_intv = max_delay;
+       /* Did we just leave the address trial period ? */
+       if (!self && !time_before(jiffies, tn->addr_trial_end)) {
+               self = tn->trial_addr;
+               tipc_net_finalize(net, self);
+               msg_set_prevnode(buf_msg(d->skb), self);
+               msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
+       }
+
+       /* Adjust timeout interval according to discovery phase */
+       if (time_before(jiffies, tn->addr_trial_end)) {
+               d->timer_intv = TIPC_DISC_INIT;
+       } else {
+               d->timer_intv *= 2;
+               if (d->num_nodes && d->timer_intv > TIPC_DISC_SLOW)
+                       d->timer_intv = TIPC_DISC_SLOW;
+               else if (!d->num_nodes && d->timer_intv > TIPC_DISC_FAST)
+                       d->timer_intv = TIPC_DISC_FAST;
+       }
 
-       mod_timer(&req->timer, jiffies + req->timer_intv);
+       mod_timer(&d->timer, jiffies + d->timer_intv);
+       memcpy(&maddr, &d->dest, sizeof(maddr));
+       skb = skb_clone(d->skb, GFP_ATOMIC);
+       bearer_id = d->bearer_id;
 exit:
-       spin_unlock_bh(&req->lock);
+       spin_unlock_bh(&d->lock);
+       if (skb)
+               tipc_bearer_xmit_skb(net, bearer_id, skb, &maddr);
 }
 
 /**
@@ -273,41 +339,47 @@ static void disc_timeout(struct timer_list *t)
 int tipc_disc_create(struct net *net, struct tipc_bearer *b,
                     struct tipc_media_addr *dest, struct sk_buff **skb)
 {
-       struct tipc_link_req *req;
+       struct tipc_net *tn = tipc_net(net);
+       struct tipc_discoverer *d;
 
-       req = kmalloc(sizeof(*req), GFP_ATOMIC);
-       if (!req)
+       d = kmalloc(sizeof(*d), GFP_ATOMIC);
+       if (!d)
                return -ENOMEM;
-       req->buf = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
-       if (!req->buf) {
-               kfree(req);
+       d->skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC);
+       if (!d->skb) {
+               kfree(d);
                return -ENOMEM;
        }
+       tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b);
 
-       tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b);
-       memcpy(&req->dest, dest, sizeof(*dest));
-       req->net = net;
-       req->bearer_id = b->identity;
-       req->domain = b->domain;
-       req->num_nodes = 0;
-       req->timer_intv = TIPC_LINK_REQ_INIT;
-       spin_lock_init(&req->lock);
-       timer_setup(&req->timer, disc_timeout, 0);
-       mod_timer(&req->timer, jiffies + req->timer_intv);
-       b->link_req = req;
-       *skb = skb_clone(req->buf, GFP_ATOMIC);
+       /* Do we need an address trial period first ? */
+       if (!tipc_own_addr(net)) {
+               tn->addr_trial_end = jiffies + msecs_to_jiffies(1000);
+               msg_set_type(buf_msg(d->skb), DSC_TRIAL_MSG);
+       }
+       memcpy(&d->dest, dest, sizeof(*dest));
+       d->net = net;
+       d->bearer_id = b->identity;
+       d->domain = b->domain;
+       d->num_nodes = 0;
+       d->timer_intv = TIPC_DISC_INIT;
+       spin_lock_init(&d->lock);
+       timer_setup(&d->timer, tipc_disc_timeout, 0);
+       mod_timer(&d->timer, jiffies + d->timer_intv);
+       b->disc = d;
+       *skb = skb_clone(d->skb, GFP_ATOMIC);
        return 0;
 }
 
 /**
  * tipc_disc_delete - destroy object sending periodic link setup requests
- * @req: ptr to link request structure
+ * @d: ptr to discoverer structure
  */
-void tipc_disc_delete(struct tipc_link_req *req)
+void tipc_disc_delete(struct tipc_discoverer *d)
 {
-       del_timer_sync(&req->timer);
-       kfree_skb(req->buf);
-       kfree(req);
+       del_timer_sync(&d->timer);
+       kfree_skb(d->skb);
+       kfree(d);
 }
 
 /**
@@ -318,19 +390,21 @@ void tipc_disc_delete(struct tipc_link_req *req)
  */
 void tipc_disc_reset(struct net *net, struct tipc_bearer *b)
 {
-       struct tipc_link_req *req = b->link_req;
+       struct tipc_discoverer *d = b->disc;
+       struct tipc_media_addr maddr;
        struct sk_buff *skb;
 
-       spin_lock_bh(&req->lock);
-       tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b);
-       req->net = net;
-       req->bearer_id = b->identity;
-       req->domain = b->domain;
-       req->num_nodes = 0;
-       req->timer_intv = TIPC_LINK_REQ_INIT;
-       mod_timer(&req->timer, jiffies + req->timer_intv);
-       skb = skb_clone(req->buf, GFP_ATOMIC);
+       spin_lock_bh(&d->lock);
+       tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b);
+       d->net = net;
+       d->bearer_id = b->identity;
+       d->domain = b->domain;
+       d->num_nodes = 0;
+       d->timer_intv = TIPC_DISC_INIT;
+       memcpy(&maddr, &d->dest, sizeof(maddr));
+       mod_timer(&d->timer, jiffies + d->timer_intv);
+       skb = skb_clone(d->skb, GFP_ATOMIC);
+       spin_unlock_bh(&d->lock);
        if (skb)
-               tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest);
-       spin_unlock_bh(&req->lock);
+               tipc_bearer_xmit_skb(net, b->identity, skb, &maddr);
 }
index b80a335389c0e918ed98d559ae239b3f96d8e8c4..521d96c41dfd0552778023c047391ffe98bed924 100644 (file)
 #ifndef _TIPC_DISCOVER_H
 #define _TIPC_DISCOVER_H
 
-struct tipc_link_req;
+struct tipc_discoverer;
 
 int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr,
                     struct tipc_media_addr *dest, struct sk_buff **skb);
-void tipc_disc_delete(struct tipc_link_req *req);
+void tipc_disc_delete(struct tipc_discoverer *req);
 void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr);
-void tipc_disc_add_dest(struct tipc_link_req *req);
-void tipc_disc_remove_dest(struct tipc_link_req *req);
+void tipc_disc_add_dest(struct tipc_discoverer *req);
+void tipc_disc_remove_dest(struct tipc_discoverer *req);
 void tipc_disc_rcv(struct net *net, struct sk_buff *buf,
                   struct tipc_bearer *b_ptr);
 
index 03086ccb77469f183147599d12249e3c6cd4a3a8..d7a7befeddd42c907bdf88a37e0b77515b3e8705 100644 (file)
@@ -189,6 +189,7 @@ struct tipc_group *tipc_group_create(struct net *net, u32 portid,
        grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK;
        grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS;
        grp->open = group_is_open;
+       *grp->open = false;
        filter |= global ? TIPC_SUB_CLUSTER_SCOPE : TIPC_SUB_NODE_SCOPE;
        if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0,
                                    filter, &grp->subid))
index 3c230466804d69a16cb6a168102d5a397db3d6b0..1289b4ba404fdabbd26d785c82aebf677ba21f15 100644 (file)
@@ -434,14 +434,16 @@ char *tipc_link_name(struct tipc_link *l)
  */
 bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
                      int tolerance, char net_plane, u32 mtu, int priority,
-                     int window, u32 session, u32 ownnode, u32 peer,
-                     u16 peer_caps,
+                     int window, u32 session, u32 self,
+                     u32 peer, u8 *peer_id, u16 peer_caps,
                      struct tipc_link *bc_sndlink,
                      struct tipc_link *bc_rcvlink,
                      struct sk_buff_head *inputq,
                      struct sk_buff_head *namedq,
                      struct tipc_link **link)
 {
+       char peer_str[NODE_ID_STR_LEN] = {0,};
+       char self_str[NODE_ID_STR_LEN] = {0,};
        struct tipc_link *l;
 
        l = kzalloc(sizeof(*l), GFP_ATOMIC);
@@ -450,10 +452,18 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
        *link = l;
        l->session = session;
 
-       /* Note: peer i/f name is completed by reset/activate message */
-       sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown",
-               tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode),
-               if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
+       /* Set link name for unicast links only */
+       if (peer_id) {
+               tipc_nodeid2string(self_str, tipc_own_id(net));
+               if (strlen(self_str) > 16)
+                       sprintf(self_str, "%x", self);
+               tipc_nodeid2string(peer_str, peer_id);
+               if (strlen(peer_str) > 16)
+                       sprintf(peer_str, "%x", peer);
+       }
+       /* Peer i/f name will be completed by reset/activate message */
+       sprintf(l->name, "%s:%s-%s:unknown", self_str, if_name, peer_str);
+
        strcpy(l->if_name, if_name);
        l->addr = peer;
        l->peer_caps = peer_caps;
@@ -501,7 +511,7 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
        struct tipc_link *l;
 
        if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, window,
-                             0, ownnode, peer, peer_caps, bc_sndlink,
+                             0, ownnode, peer, NULL, peer_caps, bc_sndlink,
                              NULL, inputq, namedq, link))
                return false;
 
@@ -1938,11 +1948,11 @@ static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s)
 int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
                       struct tipc_link *link, int nlflags)
 {
-       int err;
-       void *hdr;
+       u32 self = tipc_own_addr(net);
        struct nlattr *attrs;
        struct nlattr *prop;
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
+       void *hdr;
+       int err;
 
        hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
                          nlflags, TIPC_NL_LINK_GET);
@@ -1955,8 +1965,7 @@ int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
 
        if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, link->name))
                goto attr_msg_full;
-       if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST,
-                       tipc_cluster_mask(tn->own_addr)))
+       if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, tipc_cluster_mask(self)))
                goto attr_msg_full;
        if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu))
                goto attr_msg_full;
index d1bd1787a768306e84976c362aada4af99068e3a..ec59348a81e8b7a5311bb9ff25ab778ea91b0354 100644 (file)
@@ -73,8 +73,8 @@ enum {
 
 bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
                      int tolerance, char net_plane, u32 mtu, int priority,
-                     int window, u32 session, u32 ownnode, u32 peer,
-                     u16 peer_caps,
+                     int window, u32 session, u32 ownnode,
+                     u32 peer, u8 *peer_id, u16 peer_caps,
                      struct tipc_link *bc_sndlink,
                      struct tipc_link *bc_rcvlink,
                      struct sk_buff_head *inputq,
index 4e1c6f6450bb96d9f3c34f2ba771d740c4f98c98..b6c45dccba3d2e7a3301167a90eecbf36d8ac07b 100644 (file)
@@ -580,7 +580,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
        msg = buf_msg(skb);
        if (msg_reroute_cnt(msg))
                return false;
-       dnode = addr_domain(net, msg_lookup_scope(msg));
+       dnode = tipc_scope2node(net, msg_lookup_scope(msg));
        dport = tipc_nametbl_translate(net, msg_nametype(msg),
                                       msg_nameinst(msg), &dnode);
        if (!dport)
index b4ba1b4f9ae7fa51aa7c58db8456c1691e2a0839..a4e944d593942042ce1b506d42a4c585e8aafc62 100644 (file)
@@ -550,6 +550,8 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n)
  */
 #define DSC_REQ_MSG            0
 #define DSC_RESP_MSG           1
+#define DSC_TRIAL_MSG          2
+#define DSC_TRIAL_FAIL_MSG     3
 
 /*
  * Group protocol message types
@@ -627,7 +629,6 @@ static inline void msg_set_bcgap_to(struct tipc_msg *m, u32 n)
        msg_set_bits(m, 2, 0, 0xffff, n);
 }
 
-
 /*
  * Word 4
  */
@@ -925,6 +926,26 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
        return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG);
 }
 
+static inline u32 msg_sugg_node_addr(struct tipc_msg *m)
+{
+       return msg_word(m, 14);
+}
+
+static inline void msg_set_sugg_node_addr(struct tipc_msg *m, u32 n)
+{
+       msg_set_word(m, 14, n);
+}
+
+static inline void msg_set_node_id(struct tipc_msg *hdr, u8 *id)
+{
+       memcpy(msg_data(hdr), id, 16);
+}
+
+static inline u8 *msg_node_id(struct tipc_msg *hdr)
+{
+       return (u8 *)msg_data(hdr);
+}
+
 struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp);
 bool tipc_msg_validate(struct sk_buff **_skb);
 bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
index 23f8899e0f8c3d7bda50bc57801bcc63ceab448c..8240a85b0d0c00d0c0174dfd1f14228eabc4da40 100644 (file)
@@ -56,7 +56,7 @@ static void publ_to_item(struct distr_item *i, struct publication *p)
        i->type = htonl(p->type);
        i->lower = htonl(p->lower);
        i->upper = htonl(p->upper);
-       i->ref = htonl(p->ref);
+       i->port = htonl(p->port);
        i->key = htonl(p->key);
 }
 
@@ -68,14 +68,14 @@ static void publ_to_item(struct distr_item *i, struct publication *p)
 static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
                                         u32 dest)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
        struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC);
+       u32 self = tipc_own_addr(net);
        struct tipc_msg *msg;
 
        if (buf != NULL) {
                msg = buf_msg(buf);
-               tipc_msg_init(tn->own_addr, msg, NAME_DISTRIBUTOR, type,
-                             INT_H_SIZE, dest);
+               tipc_msg_init(self, msg, NAME_DISTRIBUTOR,
+                             type, INT_H_SIZE, dest);
                msg_set_size(msg, INT_H_SIZE + size);
        }
        return buf;
@@ -86,25 +86,25 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
  */
 struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
-       struct sk_buff *buf;
+       struct name_table *nt = tipc_name_table(net);
        struct distr_item *item;
+       struct sk_buff *skb;
 
-       list_add_tail_rcu(&publ->local_list,
-                         &tn->nametbl->publ_list[publ->scope]);
-
-       if (publ->scope == TIPC_NODE_SCOPE)
+       if (publ->scope == TIPC_NODE_SCOPE) {
+               list_add_tail_rcu(&publ->binding_node, &nt->node_scope);
                return NULL;
+       }
+       list_add_tail_rcu(&publ->binding_node, &nt->cluster_scope);
 
-       buf = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
-       if (!buf) {
+       skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
+       if (!skb) {
                pr_warn("Publication distribution failure\n");
                return NULL;
        }
 
-       item = (struct distr_item *)msg_data(buf_msg(buf));
+       item = (struct distr_item *)msg_data(buf_msg(skb));
        publ_to_item(item, publ);
-       return buf;
+       return skb;
 }
 
 /**
@@ -115,7 +115,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
        struct sk_buff *buf;
        struct distr_item *item;
 
-       list_del(&publ->local_list);
+       list_del(&publ->binding_node);
 
        if (publ->scope == TIPC_NODE_SCOPE)
                return NULL;
@@ -147,7 +147,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
                        ITEM_SIZE) * ITEM_SIZE;
        u32 msg_rem = msg_dsz;
 
-       list_for_each_entry(publ, pls, local_list) {
+       list_for_each_entry(publ, pls, binding_node) {
                /* Prepare next buffer: */
                if (!skb) {
                        skb = named_prepare_buf(net, PUBLICATION, msg_rem,
@@ -184,16 +184,13 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
  */
 void tipc_named_node_up(struct net *net, u32 dnode)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
+       struct name_table *nt = tipc_name_table(net);
        struct sk_buff_head head;
 
        __skb_queue_head_init(&head);
 
        rcu_read_lock();
-       named_distribute(net, &head, dnode,
-                        &tn->nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
-       named_distribute(net, &head, dnode,
-                        &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]);
+       named_distribute(net, &head, dnode, &nt->cluster_scope);
        rcu_read_unlock();
 
        tipc_node_xmit(net, &head, dnode, 0);
@@ -212,15 +209,15 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr)
 
        spin_lock_bh(&tn->nametbl_lock);
        p = tipc_nametbl_remove_publ(net, publ->type, publ->lower,
-                                    publ->node, publ->ref, publ->key);
+                                    publ->node, publ->port, publ->key);
        if (p)
-               tipc_node_unsubscribe(net, &p->nodesub_list, addr);
+               tipc_node_unsubscribe(net, &p->binding_node, addr);
        spin_unlock_bh(&tn->nametbl_lock);
 
        if (p != publ) {
                pr_err("Unable to remove publication from failed node\n"
-                      " (type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n",
-                      publ->type, publ->lower, publ->node, publ->ref,
+                      " (type=%u, lower=%u, node=0x%x, port=%u, key=%u)\n",
+                      publ->type, publ->lower, publ->node, publ->port,
                       publ->key);
        }
 
@@ -249,7 +246,7 @@ void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr)
 {
        struct publication *publ, *tmp;
 
-       list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list)
+       list_for_each_entry_safe(publ, tmp, nsub_list, binding_node)
                tipc_publ_purge(net, publ, addr);
        tipc_dist_queue_purge(net, addr);
 }
@@ -271,18 +268,18 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
                                                ntohl(i->lower),
                                                ntohl(i->upper),
                                                TIPC_CLUSTER_SCOPE, node,
-                                               ntohl(i->ref), ntohl(i->key));
+                                               ntohl(i->port), ntohl(i->key));
                if (publ) {
-                       tipc_node_subscribe(net, &publ->nodesub_list, node);
+                       tipc_node_subscribe(net, &publ->binding_node, node);
                        return true;
                }
        } else if (dtype == WITHDRAWAL) {
                publ = tipc_nametbl_remove_publ(net, ntohl(i->type),
                                                ntohl(i->lower),
-                                               node, ntohl(i->ref),
+                                               node, ntohl(i->port),
                                                ntohl(i->key));
                if (publ) {
-                       tipc_node_unsubscribe(net, &publ->nodesub_list, node);
+                       tipc_node_unsubscribe(net, &publ->binding_node, node);
                        kfree_rcu(publ, rcu);
                        return true;
                }
@@ -321,7 +318,6 @@ void tipc_named_process_backlog(struct net *net)
 {
        struct distr_queue_item *e, *tmp;
        struct tipc_net *tn = net_generic(net, tipc_net_id);
-       char addr[16];
        unsigned long now = get_jiffies_64();
 
        list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) {
@@ -329,12 +325,11 @@ void tipc_named_process_backlog(struct net *net)
                        if (!tipc_update_nametbl(net, &e->i, e->node, e->dtype))
                                continue;
                } else {
-                       tipc_addr_string_fill(addr, e->node);
-                       pr_warn_ratelimited("Dropping name table update (%d) of {%u, %u, %u} from %s key=%u\n",
+                       pr_warn_ratelimited("Dropping name table update (%d) of {%u, %u, %u} from %x key=%u\n",
                                            e->dtype, ntohl(e->i.type),
                                            ntohl(e->i.lower),
                                            ntohl(e->i.upper),
-                                           addr, ntohl(e->i.key));
+                                           e->node, ntohl(e->i.key));
                }
                list_del(&e->next);
                kfree(e);
@@ -382,16 +377,17 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
  */
 void tipc_named_reinit(struct net *net)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
+       struct name_table *nt = tipc_name_table(net);
+       struct tipc_net *tn = tipc_net(net);
        struct publication *publ;
-       int scope;
+       u32 self = tipc_own_addr(net);
 
        spin_lock_bh(&tn->nametbl_lock);
 
-       for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++)
-               list_for_each_entry_rcu(publ, &tn->nametbl->publ_list[scope],
-                                       local_list)
-                       publ->node = tn->own_addr;
+       list_for_each_entry_rcu(publ, &nt->node_scope, binding_node)
+               publ->node = self;
+       list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node)
+               publ->node = self;
 
        spin_unlock_bh(&tn->nametbl_lock);
 }
index 1264ba0af93758befcccc918b587fa4e9591c2c7..4753e628d7c4a0bbdff917f0c3ec5b07d66b9a48 100644 (file)
@@ -63,7 +63,7 @@ struct distr_item {
        __be32 type;
        __be32 lower;
        __be32 upper;
-       __be32 ref;
+       __be32 port;
        __be32 key;
 };
 
index e01c9c691ba2e115d77e906954f8011eed936c36..4359605b1bec4813a2a4ae96b2212cb86722c135 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * net/tipc/name_table.c: TIPC name table code
  *
- * Copyright (c) 2000-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
  * Copyright (c) 2004-2008, 2010-2014, Wind River Systems
  * All rights reserved.
  *
 
 /**
  * struct name_info - name sequence publication info
- * @node_list: circular list of publications made by own node
- * @cluster_list: circular list of publications made by own cluster
- * @zone_list: circular list of publications made by own zone
- * @node_list_size: number of entries in "node_list"
- * @cluster_list_size: number of entries in "cluster_list"
- * @zone_list_size: number of entries in "zone_list"
- *
- * Note: The zone list always contains at least one entry, since all
- *       publications of the associated name sequence belong to it.
- *       (The cluster and node lists may be empty.)
+ * @node_list: list of publications on own node of this <type,lower,upper>
+ * @all_publ: list of all publications of this <type,lower,upper>
  */
 struct name_info {
-       struct list_head node_list;
-       struct list_head cluster_list;
-       struct list_head zone_list;
-       u32 node_list_size;
-       u32 cluster_list_size;
-       u32 zone_list_size;
+       struct list_head local_publ;
+       struct list_head all_publ;
 };
 
 /**
@@ -114,7 +102,7 @@ static int hash(int x)
  * publ_create - create a publication structure
  */
 static struct publication *publ_create(u32 type, u32 lower, u32 upper,
-                                      u32 scope, u32 node, u32 port_ref,
+                                      u32 scope, u32 node, u32 port,
                                       u32 key)
 {
        struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC);
@@ -128,9 +116,9 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper,
        publ->upper = upper;
        publ->scope = scope;
        publ->node = node;
-       publ->ref = port_ref;
+       publ->port = port;
        publ->key = key;
-       INIT_LIST_HEAD(&publ->pport_list);
+       INIT_LIST_HEAD(&publ->binding_sock);
        return publ;
 }
 
@@ -249,9 +237,9 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
                info = sseq->info;
 
                /* Check if an identical publication already exists */
-               list_for_each_entry(publ, &info->zone_list, zone_list) {
-                       if ((publ->ref == port) && (publ->key == key) &&
-                           (!publ->node || (publ->node == node)))
+               list_for_each_entry(publ, &info->all_publ, all_publ) {
+                       if (publ->port == port && publ->key == key &&
+                           (!publ->node || publ->node == node))
                                return NULL;
                }
        } else {
@@ -290,9 +278,8 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
                        return NULL;
                }
 
-               INIT_LIST_HEAD(&info->node_list);
-               INIT_LIST_HEAD(&info->cluster_list);
-               INIT_LIST_HEAD(&info->zone_list);
+               INIT_LIST_HEAD(&info->local_publ);
+               INIT_LIST_HEAD(&info->all_publ);
 
                /* Insert new sub-sequence */
                sseq = &nseq->sseqs[inspos];
@@ -311,23 +298,15 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
        if (!publ)
                return NULL;
 
-       list_add(&publ->zone_list, &info->zone_list);
-       info->zone_list_size++;
-
-       if (in_own_cluster(net, node)) {
-               list_add(&publ->cluster_list, &info->cluster_list);
-               info->cluster_list_size++;
-       }
+       list_add(&publ->all_publ, &info->all_publ);
 
-       if (in_own_node(net, node)) {
-               list_add(&publ->node_list, &info->node_list);
-               info->node_list_size++;
-       }
+       if (in_own_node(net, node))
+               list_add(&publ->local_publ, &info->local_publ);
 
        /* Any subscriptions waiting for notification?  */
        list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
                tipc_sub_report_overlap(s, publ->lower, publ->upper,
-                                       TIPC_PUBLISHED, publ->ref,
+                                       TIPC_PUBLISHED, publ->port,
                                        publ->node, publ->scope,
                                        created_subseq);
        }
@@ -348,7 +327,7 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
 static struct publication *tipc_nameseq_remove_publ(struct net *net,
                                                    struct name_seq *nseq,
                                                    u32 inst, u32 node,
-                                                   u32 ref, u32 key)
+                                                   u32 port, u32 key)
 {
        struct publication *publ;
        struct sub_seq *sseq = nameseq_find_subseq(nseq, inst);
@@ -363,32 +342,20 @@ static struct publication *tipc_nameseq_remove_publ(struct net *net,
        info = sseq->info;
 
        /* Locate publication, if it exists */
-       list_for_each_entry(publ, &info->zone_list, zone_list) {
-               if ((publ->key == key) && (publ->ref == ref) &&
-                   (!publ->node || (publ->node == node)))
+       list_for_each_entry(publ, &info->all_publ, all_publ) {
+               if (publ->key == key && publ->port == port &&
+                   (!publ->node || publ->node == node))
                        goto found;
        }
        return NULL;
 
 found:
-       /* Remove publication from zone scope list */
-       list_del(&publ->zone_list);
-       info->zone_list_size--;
-
-       /* Remove publication from cluster scope list, if present */
-       if (in_own_cluster(net, node)) {
-               list_del(&publ->cluster_list);
-               info->cluster_list_size--;
-       }
-
-       /* Remove publication from node scope list, if present */
-       if (in_own_node(net, node)) {
-               list_del(&publ->node_list);
-               info->node_list_size--;
-       }
+       list_del(&publ->all_publ);
+       if (in_own_node(net, node))
+               list_del(&publ->local_publ);
 
        /* Contract subseq list if no more publications for that subseq */
-       if (list_empty(&info->zone_list)) {
+       if (list_empty(&info->all_publ)) {
                kfree(info);
                free = &nseq->sseqs[nseq->first_free--];
                memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq));
@@ -398,8 +365,9 @@ static struct publication *tipc_nameseq_remove_publ(struct net *net,
        /* Notify any waiting subscriptions */
        list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
                tipc_sub_report_overlap(s, publ->lower, publ->upper,
-                                       TIPC_WITHDRAWN, publ->ref, publ->node,
-                                       publ->scope, removed_subseq);
+                                       TIPC_WITHDRAWN, publ->port,
+                                       publ->node, publ->scope,
+                                       removed_subseq);
        }
 
        return publ;
@@ -435,11 +403,12 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
                        struct name_info *info = sseq->info;
                        int must_report = 1;
 
-                       list_for_each_entry(crs, &info->zone_list, zone_list) {
+                       list_for_each_entry(crs, &info->all_publ, all_publ) {
                                tipc_sub_report_overlap(sub, sseq->lower,
                                                        sseq->upper,
                                                        TIPC_PUBLISHED,
-                                                       crs->ref, crs->node,
+                                                       crs->port,
+                                                       crs->node,
                                                        crs->scope,
                                                        must_report);
                                must_report = 0;
@@ -473,8 +442,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
        struct name_seq *seq = nametbl_find_seq(net, type);
        int index = hash(type);
 
-       if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) ||
-           (lower > upper)) {
+       if (scope > TIPC_NODE_SCOPE || lower > upper) {
                pr_debug("Failed to publish illegal {%u,%u,%u} with scope %u\n",
                         type, lower, upper, scope);
                return NULL;
@@ -493,7 +461,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
 }
 
 struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
-                                            u32 lower, u32 node, u32 ref,
+                                            u32 lower, u32 node, u32 port,
                                             u32 key)
 {
        struct publication *publ;
@@ -503,7 +471,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
                return NULL;
 
        spin_lock_bh(&seq->lock);
-       publ = tipc_nameseq_remove_publ(net, seq, lower, node, ref, key);
+       publ = tipc_nameseq_remove_publ(net, seq, lower, node, port, key);
        if (!seq->first_free && list_empty(&seq->subscriptions)) {
                hlist_del_init_rcu(&seq->ns_list);
                kfree(seq->sseqs);
@@ -531,15 +499,17 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
 u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
                           u32 *destnode)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
+       struct tipc_net *tn = tipc_net(net);
+       bool legacy = tn->legacy_addr_format;
+       u32 self = tipc_own_addr(net);
        struct sub_seq *sseq;
        struct name_info *info;
        struct publication *publ;
        struct name_seq *seq;
-       u32 ref = 0;
+       u32 port = 0;
        u32 node = 0;
 
-       if (!tipc_in_scope(*destnode, tn->own_addr))
+       if (!tipc_in_scope(legacy, *destnode, self))
                return 0;
 
        rcu_read_lock();
@@ -553,55 +523,43 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
        info = sseq->info;
 
        /* Closest-First Algorithm */
-       if (likely(!*destnode)) {
-               if (!list_empty(&info->node_list)) {
-                       publ = list_first_entry(&info->node_list,
+       if (legacy && !*destnode) {
+               if (!list_empty(&info->local_publ)) {
+                       publ = list_first_entry(&info->local_publ,
                                                struct publication,
-                                               node_list);
-                       list_move_tail(&publ->node_list,
-                                      &info->node_list);
-               } else if (!list_empty(&info->cluster_list)) {
-                       publ = list_first_entry(&info->cluster_list,
-                                               struct publication,
-                                               cluster_list);
-                       list_move_tail(&publ->cluster_list,
-                                      &info->cluster_list);
+                                               local_publ);
+                       list_move_tail(&publ->local_publ,
+                                      &info->local_publ);
                } else {
-                       publ = list_first_entry(&info->zone_list,
+                       publ = list_first_entry(&info->all_publ,
                                                struct publication,
-                                               zone_list);
-                       list_move_tail(&publ->zone_list,
-                                      &info->zone_list);
+                                               all_publ);
+                       list_move_tail(&publ->all_publ,
+                                      &info->all_publ);
                }
        }
 
        /* Round-Robin Algorithm */
-       else if (*destnode == tn->own_addr) {
-               if (list_empty(&info->node_list))
-                       goto no_match;
-               publ = list_first_entry(&info->node_list, struct publication,
-                                       node_list);
-               list_move_tail(&publ->node_list, &info->node_list);
-       } else if (in_own_cluster_exact(net, *destnode)) {
-               if (list_empty(&info->cluster_list))
+       else if (*destnode == tipc_own_addr(net)) {
+               if (list_empty(&info->local_publ))
                        goto no_match;
-               publ = list_first_entry(&info->cluster_list, struct publication,
-                                       cluster_list);
-               list_move_tail(&publ->cluster_list, &info->cluster_list);
+               publ = list_first_entry(&info->local_publ, struct publication,
+                                       local_publ);
+               list_move_tail(&publ->local_publ, &info->local_publ);
        } else {
-               publ = list_first_entry(&info->zone_list, struct publication,
-                                       zone_list);
-               list_move_tail(&publ->zone_list, &info->zone_list);
+               publ = list_first_entry(&info->all_publ, struct publication,
+                                       all_publ);
+               list_move_tail(&publ->all_publ, &info->all_publ);
        }
 
-       ref = publ->ref;
+       port = publ->port;
        node = publ->node;
 no_match:
        spin_unlock_bh(&seq->lock);
 not_found:
        rcu_read_unlock();
        *destnode = node;
-       return ref;
+       return port;
 }
 
 bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
@@ -623,16 +581,16 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
        sseq = nameseq_find_subseq(seq, instance);
        if (likely(sseq)) {
                info = sseq->info;
-               list_for_each_entry(publ, &info->zone_list, zone_list) {
+               list_for_each_entry(publ, &info->all_publ, all_publ) {
                        if (publ->scope != scope)
                                continue;
-                       if (publ->ref == exclude && publ->node == self)
+                       if (publ->port == exclude && publ->node == self)
                                continue;
-                       tipc_dest_push(dsts, publ->node, publ->ref);
+                       tipc_dest_push(dsts, publ->node, publ->port);
                        (*dstcnt)++;
                        if (all)
                                continue;
-                       list_move_tail(&publ->zone_list, &info->zone_list);
+                       list_move_tail(&publ->all_publ, &info->all_publ);
                        break;
                }
        }
@@ -642,15 +600,14 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
        return !list_empty(dsts);
 }
 
-int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
-                          u32 scope, bool exact, struct list_head *dports)
+void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+                           u32 scope, bool exact, struct list_head *dports)
 {
        struct sub_seq *sseq_stop;
        struct name_info *info;
        struct publication *p;
        struct name_seq *seq;
        struct sub_seq *sseq;
-       int res = 0;
 
        rcu_read_lock();
        seq = nametbl_find_seq(net, type);
@@ -664,18 +621,14 @@ int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
                if (sseq->lower > upper)
                        break;
                info = sseq->info;
-               list_for_each_entry(p, &info->node_list, node_list) {
+               list_for_each_entry(p, &info->local_publ, local_publ) {
                        if (p->scope == scope || (!exact && p->scope < scope))
-                               tipc_dest_push(dports, 0, p->ref);
+                               tipc_dest_push(dports, 0, p->port);
                }
-
-               if (info->cluster_list_size != info->node_list_size)
-                       res = 1;
        }
        spin_unlock_bh(&seq->lock);
 exit:
        rcu_read_unlock();
-       return res;
 }
 
 /* tipc_nametbl_lookup_dst_nodes - find broadcast destination nodes
@@ -700,7 +653,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
        stop = seq->sseqs + seq->first_free;
        for (; sseq != stop && sseq->lower <= upper; sseq++) {
                info = sseq->info;
-               list_for_each_entry(publ, &info->zone_list, zone_list) {
+               list_for_each_entry(publ, &info->all_publ, all_publ) {
                        tipc_nlist_add(nodes, publ->node);
                }
        }
@@ -729,10 +682,10 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
        stop = seq->sseqs + seq->first_free;
        for (; sseq != stop; sseq++) {
                info = sseq->info;
-               list_for_each_entry(p, &info->zone_list, zone_list) {
+               list_for_each_entry(p, &info->all_publ, all_publ) {
                        if (p->scope != scope)
                                continue;
-                       tipc_group_add_member(grp, p->node, p->ref, p->lower);
+                       tipc_group_add_member(grp, p->node, p->port, p->lower);
                }
        }
        spin_unlock_bh(&seq->lock);
@@ -760,7 +713,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
        }
 
        publ = tipc_nametbl_insert_publ(net, type, lower, upper, scope,
-                                       tn->own_addr, port_ref, key);
+                                       tipc_own_addr(net), port_ref, key);
        if (likely(publ)) {
                tn->nametbl->local_publ_count++;
                buf = tipc_named_publish(net, publ);
@@ -777,7 +730,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
 /**
  * tipc_nametbl_withdraw - withdraw name publication from network name tables
  */
-int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
+int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 port,
                          u32 key)
 {
        struct publication *publ;
@@ -785,19 +738,19 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
        struct tipc_net *tn = net_generic(net, tipc_net_id);
 
        spin_lock_bh(&tn->nametbl_lock);
-       publ = tipc_nametbl_remove_publ(net, type, lower, tn->own_addr,
-                                       ref, key);
+       publ = tipc_nametbl_remove_publ(net, type, lower, tipc_own_addr(net),
+                                       port, key);
        if (likely(publ)) {
                tn->nametbl->local_publ_count--;
                skb = tipc_named_withdraw(net, publ);
                /* Any pending external events? */
                tipc_named_process_backlog(net);
-               list_del_init(&publ->pport_list);
+               list_del_init(&publ->binding_sock);
                kfree_rcu(publ, rcu);
        } else {
                pr_err("Unable to remove local publication\n"
-                      "(type=%u, lower=%u, ref=%u, key=%u)\n",
-                      type, lower, ref, key);
+                      "(type=%u, lower=%u, port=%u, key=%u)\n",
+                      type, lower, port, key);
        }
        spin_unlock_bh(&tn->nametbl_lock);
 
@@ -879,9 +832,8 @@ int tipc_nametbl_init(struct net *net)
        for (i = 0; i < TIPC_NAMETBL_SIZE; i++)
                INIT_HLIST_HEAD(&tipc_nametbl->seq_hlist[i]);
 
-       INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]);
-       INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
-       INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]);
+       INIT_LIST_HEAD(&tipc_nametbl->node_scope);
+       INIT_LIST_HEAD(&tipc_nametbl->cluster_scope);
        tn->nametbl = tipc_nametbl;
        spin_lock_init(&tn->nametbl_lock);
        return 0;
@@ -901,9 +853,9 @@ static void tipc_purge_publications(struct net *net, struct name_seq *seq)
        spin_lock_bh(&seq->lock);
        sseq = seq->sseqs;
        info = sseq->info;
-       list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) {
+       list_for_each_entry_safe(publ, safe, &info->all_publ, all_publ) {
                tipc_nameseq_remove_publ(net, seq, publ->lower, publ->node,
-                                        publ->ref, publ->key);
+                                        publ->port, publ->key);
                kfree_rcu(publ, rcu);
        }
        hlist_del_init_rcu(&seq->ns_list);
@@ -950,17 +902,17 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
        struct publication *p;
 
        if (*last_publ) {
-               list_for_each_entry(p, &sseq->info->zone_list, zone_list)
+               list_for_each_entry(p, &sseq->info->all_publ, all_publ)
                        if (p->key == *last_publ)
                                break;
                if (p->key != *last_publ)
                        return -EPIPE;
        } else {
-               p = list_first_entry(&sseq->info->zone_list, struct publication,
-                                    zone_list);
+               p = list_first_entry(&sseq->info->all_publ, struct publication,
+                                    all_publ);
        }
 
-       list_for_each_entry_from(p, &sseq->info->zone_list, zone_list) {
+       list_for_each_entry_from(p, &sseq->info->all_publ, all_publ) {
                *last_publ = p->key;
 
                hdr = genlmsg_put(msg->skb, msg->portid, msg->seq,
@@ -987,7 +939,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
                        goto publ_msg_full;
                if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->node))
                        goto publ_msg_full;
-               if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->ref))
+               if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->port))
                        goto publ_msg_full;
                if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key))
                        goto publ_msg_full;
index 17652602d5e2df92f22b1184e4f9d0e3d7d68bd0..34a4ccb907aa2c9174bc0ce60ac751d09f38d705 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * net/tipc/name_table.h: Include file for TIPC name table code
  *
- * Copyright (c) 2000-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
  * Copyright (c) 2004-2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
@@ -54,19 +54,22 @@ struct tipc_group;
  * @type: name sequence type
  * @lower: name sequence lower bound
  * @upper: name sequence upper bound
- * @scope: scope of publication
- * @node: network address of publishing port's node
- * @ref: publishing port
- * @key: publication key
- * @nodesub_list: subscription to "node down" event (off-node publication only)
- * @local_list: adjacent entries in list of publications made by this node
- * @pport_list: adjacent entries in list of publications made by this port
- * @node_list: adjacent matching name seq publications with >= node scope
- * @cluster_list: adjacent matching name seq publications with >= cluster scope
- * @zone_list: adjacent matching name seq publications with >= zone scope
+ * @scope: scope of publication, TIPC_NODE_SCOPE or TIPC_CLUSTER_SCOPE
+ * @node: network address of publishing socket's node
+ * @port: publishing port
+ * @key: publication key, unique across the cluster
+ * @binding_node: all publications from the same node which bound this one
+ * - Remote publications: in node->publ_list
+ *   Used by node/name distr to withdraw publications when node is lost
+ * - Local/node scope publications: in name_table->node_scope list
+ * - Local/cluster scope publications: in name_table->cluster_scope list
+ * @binding_sock: all publications from the same socket which bound this one
+ *   Used by socket to withdraw publications when socket is unbound/released
+ * @local_publ: list of identical publications made from this node
+ *   Used by closest_first and multicast receive lookup algorithms
+ * @all_publ: all publications identical to this one, whatever node and scope
+ *   Used by round-robin lookup algorithm
  * @rcu: RCU callback head used for deferred freeing
- *
- * Note that the node list, cluster list, and zone list are circular lists.
  */
 struct publication {
        u32 type;
@@ -74,34 +77,37 @@ struct publication {
        u32 upper;
        u32 scope;
        u32 node;
-       u32 ref;
+       u32 port;
        u32 key;
-       struct list_head nodesub_list;
-       struct list_head local_list;
-       struct list_head pport_list;
-       struct list_head node_list;
-       struct list_head cluster_list;
-       struct list_head zone_list;
+       struct list_head binding_node;
+       struct list_head binding_sock;
+       struct list_head local_publ;
+       struct list_head all_publ;
        struct rcu_head rcu;
 };
 
 /**
  * struct name_table - table containing all existing port name publications
  * @seq_hlist: name sequence hash lists
- * @publ_list: pulication lists
+ * @node_scope: all local publications with node scope
+ *               - used by name_distr during re-init of name table
+ * @cluster_scope: all local publications with cluster scope
+ *               - used by name_distr to send bulk updates to new nodes
+ *               - used by name_distr during re-init of name table
  * @local_publ_count: number of publications issued by this node
  */
 struct name_table {
        struct hlist_head seq_hlist[TIPC_NAMETBL_SIZE];
-       struct list_head publ_list[TIPC_PUBL_SCOPE_NUM];
+       struct list_head node_scope;
+       struct list_head cluster_scope;
        u32 local_publ_count;
 };
 
 int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
 
 u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
-int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
-                          u32 scope, bool exact, struct list_head *dports);
+void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+                           u32 scope, bool exact, struct list_head *dports);
 void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
                              u32 type, u32 domain);
 void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
index 1a2fde0d6f61398f5552750d782eded94668f56f..29538dc0085762265c8cec10d31fc0d5b26ce0ff 100644 (file)
  *     - A local spin_lock protecting the queue of subscriber events.
 */
 
-int tipc_net_start(struct net *net, u32 addr)
+int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
-       char addr_string[16];
+       if (tipc_own_id(net)) {
+               pr_info("Cannot configure node identity twice\n");
+               return -1;
+       }
+       pr_info("Started in network mode\n");
 
-       tn->own_addr = addr;
+       if (node_id)
+               tipc_set_node_id(net, node_id);
+       if (addr)
+               tipc_net_finalize(net, addr);
+       return 0;
+}
 
-       /* Ensure that the new address is visible before we reinit. */
+void tipc_net_finalize(struct net *net, u32 addr)
+{
+       tipc_set_node_addr(net, addr);
        smp_mb();
-
        tipc_named_reinit(net);
        tipc_sk_reinit(net);
-
-       tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr,
-                            TIPC_ZONE_SCOPE, 0, tn->own_addr);
-
-       pr_info("Started in network mode\n");
-       pr_info("Own node address %s, network identity %u\n",
-               tipc_addr_string_fill(addr_string, tn->own_addr),
-               tn->net_id);
-       return 0;
+       tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
+                            TIPC_CLUSTER_SCOPE, 0, addr);
 }
 
 void tipc_net_stop(struct net *net)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
+       u32 self = tipc_own_addr(net);
 
-       if (!tn->own_addr)
+       if (!self)
                return;
 
-       tipc_nametbl_withdraw(net, TIPC_CFG_SRV, tn->own_addr, 0,
-                             tn->own_addr);
+       tipc_nametbl_withdraw(net, TIPC_CFG_SRV, self, 0, self);
        rtnl_lock();
        tipc_bearer_stop(net);
        tipc_node_stop(net);
@@ -147,8 +148,10 @@ void tipc_net_stop(struct net *net)
 static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg)
 {
        struct tipc_net *tn = net_generic(net, tipc_net_id);
-       void *hdr;
+       u64 *w0 = (u64 *)&tn->node_id[0];
+       u64 *w1 = (u64 *)&tn->node_id[8];
        struct nlattr *attrs;
+       void *hdr;
 
        hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
                          NLM_F_MULTI, TIPC_NL_NET_GET);
@@ -161,7 +164,10 @@ static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg)
 
        if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tn->net_id))
                goto attr_msg_full;
-
+       if (nla_put_u64_64bit(msg->skb, TIPC_NLA_NET_NODEID, *w0, 0))
+               goto attr_msg_full;
+       if (nla_put_u64_64bit(msg->skb, TIPC_NLA_NET_NODEID_W1, *w1, 0))
+               goto attr_msg_full;
        nla_nest_end(msg->skb, attrs);
        genlmsg_end(msg->skb, hdr);
 
@@ -202,9 +208,9 @@ int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
 {
-       struct net *net = sock_net(skb->sk);
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
        struct nlattr *attrs[TIPC_NLA_NET_MAX + 1];
+       struct net *net = sock_net(skb->sk);
+       struct tipc_net *tn = tipc_net(net);
        int err;
 
        if (!info->attrs[TIPC_NLA_NET])
@@ -213,16 +219,17 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
        err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX,
                               info->attrs[TIPC_NLA_NET], tipc_nl_net_policy,
                               info->extack);
+
        if (err)
                return err;
 
+       /* Can't change net id once TIPC has joined a network */
+       if (tipc_own_addr(net))
+               return -EPERM;
+
        if (attrs[TIPC_NLA_NET_ID]) {
                u32 val;
 
-               /* Can't change net id once TIPC has joined a network */
-               if (tn->own_addr)
-                       return -EPERM;
-
                val = nla_get_u32(attrs[TIPC_NLA_NET_ID]);
                if (val < 1 || val > 9999)
                        return -EINVAL;
@@ -233,17 +240,22 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
        if (attrs[TIPC_NLA_NET_ADDR]) {
                u32 addr;
 
-               /* Can't change net addr once TIPC has joined a network */
-               if (tn->own_addr)
-                       return -EPERM;
-
                addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]);
-               if (!tipc_addr_node_valid(addr))
+               if (!addr)
                        return -EINVAL;
-
-               tipc_net_start(net, addr);
+               tn->legacy_addr_format = true;
+               tipc_net_init(net, NULL, addr);
        }
 
+       if (attrs[TIPC_NLA_NET_NODEID]) {
+               u8 node_id[NODE_ID_LEN];
+               u64 *w0 = (u64 *)&node_id[0];
+               u64 *w1 = (u64 *)&node_id[8];
+
+               *w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]);
+               *w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]);
+               tipc_net_init(net, node_id, 0);
+       }
        return 0;
 }
 
index c0306aa2374b7c1c845bc6b8182740ae26eea9e4..09ad02b50bb1ba1e9798c41810c843d74b5e0185 100644 (file)
 
 extern const struct nla_policy tipc_nl_net_policy[];
 
-int tipc_net_start(struct net *net, u32 addr);
-
+int tipc_net_init(struct net *net, u8 *node_id, u32 addr);
+void tipc_net_finalize(struct net *net, u32 addr);
 void tipc_net_stop(struct net *net);
-
 int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
 int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
index 389193d7cf672ab04accb270b888eacf0b888158..4a95c8c155c6a5e48ca56c18909c036d37327af7 100644 (file)
@@ -115,6 +115,7 @@ struct tipc_node {
        u16 capabilities;
        u32 signature;
        u32 link_id;
+       u8 peer_id[16];
        struct list_head publ_list;
        struct list_head conn_sks;
        unsigned long keepalive_intv;
@@ -156,6 +157,7 @@ static void tipc_node_delete(struct tipc_node *node);
 static void tipc_node_timeout(struct timer_list *t);
 static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
 static struct tipc_node *tipc_node_find(struct net *net, u32 addr);
+static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id);
 static void tipc_node_put(struct tipc_node *node);
 static bool node_is_up(struct tipc_node *n);
 
@@ -233,9 +235,6 @@ static struct tipc_node *tipc_node_find(struct net *net, u32 addr)
        struct tipc_node *node;
        unsigned int thash = tipc_hashfn(addr);
 
-       if (unlikely(!in_own_cluster_exact(net, addr)))
-               return NULL;
-
        rcu_read_lock();
        hlist_for_each_entry_rcu(node, &tn->node_htable[thash], hash) {
                if (node->addr != addr)
@@ -248,6 +247,30 @@ static struct tipc_node *tipc_node_find(struct net *net, u32 addr)
        return node;
 }
 
+/* tipc_node_find_by_id - locate specified node object by its 128-bit id
+ * Note: this function is called only when a discovery request failed
+ * to find the node by its 32-bit id, and is not time critical
+ */
+static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id)
+{
+       struct tipc_net *tn = tipc_net(net);
+       struct tipc_node *n;
+       bool found = false;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(n, &tn->node_list, list) {
+               read_lock_bh(&n->lock);
+               if (!memcmp(id, n->peer_id, 16) &&
+                   kref_get_unless_zero(&n->kref))
+                       found = true;
+               read_unlock_bh(&n->lock);
+               if (found)
+                       break;
+       }
+       rcu_read_unlock();
+       return found ? n : NULL;
+}
+
 static void tipc_node_read_lock(struct tipc_node *n)
 {
        read_lock_bh(&n->lock);
@@ -310,7 +333,8 @@ static void tipc_node_write_unlock(struct tipc_node *n)
        }
 }
 
-struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
+struct tipc_node *tipc_node_create(struct net *net, u32 addr,
+                                  u8 *peer_id, u16 capabilities)
 {
        struct tipc_net *tn = net_generic(net, tipc_net_id);
        struct tipc_node *n, *temp_node;
@@ -329,6 +353,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
                goto exit;
        }
        n->addr = addr;
+       memcpy(&n->peer_id, peer_id, 16);
        n->net = net;
        n->capabilities = capabilities;
        kref_init(&n->kref);
@@ -347,8 +372,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
        n->signature = INVALID_NODE_SIG;
        n->active_links[0] = INVALID_BEARER_ID;
        n->active_links[1] = INVALID_BEARER_ID;
-       if (!tipc_link_bc_create(net, tipc_own_addr(net), n->addr,
-                                U16_MAX,
+       if (!tipc_link_bc_create(net, tipc_own_addr(net),
+                                addr, U16_MAX,
                                 tipc_link_window(tipc_bc_sndlink(net)),
                                 n->capabilities,
                                 &n->bc_entry.inputq1,
@@ -738,8 +763,51 @@ bool tipc_node_is_up(struct net *net, u32 addr)
        return retval;
 }
 
-void tipc_node_check_dest(struct net *net, u32 onode,
-                         struct tipc_bearer *b,
+static u32 tipc_node_suggest_addr(struct net *net, u32 addr)
+{
+       struct tipc_node *n;
+
+       addr ^= tipc_net(net)->random;
+       while ((n = tipc_node_find(net, addr))) {
+               tipc_node_put(n);
+               addr++;
+       }
+       return addr;
+}
+
+/* tipc_node_try_addr(): Check if addr can be used by peer, suggest other if not
+ */
+u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
+{
+       struct tipc_net *tn = tipc_net(net);
+       struct tipc_node *n;
+
+       /* Suggest new address if some other peer is using this one */
+       n = tipc_node_find(net, addr);
+       if (n) {
+               if (!memcmp(n->peer_id, id, NODE_ID_LEN))
+                       addr = 0;
+               tipc_node_put(n);
+               if (!addr)
+                       return 0;
+               return tipc_node_suggest_addr(net, addr);
+       }
+
+       /* Suggest previously used address if peer is known */
+       n = tipc_node_find_by_id(net, id);
+       if (n) {
+               addr = n->addr;
+               tipc_node_put(n);
+       }
+       /* Even this node may be in trial phase */
+       if (tn->trial_addr == addr)
+               return tipc_node_suggest_addr(net, addr);
+
+       return addr;
+}
+
+void tipc_node_check_dest(struct net *net, u32 addr,
+                         u8 *peer_id, struct tipc_bearer *b,
                          u16 capabilities, u32 signature,
                          struct tipc_media_addr *maddr,
                          bool *respond, bool *dupl_addr)
@@ -758,7 +826,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
        *dupl_addr = false;
        *respond = false;
 
-       n = tipc_node_create(net, onode, capabilities);
+       n = tipc_node_create(net, addr, peer_id, capabilities);
        if (!n)
                return;
 
@@ -836,15 +904,14 @@ void tipc_node_check_dest(struct net *net, u32 onode,
 
        /* Now create new link if not already existing */
        if (!l) {
-               if (n->link_cnt == 2) {
-                       pr_warn("Cannot establish 3rd link to %x\n", n->addr);
+               if (n->link_cnt == 2)
                        goto exit;
-               }
+
                if_name = strchr(b->name, ':') + 1;
                if (!tipc_link_create(net, if_name, b->identity, b->tolerance,
                                      b->net_plane, b->mtu, b->priority,
                                      b->window, mod(tipc_net(net)->random),
-                                     tipc_own_addr(net), onode,
+                                     tipc_own_addr(net), addr, peer_id,
                                      n->capabilities,
                                      tipc_bc_sndlink(n->net), n->bc_entry.link,
                                      &le->inputq,
@@ -887,11 +954,9 @@ void tipc_node_delete_links(struct net *net, int bearer_id)
 
 static void tipc_node_reset_links(struct tipc_node *n)
 {
-       char addr_string[16];
        int i;
 
-       pr_warn("Resetting all links to %s\n",
-               tipc_addr_string_fill(addr_string, n->addr));
+       pr_warn("Resetting all links to %x\n", n->addr);
 
        for (i = 0; i < MAX_BEARERS; i++) {
                tipc_node_link_down(n, i, false);
@@ -1078,15 +1143,13 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt)
 static void node_lost_contact(struct tipc_node *n,
                              struct sk_buff_head *inputq)
 {
-       char addr_string[16];
        struct tipc_sock_conn *conn, *safe;
        struct tipc_link *l;
        struct list_head *conns = &n->conn_sks;
        struct sk_buff *skb;
        uint i;
 
-       pr_debug("Lost contact with %s\n",
-                tipc_addr_string_fill(addr_string, n->addr));
+       pr_debug("Lost contact with %x\n", n->addr);
 
        /* Clean up broadcast state */
        tipc_bcast_remove_peer(n->net, n->bc_entry.link);
index 4ce5e3a185c098abffb15732ff02027a4540d5cd..f24b83500df151cfcda9e063523c2b015518449e 100644 (file)
@@ -49,17 +49,19 @@ enum {
        TIPC_BCAST_STATE_NACK = (1 << 2),
        TIPC_BLOCK_FLOWCTL    = (1 << 3),
        TIPC_BCAST_RCAST      = (1 << 4),
-       TIPC_MCAST_GROUPS     = (1 << 5)
+       TIPC_NODE_ID128       = (1 << 5)
 };
 
 #define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
                                TIPC_BCAST_STATE_NACK | \
                                TIPC_BCAST_RCAST | \
-                               TIPC_BLOCK_FLOWCTL)
+                               TIPC_BLOCK_FLOWCTL | \
+                               TIPC_NODE_ID128)
 #define INVALID_BEARER_ID -1
 
 void tipc_node_stop(struct net *net);
-void tipc_node_check_dest(struct net *net, u32 onode,
+u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr);
+void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128,
                          struct tipc_bearer *bearer,
                          u16 capabilities, u32 signature,
                          struct tipc_media_addr *maddr,
index f93477187a9081ed79f1bbda19769a1ea8a84833..275b666f6231b97ff812888641cb43abc7f1a9ba 100644 (file)
@@ -289,10 +289,9 @@ static bool tipc_sk_type_connectionless(struct sock *sk)
 static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
 {
        struct sock *sk = &tsk->sk;
-       struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
+       u32 self = tipc_own_addr(sock_net(sk));
        u32 peer_port = tsk_peer_port(tsk);
-       u32 orig_node;
-       u32 peer_node;
+       u32 orig_node, peer_node;
 
        if (unlikely(!tipc_sk_connected(sk)))
                return false;
@@ -306,10 +305,10 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
        if (likely(orig_node == peer_node))
                return true;
 
-       if (!orig_node && (peer_node == tn->own_addr))
+       if (!orig_node && peer_node == self)
                return true;
 
-       if (!peer_node && (orig_node == tn->own_addr))
+       if (!peer_node && orig_node == self)
                return true;
 
        return false;
@@ -461,8 +460,8 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
        /* Ensure tsk is visible before we read own_addr. */
        smp_mb();
 
-       tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
-                     NAMED_H_SIZE, 0);
+       tipc_msg_init(tipc_own_addr(net), msg, TIPC_LOW_IMPORTANCE,
+                     TIPC_NAMED_MSG, NAMED_H_SIZE, 0);
 
        msg_set_origport(msg, tsk->portid);
        timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
@@ -473,6 +472,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
        sk->sk_write_space = tipc_write_space;
        sk->sk_destruct = tipc_sock_destruct;
        tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
+       tsk->group_is_open = true;
        atomic_set(&tsk->dupl_rcvcnt, 0);
 
        /* Start out with safe limits until we receive an advertised window */
@@ -643,7 +643,7 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
                goto exit;
        }
 
-       res = (addr->scope > 0) ?
+       res = (addr->scope >= 0) ?
                tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
                tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
 exit:
@@ -670,7 +670,6 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
        struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
        struct sock *sk = sock->sk;
        struct tipc_sock *tsk = tipc_sk(sk);
-       struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id);
 
        memset(addr, 0, sizeof(*addr));
        if (peer) {
@@ -681,7 +680,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
                addr->addr.id.node = tsk_peer_node(tsk);
        } else {
                addr->addr.id.ref = tsk->portid;
-               addr->addr.id.node = tn->own_addr;
+               addr->addr.id.node = tipc_own_addr(sock_net(sk));
        }
 
        addr->addrtype = TIPC_ADDR_ID;
@@ -1279,8 +1278,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
        struct tipc_msg *hdr = &tsk->phdr;
        struct tipc_name_seq *seq;
        struct sk_buff_head pkts;
-       u32 type, inst, domain;
        u32 dnode, dport;
+       u32 type, inst;
        int mtu, rc;
 
        if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
@@ -1331,13 +1330,12 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
        if (dest->addrtype == TIPC_ADDR_NAME) {
                type = dest->addr.name.name.type;
                inst = dest->addr.name.name.instance;
-               domain = dest->addr.name.domain;
-               dnode = domain;
+               dnode = dest->addr.name.domain;
                msg_set_type(hdr, TIPC_NAMED_MSG);
                msg_set_hdr_sz(hdr, NAMED_H_SIZE);
                msg_set_nametype(hdr, type);
                msg_set_nameinst(hdr, inst);
-               msg_set_lookup_scope(hdr, tipc_addr_scope(domain));
+               msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
                dport = tipc_nametbl_translate(net, type, inst, &dnode);
                msg_set_destnode(hdr, dnode);
                msg_set_destport(hdr, dport);
@@ -2122,8 +2120,10 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
                    (!sk_conn && msg_connected(hdr)) ||
                    (!grp && msg_in_group(hdr)))
                        err = TIPC_ERR_NO_PORT;
-               else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit)
+               else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) {
+                       atomic_inc(&sk->sk_drops);
                        err = TIPC_ERR_OVERLOAD;
+               }
 
                if (unlikely(err)) {
                        tipc_skb_reject(net, err, skb, xmitq);
@@ -2202,6 +2202,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
 
                /* Overload => reject message back to sender */
                onode = tipc_own_addr(sock_net(sk));
+               atomic_inc(&sk->sk_drops);
                if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
                        __skb_queue_tail(xmitq, skb);
                break;
@@ -2591,6 +2592,9 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
        struct publication *publ;
        u32 key;
 
+       if (scope != TIPC_NODE_SCOPE)
+               scope = TIPC_CLUSTER_SCOPE;
+
        if (tipc_sk_connected(sk))
                return -EINVAL;
        key = tsk->portid + tsk->pub_count + 1;
@@ -2602,7 +2606,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
        if (unlikely(!publ))
                return -EINVAL;
 
-       list_add(&publ->pport_list, &tsk->publications);
+       list_add(&publ->binding_sock, &tsk->publications);
        tsk->pub_count++;
        tsk->published = 1;
        return 0;
@@ -2616,7 +2620,10 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
        struct publication *safe;
        int rc = -EINVAL;
 
-       list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) {
+       if (scope != TIPC_NODE_SCOPE)
+               scope = TIPC_CLUSTER_SCOPE;
+
+       list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) {
                if (seq) {
                        if (publ->scope != scope)
                                continue;
@@ -2627,12 +2634,12 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
                        if (publ->upper != seq->upper)
                                break;
                        tipc_nametbl_withdraw(net, publ->type, publ->lower,
-                                             publ->ref, publ->key);
+                                             publ->port, publ->key);
                        rc = 0;
                        break;
                }
                tipc_nametbl_withdraw(net, publ->type, publ->lower,
-                                     publ->ref, publ->key);
+                                     publ->port, publ->key);
                rc = 0;
        }
        if (list_empty(&tsk->publications))
@@ -2658,8 +2665,8 @@ void tipc_sk_reinit(struct net *net)
                while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
                        spin_lock_bh(&tsk->sk.sk_lock.slock);
                        msg = &tsk->phdr;
-                       msg_set_prevnode(msg, tn->own_addr);
-                       msg_set_orignode(msg, tn->own_addr);
+                       msg_set_prevnode(msg, tipc_own_addr(net));
+                       msg_set_orignode(msg, tipc_own_addr(net));
                        spin_unlock_bh(&tsk->sk.sk_lock.slock);
                }
 
@@ -3154,16 +3161,32 @@ static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
        return -EMSGSIZE;
 }
 
+static int __tipc_nl_add_sk_info(struct sk_buff *skb, struct tipc_sock
+                         *tsk)
+{
+       struct net *net = sock_net(skb->sk);
+       struct sock *sk = &tsk->sk;
+
+       if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid) ||
+           nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr(net)))
+               return -EMSGSIZE;
+
+       if (tipc_sk_connected(sk)) {
+               if (__tipc_nl_add_sk_con(skb, tsk))
+                       return -EMSGSIZE;
+       } else if (!list_empty(&tsk->publications)) {
+               if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
+                       return -EMSGSIZE;
+       }
+       return 0;
+}
+
 /* Caller should hold socket lock for the passed tipc socket. */
 static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
                            struct tipc_sock *tsk)
 {
-       int err;
-       void *hdr;
        struct nlattr *attrs;
-       struct net *net = sock_net(skb->sk);
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
-       struct sock *sk = &tsk->sk;
+       void *hdr;
 
        hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                          &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
@@ -3173,19 +3196,10 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
        attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
        if (!attrs)
                goto genlmsg_cancel;
-       if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid))
-               goto attr_msg_cancel;
-       if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
+
+       if (__tipc_nl_add_sk_info(skb, tsk))
                goto attr_msg_cancel;
 
-       if (tipc_sk_connected(sk)) {
-               err = __tipc_nl_add_sk_con(skb, tsk);
-               if (err)
-                       goto attr_msg_cancel;
-       } else if (!list_empty(&tsk->publications)) {
-               if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
-                       goto attr_msg_cancel;
-       }
        nla_nest_end(skb, attrs);
        genlmsg_end(skb, hdr);
 
@@ -3199,16 +3213,19 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
        return -EMSGSIZE;
 }
 
-int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
+int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
+                   int (*skb_handler)(struct sk_buff *skb,
+                                      struct netlink_callback *cb,
+                                      struct tipc_sock *tsk))
 {
-       int err;
-       struct tipc_sock *tsk;
-       const struct bucket_table *tbl;
-       struct rhash_head *pos;
        struct net *net = sock_net(skb->sk);
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
-       u32 tbl_id = cb->args[0];
+       struct tipc_net *tn = tipc_net(net);
+       const struct bucket_table *tbl;
        u32 prev_portid = cb->args[1];
+       u32 tbl_id = cb->args[0];
+       struct rhash_head *pos;
+       struct tipc_sock *tsk;
+       int err;
 
        rcu_read_lock();
        tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
@@ -3220,12 +3237,13 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
                                continue;
                        }
 
-                       err = __tipc_nl_add_sk(skb, cb, tsk);
+                       err = skb_handler(skb, cb, tsk);
                        if (err) {
                                prev_portid = tsk->portid;
                                spin_unlock_bh(&tsk->sk.sk_lock.slock);
                                goto out;
                        }
+
                        prev_portid = 0;
                        spin_unlock_bh(&tsk->sk.sk_lock.slock);
                }
@@ -3237,6 +3255,75 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
        return skb->len;
 }
+EXPORT_SYMBOL(tipc_nl_sk_walk);
+
+int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk,
+                          u32 sk_filter_state,
+                          u64 (*tipc_diag_gen_cookie)(struct sock *sk))
+{
+       struct sock *sk = &tsk->sk;
+       struct nlattr *attrs;
+       struct nlattr *stat;
+
+       /*filter response w.r.t sk_state*/
+       if (!(sk_filter_state & (1 << sk->sk_state)))
+               return 0;
+
+       attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
+       if (!attrs)
+               goto msg_cancel;
+
+       if (__tipc_nl_add_sk_info(skb, tsk))
+               goto attr_msg_cancel;
+
+       if (nla_put_u32(skb, TIPC_NLA_SOCK_TYPE, (u32)sk->sk_type) ||
+           nla_put_u32(skb, TIPC_NLA_SOCK_TIPC_STATE, (u32)sk->sk_state) ||
+           nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) ||
+           nla_put_u32(skb, TIPC_NLA_SOCK_UID,
+                       from_kuid_munged(sk_user_ns(sk), sock_i_uid(sk))) ||
+           nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE,
+                             tipc_diag_gen_cookie(sk),
+                             TIPC_NLA_SOCK_PAD))
+               goto attr_msg_cancel;
+
+       stat = nla_nest_start(skb, TIPC_NLA_SOCK_STAT);
+       if (!stat)
+               goto attr_msg_cancel;
+
+       if (nla_put_u32(skb, TIPC_NLA_SOCK_STAT_RCVQ,
+                       skb_queue_len(&sk->sk_receive_queue)) ||
+           nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ,
+                       skb_queue_len(&sk->sk_write_queue)) ||
+           nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP,
+                       atomic_read(&sk->sk_drops)))
+               goto stat_msg_cancel;
+
+       if (tsk->cong_link_cnt &&
+           nla_put_flag(skb, TIPC_NLA_SOCK_STAT_LINK_CONG))
+               goto stat_msg_cancel;
+
+       if (tsk_conn_cong(tsk) &&
+           nla_put_flag(skb, TIPC_NLA_SOCK_STAT_CONN_CONG))
+               goto stat_msg_cancel;
+
+       nla_nest_end(skb, stat);
+       nla_nest_end(skb, attrs);
+
+       return 0;
+
+stat_msg_cancel:
+       nla_nest_cancel(skb, stat);
+attr_msg_cancel:
+       nla_nest_cancel(skb, attrs);
+msg_cancel:
+       return -EMSGSIZE;
+}
+EXPORT_SYMBOL(tipc_sk_fill_sock_diag);
+
+int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       return tipc_nl_sk_walk(skb, cb, __tipc_nl_add_sk);
+}
 
 /* Caller should hold socket lock for the passed tipc socket. */
 static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
@@ -3286,7 +3373,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
        struct publication *p;
 
        if (*last_publ) {
-               list_for_each_entry(p, &tsk->publications, pport_list) {
+               list_for_each_entry(p, &tsk->publications, binding_sock) {
                        if (p->key == *last_publ)
                                break;
                }
@@ -3303,10 +3390,10 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
                }
        } else {
                p = list_first_entry(&tsk->publications, struct publication,
-                                    pport_list);
+                                    binding_sock);
        }
 
-       list_for_each_entry_from(p, &tsk->publications, pport_list) {
+       list_for_each_entry_from(p, &tsk->publications, binding_sock) {
                err = __tipc_nl_add_sk_publ(skb, cb, p);
                if (err) {
                        *last_publ = p->key;
index 06fb5944cf76947cab964fbcf2b53b52b1f517aa..aae3fd4cd06c4b44b94194327faac1aaaf183ddc 100644 (file)
@@ -49,6 +49,8 @@
 #define RCVBUF_DEF  (FLOWCTL_BLK_SZ * 1024 * 2)
 #define RCVBUF_MAX  (FLOWCTL_BLK_SZ * 1024 * 16)
 
+struct tipc_sock;
+
 int tipc_socket_init(void);
 void tipc_socket_stop(void);
 void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq);
@@ -59,5 +61,11 @@ int tipc_sk_rht_init(struct net *net);
 void tipc_sk_rht_destroy(struct net *net);
 int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb);
-
+int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk,
+                          u32 sk_filter_state,
+                          u64 (*tipc_diag_gen_cookie)(struct sock *sk));
+int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
+                   int (*skb_handler)(struct sk_buff *skb,
+                                      struct netlink_callback *cb,
+                                      struct tipc_sock *tsk));
 #endif
index 3deabcab4882165b668f65319a3555027bf3b292..2c13b18426d9db3f16ec79f2bea86ddc59601dc4 100644 (file)
@@ -47,6 +47,8 @@
 #include <net/addrconf.h>
 #include <linux/tipc_netlink.h>
 #include "core.h"
+#include "addr.h"
+#include "net.h"
 #include "bearer.h"
 #include "netlink.h"
 #include "msg.h"
@@ -647,6 +649,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
        struct udp_port_cfg udp_conf = {0};
        struct udp_tunnel_sock_cfg tuncfg = {NULL};
        struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
+       u8 node_id[NODE_ID_LEN] = {0,};
 
        ub = kzalloc(sizeof(*ub), GFP_ATOMIC);
        if (!ub)
@@ -677,6 +680,16 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
        if (err)
                goto err;
 
+       /* Autoconfigure own node identity if needed */
+       if (!tipc_own_id(net)) {
+               memcpy(node_id, local.ipv6.in6_u.u6_addr8, 16);
+               tipc_net_init(net, node_id, 0);
+       }
+       if (!tipc_own_id(net)) {
+               pr_warn("Failed to set node id, please configure manually\n");
+               return -EINVAL;
+       }
+
        b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP;
        b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT;
        rcu_assign_pointer(b->media_ptr, ub);
index eb583038c67e05c8c28ad86720d375c06abe9443..89b8745a986f06c00bd35f7a4fc6fff47c25120a 100644 (file)
@@ -7,6 +7,7 @@ config TLS
        select CRYPTO
        select CRYPTO_AES
        select CRYPTO_GCM
+       select STREAM_PARSER
        default n
        ---help---
        Enable kernel support for TLS protocol. This allows symmetric
index e9b4b53ab53e08b2b8ddaf7e57da9d0a9862a4b1..6f5c1146da4a5a85a1188356bff0198052d85674 100644 (file)
@@ -46,16 +46,29 @@ MODULE_DESCRIPTION("Transport Layer Security Support");
 MODULE_LICENSE("Dual BSD/GPL");
 
 enum {
-       TLS_BASE_TX,
+       TLSV4,
+       TLSV6,
+       TLS_NUM_PROTS,
+};
+
+enum {
+       TLS_BASE,
        TLS_SW_TX,
+       TLS_SW_RX,
+       TLS_SW_RXTX,
        TLS_NUM_CONFIG,
 };
 
-static struct proto tls_prots[TLS_NUM_CONFIG];
+static struct proto *saved_tcpv6_prot;
+static DEFINE_MUTEX(tcpv6_prot_mutex);
+static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG];
+static struct proto_ops tls_sw_proto_ops;
 
 static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx)
 {
-       sk->sk_prot = &tls_prots[ctx->tx_conf];
+       int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
+
+       sk->sk_prot = &tls_prots[ip_ver][ctx->conf];
 }
 
 int wait_on_pending_writer(struct sock *sk, long *timeo)
@@ -228,7 +241,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
        lock_sock(sk);
        sk_proto_close = ctx->sk_proto_close;
 
-       if (ctx->tx_conf == TLS_BASE_TX) {
+       if (ctx->conf == TLS_BASE) {
                kfree(ctx);
                goto skip_tx_cleanup;
        }
@@ -249,11 +262,16 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
                }
        }
 
-       kfree(ctx->rec_seq);
-       kfree(ctx->iv);
+       kfree(ctx->tx.rec_seq);
+       kfree(ctx->tx.iv);
+       kfree(ctx->rx.rec_seq);
+       kfree(ctx->rx.iv);
 
-       if (ctx->tx_conf == TLS_SW_TX)
-               tls_sw_free_tx_resources(sk);
+       if (ctx->conf == TLS_SW_TX ||
+           ctx->conf == TLS_SW_RX ||
+           ctx->conf == TLS_SW_RXTX) {
+               tls_sw_free_resources(sk);
+       }
 
 skip_tx_cleanup:
        release_sock(sk);
@@ -309,9 +327,9 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
                }
                lock_sock(sk);
                memcpy(crypto_info_aes_gcm_128->iv,
-                      ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+                      ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
                       TLS_CIPHER_AES_GCM_128_IV_SIZE);
-               memcpy(crypto_info_aes_gcm_128->rec_seq, ctx->rec_seq,
+               memcpy(crypto_info_aes_gcm_128->rec_seq, ctx->tx.rec_seq,
                       TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
                release_sock(sk);
                if (copy_to_user(optval,
@@ -355,20 +373,24 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
        return do_tls_getsockopt(sk, optname, optval, optlen);
 }
 
-static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
-                               unsigned int optlen)
+static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
+                                 unsigned int optlen, int tx)
 {
        struct tls_crypto_info *crypto_info;
        struct tls_context *ctx = tls_get_ctx(sk);
        int rc = 0;
-       int tx_conf;
+       int conf;
 
        if (!optval || (optlen < sizeof(*crypto_info))) {
                rc = -EINVAL;
                goto out;
        }
 
-       crypto_info = &ctx->crypto_send;
+       if (tx)
+               crypto_info = &ctx->crypto_send;
+       else
+               crypto_info = &ctx->crypto_recv;
+
        /* Currently we don't support set crypto info more than one time */
        if (TLS_CRYPTO_INFO_READY(crypto_info)) {
                rc = -EBUSY;
@@ -407,15 +429,31 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
        }
 
        /* currently SW is default, we will have ethtool in future */
-       rc = tls_set_sw_offload(sk, ctx);
-       tx_conf = TLS_SW_TX;
+       if (tx) {
+               rc = tls_set_sw_offload(sk, ctx, 1);
+               if (ctx->conf == TLS_SW_RX)
+                       conf = TLS_SW_RXTX;
+               else
+                       conf = TLS_SW_TX;
+       } else {
+               rc = tls_set_sw_offload(sk, ctx, 0);
+               if (ctx->conf == TLS_SW_TX)
+                       conf = TLS_SW_RXTX;
+               else
+                       conf = TLS_SW_RX;
+       }
+
        if (rc)
                goto err_crypto_info;
 
-       ctx->tx_conf = tx_conf;
+       ctx->conf = conf;
        update_sk_prot(sk, ctx);
-       ctx->sk_write_space = sk->sk_write_space;
-       sk->sk_write_space = tls_write_space;
+       if (tx) {
+               ctx->sk_write_space = sk->sk_write_space;
+               sk->sk_write_space = tls_write_space;
+       } else {
+               sk->sk_socket->ops = &tls_sw_proto_ops;
+       }
        goto out;
 
 err_crypto_info:
@@ -431,8 +469,10 @@ static int do_tls_setsockopt(struct sock *sk, int optname,
 
        switch (optname) {
        case TLS_TX:
+       case TLS_RX:
                lock_sock(sk);
-               rc = do_tls_setsockopt_tx(sk, optval, optlen);
+               rc = do_tls_setsockopt_conf(sk, optval, optlen,
+                                           optname == TLS_TX);
                release_sock(sk);
                break;
        default:
@@ -453,8 +493,29 @@ static int tls_setsockopt(struct sock *sk, int level, int optname,
        return do_tls_setsockopt(sk, optname, optval, optlen);
 }
 
+static void build_protos(struct proto *prot, struct proto *base)
+{
+       prot[TLS_BASE] = *base;
+       prot[TLS_BASE].setsockopt       = tls_setsockopt;
+       prot[TLS_BASE].getsockopt       = tls_getsockopt;
+       prot[TLS_BASE].close            = tls_sk_proto_close;
+
+       prot[TLS_SW_TX] = prot[TLS_BASE];
+       prot[TLS_SW_TX].sendmsg         = tls_sw_sendmsg;
+       prot[TLS_SW_TX].sendpage        = tls_sw_sendpage;
+
+       prot[TLS_SW_RX] = prot[TLS_BASE];
+       prot[TLS_SW_RX].recvmsg         = tls_sw_recvmsg;
+       prot[TLS_SW_RX].close           = tls_sk_proto_close;
+
+       prot[TLS_SW_RXTX] = prot[TLS_SW_TX];
+       prot[TLS_SW_RXTX].recvmsg       = tls_sw_recvmsg;
+       prot[TLS_SW_RXTX].close         = tls_sk_proto_close;
+}
+
 static int tls_init(struct sock *sk)
 {
+       int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tls_context *ctx;
        int rc = 0;
@@ -479,7 +540,18 @@ static int tls_init(struct sock *sk)
        ctx->getsockopt = sk->sk_prot->getsockopt;
        ctx->sk_proto_close = sk->sk_prot->close;
 
-       ctx->tx_conf = TLS_BASE_TX;
+       /* Build IPv6 TLS whenever the address of tcpv6_prot changes */
+       if (ip_ver == TLSV6 &&
+           unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
+               mutex_lock(&tcpv6_prot_mutex);
+               if (likely(sk->sk_prot != saved_tcpv6_prot)) {
+                       build_protos(tls_prots[TLSV6], sk->sk_prot);
+                       smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
+               }
+               mutex_unlock(&tcpv6_prot_mutex);
+       }
+
+       ctx->conf = TLS_BASE;
        update_sk_prot(sk, ctx);
 out:
        return rc;
@@ -493,21 +565,13 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
        .init                   = tls_init,
 };
 
-static void build_protos(struct proto *prot, struct proto *base)
-{
-       prot[TLS_BASE_TX] = *base;
-       prot[TLS_BASE_TX].setsockopt    = tls_setsockopt;
-       prot[TLS_BASE_TX].getsockopt    = tls_getsockopt;
-       prot[TLS_BASE_TX].close         = tls_sk_proto_close;
-
-       prot[TLS_SW_TX] = prot[TLS_BASE_TX];
-       prot[TLS_SW_TX].sendmsg         = tls_sw_sendmsg;
-       prot[TLS_SW_TX].sendpage        = tls_sw_sendpage;
-}
-
 static int __init tls_register(void)
 {
-       build_protos(tls_prots, &tcp_prot);
+       build_protos(tls_prots[TLSV4], &tcp_prot);
+
+       tls_sw_proto_ops = inet_stream_ops;
+       tls_sw_proto_ops.poll = tls_sw_poll;
+       tls_sw_proto_ops.splice_read = tls_sw_splice_read;
 
        tcp_register_ulp(&tcp_tls_ulp_ops);
 
index f26376e954aeccadc0162a74b3c37af2d4ab0051..4dc766b03f0056ae87f36422fe592078424210a3 100644 (file)
  * SOFTWARE.
  */
 
+#include <linux/sched/signal.h>
 #include <linux/module.h>
 #include <crypto/aead.h>
 
+#include <net/strparser.h>
 #include <net/tls.h>
 
+static int tls_do_decryption(struct sock *sk,
+                            struct scatterlist *sgin,
+                            struct scatterlist *sgout,
+                            char *iv_recv,
+                            size_t data_len,
+                            struct sk_buff *skb,
+                            gfp_t flags)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct strp_msg *rxm = strp_msg(skb);
+       struct aead_request *aead_req;
+
+       int ret;
+       unsigned int req_size = sizeof(struct aead_request) +
+               crypto_aead_reqsize(ctx->aead_recv);
+
+       aead_req = kzalloc(req_size, flags);
+       if (!aead_req)
+               return -ENOMEM;
+
+       aead_request_set_tfm(aead_req, ctx->aead_recv);
+       aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
+       aead_request_set_crypt(aead_req, sgin, sgout,
+                              data_len + tls_ctx->rx.tag_size,
+                              (u8 *)iv_recv);
+       aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+                                 crypto_req_done, &ctx->async_wait);
+
+       ret = crypto_wait_req(crypto_aead_decrypt(aead_req), &ctx->async_wait);
+
+       if (ret < 0)
+               goto out;
+
+       rxm->offset += tls_ctx->rx.prepend_size;
+       rxm->full_len -= tls_ctx->rx.overhead_size;
+       tls_advance_record_sn(sk, &tls_ctx->rx);
+
+       ctx->decrypted = true;
+
+       ctx->saved_data_ready(sk);
+
+out:
+       kfree(aead_req);
+       return ret;
+}
+
 static void trim_sg(struct sock *sk, struct scatterlist *sg,
                    int *sg_num_elem, unsigned int *sg_size, int target_size)
 {
@@ -79,7 +128,7 @@ static void trim_both_sgl(struct sock *sk, int target_size)
                target_size);
 
        if (target_size > 0)
-               target_size += tls_ctx->overhead_size;
+               target_size += tls_ctx->tx.overhead_size;
 
        trim_sg(sk, ctx->sg_encrypted_data,
                &ctx->sg_encrypted_num_elem,
@@ -87,71 +136,16 @@ static void trim_both_sgl(struct sock *sk, int target_size)
                target_size);
 }
 
-static int alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
-                   int *sg_num_elem, unsigned int *sg_size,
-                   int first_coalesce)
-{
-       struct page_frag *pfrag;
-       unsigned int size = *sg_size;
-       int num_elem = *sg_num_elem, use = 0, rc = 0;
-       struct scatterlist *sge;
-       unsigned int orig_offset;
-
-       len -= size;
-       pfrag = sk_page_frag(sk);
-
-       while (len > 0) {
-               if (!sk_page_frag_refill(sk, pfrag)) {
-                       rc = -ENOMEM;
-                       goto out;
-               }
-
-               use = min_t(int, len, pfrag->size - pfrag->offset);
-
-               if (!sk_wmem_schedule(sk, use)) {
-                       rc = -ENOMEM;
-                       goto out;
-               }
-
-               sk_mem_charge(sk, use);
-               size += use;
-               orig_offset = pfrag->offset;
-               pfrag->offset += use;
-
-               sge = sg + num_elem - 1;
-               if (num_elem > first_coalesce && sg_page(sg) == pfrag->page &&
-                   sg->offset + sg->length == orig_offset) {
-                       sg->length += use;
-               } else {
-                       sge++;
-                       sg_unmark_end(sge);
-                       sg_set_page(sge, pfrag->page, use, orig_offset);
-                       get_page(pfrag->page);
-                       ++num_elem;
-                       if (num_elem == MAX_SKB_FRAGS) {
-                               rc = -ENOSPC;
-                               break;
-                       }
-               }
-
-               len -= use;
-       }
-       goto out;
-
-out:
-       *sg_size = size;
-       *sg_num_elem = num_elem;
-       return rc;
-}
-
 static int alloc_encrypted_sg(struct sock *sk, int len)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
        struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
        int rc = 0;
 
-       rc = alloc_sg(sk, len, ctx->sg_encrypted_data,
-                     &ctx->sg_encrypted_num_elem, &ctx->sg_encrypted_size, 0);
+       rc = sk_alloc_sg(sk, len,
+                        ctx->sg_encrypted_data, 0,
+                        &ctx->sg_encrypted_num_elem,
+                        &ctx->sg_encrypted_size, 0);
 
        return rc;
 }
@@ -162,9 +156,9 @@ static int alloc_plaintext_sg(struct sock *sk, int len)
        struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
        int rc = 0;
 
-       rc = alloc_sg(sk, len, ctx->sg_plaintext_data,
-                     &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size,
-                     tls_ctx->pending_open_record_frags);
+       rc = sk_alloc_sg(sk, len, ctx->sg_plaintext_data, 0,
+                        &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size,
+                        tls_ctx->pending_open_record_frags);
 
        return rc;
 }
@@ -207,21 +201,21 @@ static int tls_do_encryption(struct tls_context *tls_ctx,
        if (!aead_req)
                return -ENOMEM;
 
-       ctx->sg_encrypted_data[0].offset += tls_ctx->prepend_size;
-       ctx->sg_encrypted_data[0].length -= tls_ctx->prepend_size;
+       ctx->sg_encrypted_data[0].offset += tls_ctx->tx.prepend_size;
+       ctx->sg_encrypted_data[0].length -= tls_ctx->tx.prepend_size;
 
        aead_request_set_tfm(aead_req, ctx->aead_send);
        aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
        aead_request_set_crypt(aead_req, ctx->sg_aead_in, ctx->sg_aead_out,
-                              data_len, tls_ctx->iv);
+                              data_len, tls_ctx->tx.iv);
 
        aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                                  crypto_req_done, &ctx->async_wait);
 
        rc = crypto_wait_req(crypto_aead_encrypt(aead_req), &ctx->async_wait);
 
-       ctx->sg_encrypted_data[0].offset -= tls_ctx->prepend_size;
-       ctx->sg_encrypted_data[0].length += tls_ctx->prepend_size;
+       ctx->sg_encrypted_data[0].offset -= tls_ctx->tx.prepend_size;
+       ctx->sg_encrypted_data[0].length += tls_ctx->tx.prepend_size;
 
        kfree(aead_req);
        return rc;
@@ -238,7 +232,7 @@ static int tls_push_record(struct sock *sk, int flags,
        sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1);
 
        tls_make_aad(ctx->aad_space, ctx->sg_plaintext_size,
-                    tls_ctx->rec_seq, tls_ctx->rec_seq_size,
+                    tls_ctx->tx.rec_seq, tls_ctx->tx.rec_seq_size,
                     record_type);
 
        tls_fill_prepend(tls_ctx,
@@ -269,9 +263,9 @@ static int tls_push_record(struct sock *sk, int flags,
        /* Only pass through MSG_DONTWAIT and MSG_NOSIGNAL flags */
        rc = tls_push_sg(sk, tls_ctx, ctx->sg_encrypted_data, 0, flags);
        if (rc < 0 && rc != -EAGAIN)
-               tls_err_abort(sk);
+               tls_err_abort(sk, EBADMSG);
 
-       tls_advance_record_sn(sk, tls_ctx);
+       tls_advance_record_sn(sk, &tls_ctx->tx);
        return rc;
 }
 
@@ -281,23 +275,24 @@ static int tls_sw_push_pending_record(struct sock *sk, int flags)
 }
 
 static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
-                             int length)
+                             int length, int *pages_used,
+                             unsigned int *size_used,
+                             struct scatterlist *to, int to_max_pages,
+                             bool charge)
 {
-       struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
        struct page *pages[MAX_SKB_FRAGS];
 
        size_t offset;
        ssize_t copied, use;
        int i = 0;
-       unsigned int size = ctx->sg_plaintext_size;
-       int num_elem = ctx->sg_plaintext_num_elem;
+       unsigned int size = *size_used;
+       int num_elem = *pages_used;
        int rc = 0;
        int maxpages;
 
        while (length > 0) {
                i = 0;
-               maxpages = ARRAY_SIZE(ctx->sg_plaintext_data) - num_elem;
+               maxpages = to_max_pages - num_elem;
                if (maxpages == 0) {
                        rc = -EFAULT;
                        goto out;
@@ -317,10 +312,11 @@ static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
                while (copied) {
                        use = min_t(int, copied, PAGE_SIZE - offset);
 
-                       sg_set_page(&ctx->sg_plaintext_data[num_elem],
+                       sg_set_page(&to[num_elem],
                                    pages[i], use, offset);
-                       sg_unmark_end(&ctx->sg_plaintext_data[num_elem]);
-                       sk_mem_charge(sk, use);
+                       sg_unmark_end(&to[num_elem]);
+                       if (charge)
+                               sk_mem_charge(sk, use);
 
                        offset = 0;
                        copied -= use;
@@ -331,8 +327,9 @@ static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
        }
 
 out:
-       ctx->sg_plaintext_size = size;
-       ctx->sg_plaintext_num_elem = num_elem;
+       *size_used = size;
+       *pages_used = num_elem;
+
        return rc;
 }
 
@@ -409,7 +406,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
                }
 
                required_size = ctx->sg_plaintext_size + try_to_copy +
-                               tls_ctx->overhead_size;
+                               tls_ctx->tx.overhead_size;
 
                if (!sk_stream_memory_free(sk))
                        goto wait_for_sndbuf;
@@ -429,7 +426,11 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 
                if (full_record || eor) {
                        ret = zerocopy_from_iter(sk, &msg->msg_iter,
-                                                try_to_copy);
+                               try_to_copy, &ctx->sg_plaintext_num_elem,
+                               &ctx->sg_plaintext_size,
+                               ctx->sg_plaintext_data,
+                               ARRAY_SIZE(ctx->sg_plaintext_data),
+                               true);
                        if (ret)
                                goto fallback_to_reg_send;
 
@@ -468,7 +469,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
                                &ctx->sg_encrypted_num_elem,
                                &ctx->sg_encrypted_size,
                                ctx->sg_plaintext_size +
-                               tls_ctx->overhead_size);
+                               tls_ctx->tx.overhead_size);
                }
 
                ret = memcopy_from_iter(sk, &msg->msg_iter, try_to_copy);
@@ -560,7 +561,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
                        full_record = true;
                }
                required_size = ctx->sg_plaintext_size + copy +
-                             tls_ctx->overhead_size;
+                             tls_ctx->tx.overhead_size;
 
                if (!sk_stream_memory_free(sk))
                        goto wait_for_sndbuf;
@@ -629,13 +630,404 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
        return ret;
 }
 
-void tls_sw_free_tx_resources(struct sock *sk)
+static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
+                                    long timeo, int *err)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct sk_buff *skb;
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+       while (!(skb = ctx->recv_pkt)) {
+               if (sk->sk_err) {
+                       *err = sock_error(sk);
+                       return NULL;
+               }
+
+               if (sock_flag(sk, SOCK_DONE))
+                       return NULL;
+
+               if ((flags & MSG_DONTWAIT) || !timeo) {
+                       *err = -EAGAIN;
+                       return NULL;
+               }
+
+               add_wait_queue(sk_sleep(sk), &wait);
+               sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+               sk_wait_event(sk, &timeo, ctx->recv_pkt != skb, &wait);
+               sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+               remove_wait_queue(sk_sleep(sk), &wait);
+
+               /* Handle signals */
+               if (signal_pending(current)) {
+                       *err = sock_intr_errno(timeo);
+                       return NULL;
+               }
+       }
+
+       return skb;
+}
+
+static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
+                      struct scatterlist *sgout)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       char iv[TLS_CIPHER_AES_GCM_128_SALT_SIZE + tls_ctx->rx.iv_size];
+       struct scatterlist sgin_arr[MAX_SKB_FRAGS + 2];
+       struct scatterlist *sgin = &sgin_arr[0];
+       struct strp_msg *rxm = strp_msg(skb);
+       int ret, nsg = ARRAY_SIZE(sgin_arr);
+       char aad_recv[TLS_AAD_SPACE_SIZE];
+       struct sk_buff *unused;
+
+       ret = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
+                           iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+                           tls_ctx->rx.iv_size);
+       if (ret < 0)
+               return ret;
+
+       memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+       if (!sgout) {
+               nsg = skb_cow_data(skb, 0, &unused) + 1;
+               sgin = kmalloc_array(nsg, sizeof(*sgin), sk->sk_allocation);
+               if (!sgout)
+                       sgout = sgin;
+       }
+
+       sg_init_table(sgin, nsg);
+       sg_set_buf(&sgin[0], aad_recv, sizeof(aad_recv));
+
+       nsg = skb_to_sgvec(skb, &sgin[1],
+                          rxm->offset + tls_ctx->rx.prepend_size,
+                          rxm->full_len - tls_ctx->rx.prepend_size);
+
+       tls_make_aad(aad_recv,
+                    rxm->full_len - tls_ctx->rx.overhead_size,
+                    tls_ctx->rx.rec_seq,
+                    tls_ctx->rx.rec_seq_size,
+                    ctx->control);
+
+       ret = tls_do_decryption(sk, sgin, sgout, iv,
+                               rxm->full_len - tls_ctx->rx.overhead_size,
+                               skb, sk->sk_allocation);
+
+       if (sgin != &sgin_arr[0])
+               kfree(sgin);
+
+       return ret;
+}
+
+static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
+                              unsigned int len)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct strp_msg *rxm = strp_msg(skb);
+
+       if (len < rxm->full_len) {
+               rxm->offset += len;
+               rxm->full_len -= len;
+
+               return false;
+       }
+
+       /* Finished with message */
+       ctx->recv_pkt = NULL;
+       kfree_skb(skb);
+       strp_unpause(&ctx->strp);
+
+       return true;
+}
+
+int tls_sw_recvmsg(struct sock *sk,
+                  struct msghdr *msg,
+                  size_t len,
+                  int nonblock,
+                  int flags,
+                  int *addr_len)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       unsigned char control;
+       struct strp_msg *rxm;
+       struct sk_buff *skb;
+       ssize_t copied = 0;
+       bool cmsg = false;
+       int err = 0;
+       long timeo;
+
+       flags |= nonblock;
+
+       if (unlikely(flags & MSG_ERRQUEUE))
+               return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
+
+       lock_sock(sk);
+
+       timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+       do {
+               bool zc = false;
+               int chunk = 0;
+
+               skb = tls_wait_data(sk, flags, timeo, &err);
+               if (!skb)
+                       goto recv_end;
+
+               rxm = strp_msg(skb);
+               if (!cmsg) {
+                       int cerr;
+
+                       cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
+                                       sizeof(ctx->control), &ctx->control);
+                       cmsg = true;
+                       control = ctx->control;
+                       if (ctx->control != TLS_RECORD_TYPE_DATA) {
+                               if (cerr || msg->msg_flags & MSG_CTRUNC) {
+                                       err = -EIO;
+                                       goto recv_end;
+                               }
+                       }
+               } else if (control != ctx->control) {
+                       goto recv_end;
+               }
+
+               if (!ctx->decrypted) {
+                       int page_count;
+                       int to_copy;
+
+                       page_count = iov_iter_npages(&msg->msg_iter,
+                                                    MAX_SKB_FRAGS);
+                       to_copy = rxm->full_len - tls_ctx->rx.overhead_size;
+                       if (to_copy <= len && page_count < MAX_SKB_FRAGS &&
+                           likely(!(flags & MSG_PEEK)))  {
+                               struct scatterlist sgin[MAX_SKB_FRAGS + 1];
+                               char unused[21];
+                               int pages = 0;
+
+                               zc = true;
+                               sg_init_table(sgin, MAX_SKB_FRAGS + 1);
+                               sg_set_buf(&sgin[0], unused, 13);
+
+                               err = zerocopy_from_iter(sk, &msg->msg_iter,
+                                                        to_copy, &pages,
+                                                        &chunk, &sgin[1],
+                                                        MAX_SKB_FRAGS, false);
+                               if (err < 0)
+                                       goto fallback_to_reg_recv;
+
+                               err = decrypt_skb(sk, skb, sgin);
+                               for (; pages > 0; pages--)
+                                       put_page(sg_page(&sgin[pages]));
+                               if (err < 0) {
+                                       tls_err_abort(sk, EBADMSG);
+                                       goto recv_end;
+                               }
+                       } else {
+fallback_to_reg_recv:
+                               err = decrypt_skb(sk, skb, NULL);
+                               if (err < 0) {
+                                       tls_err_abort(sk, EBADMSG);
+                                       goto recv_end;
+                               }
+                       }
+                       ctx->decrypted = true;
+               }
+
+               if (!zc) {
+                       chunk = min_t(unsigned int, rxm->full_len, len);
+                       err = skb_copy_datagram_msg(skb, rxm->offset, msg,
+                                                   chunk);
+                       if (err < 0)
+                               goto recv_end;
+               }
+
+               copied += chunk;
+               len -= chunk;
+               if (likely(!(flags & MSG_PEEK))) {
+                       u8 control = ctx->control;
+
+                       if (tls_sw_advance_skb(sk, skb, chunk)) {
+                               /* Return full control message to
+                                * userspace before trying to parse
+                                * another message type
+                                */
+                               msg->msg_flags |= MSG_EOR;
+                               if (control != TLS_RECORD_TYPE_DATA)
+                                       goto recv_end;
+                       }
+               }
+       } while (len);
+
+recv_end:
+       release_sock(sk);
+       return copied ? : err;
+}
+
+ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
+                          struct pipe_inode_info *pipe,
+                          size_t len, unsigned int flags)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sock->sk);
+       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct strp_msg *rxm = NULL;
+       struct sock *sk = sock->sk;
+       struct sk_buff *skb;
+       ssize_t copied = 0;
+       int err = 0;
+       long timeo;
+       int chunk;
+
+       lock_sock(sk);
+
+       timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+       skb = tls_wait_data(sk, flags, timeo, &err);
+       if (!skb)
+               goto splice_read_end;
+
+       /* splice does not support reading control messages */
+       if (ctx->control != TLS_RECORD_TYPE_DATA) {
+               err = -ENOTSUPP;
+               goto splice_read_end;
+       }
+
+       if (!ctx->decrypted) {
+               err = decrypt_skb(sk, skb, NULL);
+
+               if (err < 0) {
+                       tls_err_abort(sk, EBADMSG);
+                       goto splice_read_end;
+               }
+               ctx->decrypted = true;
+       }
+       rxm = strp_msg(skb);
+
+       chunk = min_t(unsigned int, rxm->full_len, len);
+       copied = skb_splice_bits(skb, sk, rxm->offset, pipe, chunk, flags);
+       if (copied < 0)
+               goto splice_read_end;
+
+       if (likely(!(flags & MSG_PEEK)))
+               tls_sw_advance_skb(sk, skb, copied);
+
+splice_read_end:
+       release_sock(sk);
+       return copied ? : err;
+}
+
+unsigned int tls_sw_poll(struct file *file, struct socket *sock,
+                        struct poll_table_struct *wait)
+{
+       unsigned int ret;
+       struct sock *sk = sock->sk;
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+
+       /* Grab POLLOUT and POLLHUP from the underlying socket */
+       ret = ctx->sk_poll(file, sock, wait);
+
+       /* Clear POLLIN bits, and set based on recv_pkt */
+       ret &= ~(POLLIN | POLLRDNORM);
+       if (ctx->recv_pkt)
+               ret |= POLLIN | POLLRDNORM;
+
+       return ret;
+}
+
+static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
+       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       char header[tls_ctx->rx.prepend_size];
+       struct strp_msg *rxm = strp_msg(skb);
+       size_t cipher_overhead;
+       size_t data_len = 0;
+       int ret;
+
+       /* Verify that we have a full TLS header, or wait for more data */
+       if (rxm->offset + tls_ctx->rx.prepend_size > skb->len)
+               return 0;
+
+       /* Linearize header to local buffer */
+       ret = skb_copy_bits(skb, rxm->offset, header, tls_ctx->rx.prepend_size);
+
+       if (ret < 0)
+               goto read_failure;
+
+       ctx->control = header[0];
+
+       data_len = ((header[4] & 0xFF) | (header[3] << 8));
+
+       cipher_overhead = tls_ctx->rx.tag_size + tls_ctx->rx.iv_size;
+
+       if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead) {
+               ret = -EMSGSIZE;
+               goto read_failure;
+       }
+       if (data_len < cipher_overhead) {
+               ret = -EBADMSG;
+               goto read_failure;
+       }
+
+       if (header[1] != TLS_VERSION_MINOR(tls_ctx->crypto_recv.version) ||
+           header[2] != TLS_VERSION_MAJOR(tls_ctx->crypto_recv.version)) {
+               ret = -EINVAL;
+               goto read_failure;
+       }
+
+       return data_len + TLS_HEADER_SIZE;
+
+read_failure:
+       tls_err_abort(strp->sk, ret);
+
+       return ret;
+}
+
+static void tls_queue(struct strparser *strp, struct sk_buff *skb)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
+       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+       struct strp_msg *rxm;
+
+       rxm = strp_msg(skb);
+
+       ctx->decrypted = false;
+
+       ctx->recv_pkt = skb;
+       strp_pause(strp);
+
+       strp->sk->sk_state_change(strp->sk);
+}
+
+static void tls_data_ready(struct sock *sk)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+
+       strp_data_ready(&ctx->strp);
+}
+
+void tls_sw_free_resources(struct sock *sk)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
        struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
 
        if (ctx->aead_send)
                crypto_free_aead(ctx->aead_send);
+       if (ctx->aead_recv) {
+               if (ctx->recv_pkt) {
+                       kfree_skb(ctx->recv_pkt);
+                       ctx->recv_pkt = NULL;
+               }
+               crypto_free_aead(ctx->aead_recv);
+               strp_stop(&ctx->strp);
+               write_lock_bh(&sk->sk_callback_lock);
+               sk->sk_data_ready = ctx->saved_data_ready;
+               write_unlock_bh(&sk->sk_callback_lock);
+               release_sock(sk);
+               strp_done(&ctx->strp);
+               lock_sock(sk);
+       }
 
        tls_free_both_sg(sk);
 
@@ -643,12 +1035,15 @@ void tls_sw_free_tx_resources(struct sock *sk)
        kfree(tls_ctx);
 }
 
-int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
+int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 {
        char keyval[TLS_CIPHER_AES_GCM_128_KEY_SIZE];
        struct tls_crypto_info *crypto_info;
        struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
        struct tls_sw_context *sw_ctx;
+       struct cipher_context *cctx;
+       struct crypto_aead **aead;
+       struct strp_callbacks cb;
        u16 nonce_size, tag_size, iv_size, rec_seq_size;
        char *iv, *rec_seq;
        int rc = 0;
@@ -658,22 +1053,29 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
                goto out;
        }
 
-       if (ctx->priv_ctx) {
-               rc = -EEXIST;
-               goto out;
-       }
-
-       sw_ctx = kzalloc(sizeof(*sw_ctx), GFP_KERNEL);
-       if (!sw_ctx) {
-               rc = -ENOMEM;
-               goto out;
+       if (!ctx->priv_ctx) {
+               sw_ctx = kzalloc(sizeof(*sw_ctx), GFP_KERNEL);
+               if (!sw_ctx) {
+                       rc = -ENOMEM;
+                       goto out;
+               }
+               crypto_init_wait(&sw_ctx->async_wait);
+       } else {
+               sw_ctx = ctx->priv_ctx;
        }
 
-       crypto_init_wait(&sw_ctx->async_wait);
-
        ctx->priv_ctx = (struct tls_offload_context *)sw_ctx;
 
-       crypto_info = &ctx->crypto_send;
+       if (tx) {
+               crypto_info = &ctx->crypto_send;
+               cctx = &ctx->tx;
+               aead = &sw_ctx->aead_send;
+       } else {
+               crypto_info = &ctx->crypto_recv;
+               cctx = &ctx->rx;
+               aead = &sw_ctx->aead_recv;
+       }
+
        switch (crypto_info->cipher_type) {
        case TLS_CIPHER_AES_GCM_128: {
                nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
@@ -692,46 +1094,49 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
                goto free_priv;
        }
 
-       ctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
-       ctx->tag_size = tag_size;
-       ctx->overhead_size = ctx->prepend_size + ctx->tag_size;
-       ctx->iv_size = iv_size;
-       ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL);
-       if (!ctx->iv) {
+       cctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
+       cctx->tag_size = tag_size;
+       cctx->overhead_size = cctx->prepend_size + cctx->tag_size;
+       cctx->iv_size = iv_size;
+       cctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+                          GFP_KERNEL);
+       if (!cctx->iv) {
                rc = -ENOMEM;
                goto free_priv;
        }
-       memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
-       memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
-       ctx->rec_seq_size = rec_seq_size;
-       ctx->rec_seq = kmalloc(rec_seq_size, GFP_KERNEL);
-       if (!ctx->rec_seq) {
+       memcpy(cctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+       memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
+       cctx->rec_seq_size = rec_seq_size;
+       cctx->rec_seq = kmalloc(rec_seq_size, GFP_KERNEL);
+       if (!cctx->rec_seq) {
                rc = -ENOMEM;
                goto free_iv;
        }
-       memcpy(ctx->rec_seq, rec_seq, rec_seq_size);
-
-       sg_init_table(sw_ctx->sg_encrypted_data,
-                     ARRAY_SIZE(sw_ctx->sg_encrypted_data));
-       sg_init_table(sw_ctx->sg_plaintext_data,
-                     ARRAY_SIZE(sw_ctx->sg_plaintext_data));
-
-       sg_init_table(sw_ctx->sg_aead_in, 2);
-       sg_set_buf(&sw_ctx->sg_aead_in[0], sw_ctx->aad_space,
-                  sizeof(sw_ctx->aad_space));
-       sg_unmark_end(&sw_ctx->sg_aead_in[1]);
-       sg_chain(sw_ctx->sg_aead_in, 2, sw_ctx->sg_plaintext_data);
-       sg_init_table(sw_ctx->sg_aead_out, 2);
-       sg_set_buf(&sw_ctx->sg_aead_out[0], sw_ctx->aad_space,
-                  sizeof(sw_ctx->aad_space));
-       sg_unmark_end(&sw_ctx->sg_aead_out[1]);
-       sg_chain(sw_ctx->sg_aead_out, 2, sw_ctx->sg_encrypted_data);
-
-       if (!sw_ctx->aead_send) {
-               sw_ctx->aead_send = crypto_alloc_aead("gcm(aes)", 0, 0);
-               if (IS_ERR(sw_ctx->aead_send)) {
-                       rc = PTR_ERR(sw_ctx->aead_send);
-                       sw_ctx->aead_send = NULL;
+       memcpy(cctx->rec_seq, rec_seq, rec_seq_size);
+
+       if (tx) {
+               sg_init_table(sw_ctx->sg_encrypted_data,
+                             ARRAY_SIZE(sw_ctx->sg_encrypted_data));
+               sg_init_table(sw_ctx->sg_plaintext_data,
+                             ARRAY_SIZE(sw_ctx->sg_plaintext_data));
+
+               sg_init_table(sw_ctx->sg_aead_in, 2);
+               sg_set_buf(&sw_ctx->sg_aead_in[0], sw_ctx->aad_space,
+                          sizeof(sw_ctx->aad_space));
+               sg_unmark_end(&sw_ctx->sg_aead_in[1]);
+               sg_chain(sw_ctx->sg_aead_in, 2, sw_ctx->sg_plaintext_data);
+               sg_init_table(sw_ctx->sg_aead_out, 2);
+               sg_set_buf(&sw_ctx->sg_aead_out[0], sw_ctx->aad_space,
+                          sizeof(sw_ctx->aad_space));
+               sg_unmark_end(&sw_ctx->sg_aead_out[1]);
+               sg_chain(sw_ctx->sg_aead_out, 2, sw_ctx->sg_encrypted_data);
+       }
+
+       if (!*aead) {
+               *aead = crypto_alloc_aead("gcm(aes)", 0, 0);
+               if (IS_ERR(*aead)) {
+                       rc = PTR_ERR(*aead);
+                       *aead = NULL;
                        goto free_rec_seq;
                }
        }
@@ -740,24 +1145,44 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
 
        memcpy(keyval, gcm_128_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
 
-       rc = crypto_aead_setkey(sw_ctx->aead_send, keyval,
+       rc = crypto_aead_setkey(*aead, keyval,
                                TLS_CIPHER_AES_GCM_128_KEY_SIZE);
        if (rc)
                goto free_aead;
 
-       rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size);
-       if (!rc)
-               return 0;
+       rc = crypto_aead_setauthsize(*aead, cctx->tag_size);
+       if (rc)
+               goto free_aead;
+
+       if (!tx) {
+               /* Set up strparser */
+               memset(&cb, 0, sizeof(cb));
+               cb.rcv_msg = tls_queue;
+               cb.parse_msg = tls_read_size;
+
+               strp_init(&sw_ctx->strp, sk, &cb);
+
+               write_lock_bh(&sk->sk_callback_lock);
+               sw_ctx->saved_data_ready = sk->sk_data_ready;
+               sk->sk_data_ready = tls_data_ready;
+               write_unlock_bh(&sk->sk_callback_lock);
+
+               sw_ctx->sk_poll = sk->sk_socket->ops->poll;
+
+               strp_check_rcv(&sw_ctx->strp);
+       }
+
+       goto out;
 
 free_aead:
-       crypto_free_aead(sw_ctx->aead_send);
-       sw_ctx->aead_send = NULL;
+       crypto_free_aead(*aead);
+       *aead = NULL;
 free_rec_seq:
-       kfree(ctx->rec_seq);
-       ctx->rec_seq = NULL;
+       kfree(cctx->rec_seq);
+       cctx->rec_seq = NULL;
 free_iv:
-       kfree(ctx->iv);
-       ctx->iv = NULL;
+       kfree(ctx->tx.iv);
+       ctx->tx.iv = NULL;
 free_priv:
        kfree(ctx->priv_ctx);
        ctx->priv_ctx = NULL;
index 1abcc4fc4df18e81df5cfb072e81ce6970f5e6bc..41722046b9370c1861a51787dceabfe2b6215274 100644 (file)
@@ -34,9 +34,10 @@ config CFG80211
 
          When built as a module it will be called cfg80211.
 
+if CFG80211
+
 config NL80211_TESTMODE
        bool "nl80211 testmode command"
-       depends on CFG80211
        help
          The nl80211 testmode command helps implementing things like
          factory calibration or validation tools for wireless chips.
@@ -51,7 +52,6 @@ config NL80211_TESTMODE
 
 config CFG80211_DEVELOPER_WARNINGS
        bool "enable developer warnings"
-       depends on CFG80211
        default n
        help
          This option enables some additional warnings that help
@@ -68,7 +68,7 @@ config CFG80211_DEVELOPER_WARNINGS
 
 config CFG80211_CERTIFICATION_ONUS
        bool "cfg80211 certification onus"
-       depends on CFG80211 && EXPERT
+       depends on EXPERT
        default n
        ---help---
          You should disable this option unless you are both capable
@@ -159,7 +159,6 @@ config CFG80211_REG_RELAX_NO_IR
 
 config CFG80211_DEFAULT_PS
        bool "enable powersave by default"
-       depends on CFG80211
        default y
        help
          This option enables powersave mode by default.
@@ -170,7 +169,6 @@ config CFG80211_DEFAULT_PS
 
 config CFG80211_DEBUGFS
        bool "cfg80211 DebugFS entries"
-       depends on CFG80211
        depends on DEBUG_FS
        ---help---
          You can enable this if you want debugfs entries for cfg80211.
@@ -180,7 +178,6 @@ config CFG80211_DEBUGFS
 config CFG80211_CRDA_SUPPORT
        bool "support CRDA" if EXPERT
        default y
-       depends on CFG80211
        help
          You should enable this option unless you know for sure you have no
          need for it, for example when using internal regdb (above) or the
@@ -190,7 +187,6 @@ config CFG80211_CRDA_SUPPORT
 
 config CFG80211_WEXT
        bool "cfg80211 wireless extensions compatibility" if !CFG80211_WEXT_EXPORT
-       depends on CFG80211
        select WEXT_CORE
        default y if CFG80211_WEXT_EXPORT
        help
@@ -199,11 +195,12 @@ config CFG80211_WEXT
 
 config CFG80211_WEXT_EXPORT
        bool
-       depends on CFG80211
        help
          Drivers should select this option if they require cfg80211's
          wext compatibility symbols to be exported.
 
+endif # CFG80211
+
 config LIB80211
        tristate
        default n
index c69160694b6c0c173b5805f09a588083f0070b0e..d112e9a89364518fbd9ac748f9391022601669b7 100644 (file)
@@ -420,7 +420,8 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
 EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
 
 int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
-                                 const u8 *addr, enum nl80211_iftype iftype)
+                                 const u8 *addr, enum nl80211_iftype iftype,
+                                 u8 data_offset)
 {
        struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
        struct {
@@ -434,7 +435,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
        if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
                return -1;
 
-       hdrlen = ieee80211_hdrlen(hdr->frame_control);
+       hdrlen = ieee80211_hdrlen(hdr->frame_control) + data_offset;
        if (skb->len < hdrlen + 8)
                return -1;
 
index 8e70291e586a97cab0a57e94a01d2fb7942b08b4..e87d6c4dd5b6a8269b450abf9b84bb6f0c4a6ad3 100644 (file)
@@ -217,7 +217,7 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
                if (skb->len <= mtu)
                        goto ok;
 
-               if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+               if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
                        goto ok;
        }
 
index ccfdc7115a83f709e2a5980c5dc0d65cd5859467..a00ec715aa4681a89e348a058450320bbafe6cfb 100644 (file)
@@ -283,7 +283,7 @@ static struct crypto_comp * __percpu *ipcomp_alloc_tfms(const char *alg_name)
                struct crypto_comp *tfm;
 
                /* This can be any valid CPU ID so we don't need locking. */
-               tfm = __this_cpu_read(*pos->tfms);
+               tfm = this_cpu_read(*pos->tfms);
 
                if (!strcmp(crypto_comp_name(tfm), alg_name)) {
                        pos->users++;
index 77d9d1ab05ce17838dba0322a0e222add2ed1dc2..cb3bb9ae440749a6903fcc876bfaee08a1e6ec2d 100644 (file)
@@ -1458,10 +1458,13 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
 static int xfrm_get_tos(const struct flowi *fl, int family)
 {
        const struct xfrm_policy_afinfo *afinfo;
-       int tos = 0;
+       int tos;
 
        afinfo = xfrm_policy_get_afinfo(family);
-       tos = afinfo ? afinfo->get_tos(fl) : 0;
+       if (!afinfo)
+               return 0;
+
+       tos = afinfo->get_tos(fl);
 
        rcu_read_unlock();
 
@@ -1891,7 +1894,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
        spin_unlock(&pq->hold_queue.lock);
 
        dst_hold(xfrm_dst_path(dst));
-       dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, 0);
+       dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, XFRM_LOOKUP_QUEUE);
        if (IS_ERR(dst))
                goto purge_queue;
 
@@ -2729,14 +2732,14 @@ static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst,
        while (dst->xfrm) {
                const struct xfrm_state *xfrm = dst->xfrm;
 
+               dst = xfrm_dst_child(dst);
+
                if (xfrm->props.mode == XFRM_MODE_TRANSPORT)
                        continue;
                if (xfrm->type->flags & XFRM_TYPE_REMOTE_COADDR)
                        daddr = xfrm->coaddr;
                else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR))
                        daddr = &xfrm->id.daddr;
-
-               dst = xfrm_dst_child(dst);
        }
        return daddr;
 }
index 1d38c6acf8afbbab13a1ad1d05b4f67d6816d38b..9e3a5e85f8285e67323e6069f642fc8610ecd44b 100644 (file)
@@ -660,7 +660,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
                } else {
                        XFRM_SKB_CB(skb)->seq.output.low = oseq + 1;
                        XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi;
-                       xo->seq.low = oseq = oseq + 1;
+                       xo->seq.low = oseq + 1;
                        xo->seq.hi = oseq_hi;
                        oseq += skb_shinfo(skb)->gso_segs;
                }
index 54e21f19d722c43d0b1fd102777a3467cee5b009..f9d2f2233f09531697b35209fe86754d23971e3f 100644 (file)
@@ -2056,6 +2056,11 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
        struct xfrm_mgr *km;
        struct xfrm_policy *pol = NULL;
 
+#ifdef CONFIG_COMPAT
+       if (in_compat_syscall())
+               return -EOPNOTSUPP;
+#endif
+
        if (!optval && !optlen) {
                xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
                xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
index 7f52b8eb177db4978750caed402aa34f6a31d7b4..e92b8c019c8823249d23745782f6e29beaace2c6 100644 (file)
@@ -121,22 +121,17 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
        struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL];
        struct xfrm_replay_state_esn *rs;
 
-       if (p->flags & XFRM_STATE_ESN) {
-               if (!rt)
-                       return -EINVAL;
+       if (!rt)
+               return (p->flags & XFRM_STATE_ESN) ? -EINVAL : 0;
 
-               rs = nla_data(rt);
+       rs = nla_data(rt);
 
-               if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
-                       return -EINVAL;
-
-               if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) &&
-                   nla_len(rt) != sizeof(*rs))
-                       return -EINVAL;
-       }
+       if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
+               return -EINVAL;
 
-       if (!rt)
-               return 0;
+       if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) &&
+           nla_len(rt) != sizeof(*rs))
+               return -EINVAL;
 
        /* As only ESP and AH support ESN feature. */
        if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH))
@@ -3258,6 +3253,7 @@ static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list)
 static struct pernet_operations xfrm_user_net_ops = {
        .init       = xfrm_user_net_init,
        .exit_batch = xfrm_user_net_exit,
+       .async      = true,
 };
 
 static int __init xfrm_user_init(void)
index ec3fc8d88e873e59ae5617ad9ebbb961cc27c2e8..2c2a587e0942b9d714539521d413ae96e01bfaf0 100644 (file)
@@ -43,6 +43,7 @@ hostprogs-y += xdp_redirect_cpu
 hostprogs-y += xdp_monitor
 hostprogs-y += xdp_rxq_info
 hostprogs-y += syscall_tp
+hostprogs-y += cpustat
 
 # Libbpf dependencies
 LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
@@ -93,6 +94,7 @@ xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o
 xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
 xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
 syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
+cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -144,6 +146,7 @@ always += xdp_monitor_kern.o
 always += xdp_rxq_info_kern.o
 always += xdp2skb_meta_kern.o
 always += syscall_tp_kern.o
+always += cpustat_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -188,6 +191,7 @@ HOSTLOADLIBES_xdp_redirect_cpu += -lelf
 HOSTLOADLIBES_xdp_monitor += -lelf
 HOSTLOADLIBES_xdp_rxq_info += -lelf
 HOSTLOADLIBES_syscall_tp += -lelf
+HOSTLOADLIBES_cpustat += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
index 69806d74fa53bac11aa4b113661539777a300a6b..b1a310c3ae899b043c70085fc71a08b698ea0308 100644 (file)
@@ -67,6 +67,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
        bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
        bool is_sockops = strncmp(event, "sockops", 7) == 0;
        bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
+       bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0;
        size_t insns_cnt = size / sizeof(struct bpf_insn);
        enum bpf_prog_type prog_type;
        char buf[256];
@@ -96,6 +97,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
                prog_type = BPF_PROG_TYPE_SOCK_OPS;
        } else if (is_sk_skb) {
                prog_type = BPF_PROG_TYPE_SK_SKB;
+       } else if (is_sk_msg) {
+               prog_type = BPF_PROG_TYPE_SK_MSG;
        } else {
                printf("Unknown event '%s'\n", event);
                return -1;
@@ -113,7 +116,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
        if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
                return 0;
 
-       if (is_socket || is_sockops || is_sk_skb) {
+       if (is_socket || is_sockops || is_sk_skb || is_sk_msg) {
                if (is_socket)
                        event += 6;
                else
@@ -589,7 +592,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
                    memcmp(shname, "socket", 6) == 0 ||
                    memcmp(shname, "cgroup/", 7) == 0 ||
                    memcmp(shname, "sockops", 7) == 0 ||
-                   memcmp(shname, "sk_skb", 6) == 0) {
+                   memcmp(shname, "sk_skb", 6) == 0 ||
+                   memcmp(shname, "sk_msg", 6) == 0) {
                        ret = load_and_attach(shname, data->d_buf,
                                              data->d_size);
                        if (ret != 0)
diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c
new file mode 100644 (file)
index 0000000..68c84da
--- /dev/null
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/*
+ * The CPU number, cstate number and pstate number are based
+ * on 96boards Hikey with octa CA53 CPUs.
+ *
+ * Every CPU has three idle states for cstate:
+ *   WFI, CPU_OFF, CLUSTER_OFF
+ *
+ * Every CPU has 5 operating points:
+ *   208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
+ *
+ * This code is based on these assumptions; other platforms
+ * need to adjust these definitions.
+ */
+#define MAX_CPU                        8
+#define MAX_PSTATE_ENTRIES     5
+#define MAX_CSTATE_ENTRIES     3
+
+static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
+
+/*
+ * my_map structure is used to record cstate and pstate index and
+ * timestamp (Idx, Ts); when a new event arrives we need to update
+ * the combination with the new state index and timestamp (Idx`, Ts`).
+ *
+ * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
+ * interval for the previous state: Duration(Idx) = Ts` - Ts.
+ *
+ * Every CPU has one array as below for recording state index and
+ * timestamp, with cstate and pstate recorded separately:
+ *
+ * +--------------------------+
+ * | cstate timestamp         |
+ * +--------------------------+
+ * | cstate index             |
+ * +--------------------------+
+ * | pstate timestamp         |
+ * +--------------------------+
+ * | pstate index             |
+ * +--------------------------+
+ */
+#define MAP_OFF_CSTATE_TIME    0
+#define MAP_OFF_CSTATE_IDX     1
+#define MAP_OFF_PSTATE_TIME    2
+#define MAP_OFF_PSTATE_IDX     3
+#define MAP_OFF_NUM            4
+
+struct bpf_map_def SEC("maps") my_map = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(u64),
+       .max_entries = MAX_CPU * MAP_OFF_NUM,
+};
+
+/* cstate_duration records duration time for every idle state per CPU */
+struct bpf_map_def SEC("maps") cstate_duration = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(u64),
+       .max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
+};
+
+/* pstate_duration records duration time for every operating point per CPU */
+struct bpf_map_def SEC("maps") pstate_duration = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(u64),
+       .max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
+};
+
+/*
+ * The trace events for cpu_idle and cpu_frequency are taken from:
+ * /sys/kernel/debug/tracing/events/power/cpu_idle/format
+ * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
+ *
+ * These two events have same format, so define one common structure.
+ */
+struct cpu_args {
+       u64 pad;
+       u32 state;
+       u32 cpu_id;
+};
+
+/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
+static u32 find_cpu_pstate_idx(u32 frequency)
+{
+       u32 i;
+
+       for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
+               if (frequency == cpu_opps[i])
+                       return i;
+       }
+
+       return i;
+}
+
+SEC("tracepoint/power/cpu_idle")
+int bpf_prog1(struct cpu_args *ctx)
+{
+       u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+       u32 key, cpu, pstate_idx;
+       u64 *val;
+
+       /* Per-CPU map slots exist only for CPU ids 0 .. MAX_CPU - 1 */
+       cpu = ctx->cpu_id;
+       if (cpu >= MAX_CPU)
+               return 0;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
+       cts = bpf_map_lookup_elem(&my_map, &key);
+       if (!cts)
+               return 0;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+       cstate = bpf_map_lookup_elem(&my_map, &key);
+       if (!cstate)
+               return 0;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+       pts = bpf_map_lookup_elem(&my_map, &key);
+       if (!pts)
+               return 0;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+       pstate = bpf_map_lookup_elem(&my_map, &key);
+       if (!pstate)
+               return 0;
+
+       prev_state = *cstate;
+       *cstate = ctx->state;
+
+       if (!*cts) {
+               *cts = bpf_ktime_get_ns();
+               return 0;
+       }
+
+       cur_ts = bpf_ktime_get_ns();
+       delta = cur_ts - *cts;
+       *cts = cur_ts;
+
+       /*
+        * When state doesn't equal to (u32)-1, the cpu will enter
+        * one idle state; for this case we need to record interval
+        * for the pstate.
+        *
+        *                 OPP2
+        *            +---------------------+
+        *     OPP1   |                     |
+        *   ---------+                     |
+        *                                  |  Idle state
+        *                                  +---------------
+        *
+        *            |<- pstate duration ->|
+        *            ^                     ^
+        *           pts                  cur_ts
+        */
+       if (ctx->state != (u32)-1) {
+
+               /* only record pstate after the first cpu_frequency event */
+               if (!*pts)
+                       return 0;
+
+               delta = cur_ts - *pts;
+
+               pstate_idx = find_cpu_pstate_idx(*pstate);
+               if (pstate_idx >= MAX_PSTATE_ENTRIES)
+                       return 0;
+
+               key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+               val = bpf_map_lookup_elem(&pstate_duration, &key);
+               if (val)
+                       __sync_fetch_and_add((long *)val, delta);
+
+       /*
+        * When state equals (u32)-1, the cpu just exits from one
+        * specific idle state; for this case we need to record
+        * interval for the cstate.
+        *
+        *       OPP2
+        *   -----------+
+        *              |                          OPP1
+        *              |                     +-----------
+        *              |     Idle state      |
+        *              +---------------------+
+        *
+        *              |<- cstate duration ->|
+        *              ^                     ^
+        *             cts                  cur_ts
+        */
+       } else {
+               /* prev_state is an earlier event's raw state; skip bad idx */
+               key = cpu * MAX_CSTATE_ENTRIES + prev_state;
+               val = bpf_map_lookup_elem(&cstate_duration, &key);
+               if (val && prev_state < MAX_CSTATE_ENTRIES)
+                       __sync_fetch_and_add((long *)val, delta);
+       }
+
+       /* Update timestamp for pstate as new start time */
+       if (*pts)
+               *pts = cur_ts;
+
+       return 0;
+}
+
+SEC("tracepoint/power/cpu_frequency")
+int bpf_prog2(struct cpu_args *ctx)
+{
+       u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+       u32 key, cpu, pstate_idx;
+       u64 *val;
+
+       cpu = ctx->cpu_id;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+       pts = bpf_map_lookup_elem(&my_map, &key);
+       if (!pts)
+               return 0;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+       pstate = bpf_map_lookup_elem(&my_map, &key);
+       if (!pstate)
+               return 0;
+
+       key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+       cstate = bpf_map_lookup_elem(&my_map, &key);
+       if (!cstate)
+               return 0;
+
+       prev_state = *pstate;
+       *pstate = ctx->state;
+
+       if (!*pts) {
+               *pts = bpf_ktime_get_ns();
+               return 0;
+       }
+
+       cur_ts = bpf_ktime_get_ns();
+       delta = cur_ts - *pts;
+       *pts = cur_ts;
+
+       /* When CPU is in idle, bail out to skip pstate statistics */
+       if (*cstate != (u32)(-1))
+               return 0;
+
+       /*
+        * The cpu changes to a different OPP (in the diagram below the
+        * frequency changes from OPP3 to OPP1); record the interval for
+        * the previous frequency OPP3 (prev_state, since *pstate already
+        * holds the new frequency OPP1) and restart the timestamp.
+        *
+        *                 OPP3
+        *            +---------------------+
+        *     OPP2   |                     |
+        *   ---------+                     |
+        *                                  |    OPP1
+        *                                  +---------------
+        *
+        *            |<- pstate duration ->|
+        *            ^                     ^
+        *           pts                  cur_ts
+        */
+       pstate_idx = find_cpu_pstate_idx(prev_state);
+       if (pstate_idx >= MAX_PSTATE_ENTRIES)
+               return 0;
+
+       key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+       val = bpf_map_lookup_elem(&pstate_duration, &key);
+       if (val)
+               __sync_fetch_and_add((long *)val, delta);
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c
new file mode 100644 (file)
index 0000000..2b4cd1a
--- /dev/null
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sched.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <locale.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_CPU                        8
+#define MAX_PSTATE_ENTRIES     5
+#define MAX_CSTATE_ENTRIES     3
+#define MAX_STARS              40
+
+#define CPUFREQ_MAX_SYSFS_PATH "/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq"
+#define CPUFREQ_LOWEST_FREQ    "208000"
+#define CPUFREQ_HIGHEST_FREQ   "12000000"
+
+struct cpu_stat_data {
+       unsigned long cstate[MAX_CSTATE_ENTRIES];
+       unsigned long pstate[MAX_PSTATE_ENTRIES];
+};
+
+static struct cpu_stat_data stat_data[MAX_CPU];
+
+static void cpu_stat_print(void)
+{
+       int i, j;
+       char state_str[sizeof("cstate-9")];
+       struct cpu_stat_data *data;
+
+       /* Clear screen */
+       printf("\033[2J");
+
+       /* Header */
+       printf("\nCPU states statistics:\n");
+       printf("%-10s ", "state(ms)");
+
+       for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+               sprintf(state_str, "cstate-%d", i);
+               printf("%-11s ", state_str);
+       }
+
+       for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+               sprintf(state_str, "pstate-%d", i);
+               printf("%-11s ", state_str);
+       }
+
+       printf("\n");
+
+       for (j = 0; j < MAX_CPU; j++) {
+               data = &stat_data[j];
+
+               printf("CPU-%-6d ", j);
+               for (i = 0; i < MAX_CSTATE_ENTRIES; i++)
+                       printf("%-11ld ", data->cstate[i] / 1000000);
+
+               for (i = 0; i < MAX_PSTATE_ENTRIES; i++)
+                       printf("%-11ld ", data->pstate[i] / 1000000);
+
+               printf("\n");
+       }
+}
+
+static void cpu_stat_update(int cstate_fd, int pstate_fd)
+{
+       __u64 value;            /* same width as the maps' u64 value_size */
+       __u32 key, c, i;        /* key has the maps' u32 key_size */
+
+       for (c = 0; c < MAX_CPU; c++) {
+               for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+                       key = c * MAX_CSTATE_ENTRIES + i;
+                       if (!bpf_map_lookup_elem(cstate_fd, &key, &value))
+                               stat_data[c].cstate[i] = value;
+               }
+
+               for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+                       key = c * MAX_PSTATE_ENTRIES + i;
+                       if (!bpf_map_lookup_elem(pstate_fd, &key, &value))
+                               stat_data[c].pstate[i] = value;
+               }
+       }
+}
+
+/*
+ * This function is copied from 'idlestat' tool function
+ * idlestat_wake_all() in idlestate.c.
+ *
+ * It sets the affinity of the running task to each cpu in turn, waking up
+ * that specific CPU to handle scheduling; as a result every cpu is woken
+ * up once and produces the ftrace event 'trace_cpu_idle'.
+ */
+static int cpu_stat_inject_cpu_idle_event(void)
+{
+       int rcpu, i, ret;
+       cpu_set_t cpumask;
+       cpu_set_t original_cpumask;
+
+       ret = sysconf(_SC_NPROCESSORS_CONF);
+       if (ret < 0)
+               return -1;
+
+       rcpu = sched_getcpu();
+       if (rcpu < 0)
+               return -1;
+
+       /* Keep track of the CPUs we will run on */
+       sched_getaffinity(0, sizeof(original_cpumask), &original_cpumask);
+
+       for (i = 0; i < ret; i++) {
+
+               /* Pointless to wake up ourself */
+               if (i == rcpu)
+                       continue;
+
+               /* Pointless to wake CPUs we will not run on */
+               if (!CPU_ISSET(i, &original_cpumask))
+                       continue;
+
+               CPU_ZERO(&cpumask);
+               CPU_SET(i, &cpumask);
+
+               sched_setaffinity(0, sizeof(cpumask), &cpumask);
+       }
+
+       /* Enable all the CPUs of the original mask */
+       sched_setaffinity(0, sizeof(original_cpumask), &original_cpumask);
+       return 0;
+}
+
+/*
+ * It's possible that no frequency change happens for a long time, so no
+ * ftrace event 'trace_cpu_frequency' is received for a long period; this
+ * introduces a big deviation in the pstate statistics.
+ *
+ * To solve this issue, the code below forces 'scaling_max_freq' to 208MHz
+ * to trigger the ftrace event 'trace_cpu_frequency' and then restores it to
+ * the maximum frequency value 1.2GHz.
+ */
+static int cpu_stat_inject_cpu_frequency_event(void)
+{
+       int len, fd;
+
+       fd = open(CPUFREQ_MAX_SYSFS_PATH, O_WRONLY);
+       if (fd < 0) {
+               printf("failed to open scaling_max_freq, errno=%d\n", errno);
+               return fd;
+       }
+       /* First write: clamp the maximum to the lowest frequency */
+       len = write(fd, CPUFREQ_LOWEST_FREQ, strlen(CPUFREQ_LOWEST_FREQ));
+       if (len < 0) {
+               printf("failed to write scaling_max_freq, errno=%d\n", errno);
+               goto err;
+       }
+       /* Second write: put the maximum back to the highest frequency */
+       len = write(fd, CPUFREQ_HIGHEST_FREQ, strlen(CPUFREQ_HIGHEST_FREQ));
+       if (len < 0) {
+               printf("failed to write scaling_max_freq, errno=%d\n", errno);
+               goto err;
+       }
+
+err:
+       close(fd);
+       return len;
+}
+
+static void int_exit(int sig)
+{
+       cpu_stat_inject_cpu_idle_event();
+       cpu_stat_inject_cpu_frequency_event();
+       cpu_stat_update(map_fd[1], map_fd[2]);
+       cpu_stat_print();
+       exit(0);
+}
+
+int main(int argc, char **argv)
+{
+       char filename[256];
+       int ret;
+
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+       if (load_bpf_file(filename)) {
+               printf("%s", bpf_log_buf);
+               return 1;
+       }
+
+       ret = cpu_stat_inject_cpu_idle_event();
+       if (ret < 0)
+               return 1;
+
+       ret = cpu_stat_inject_cpu_frequency_event();
+       if (ret < 0)
+               return 1;
+
+       signal(SIGINT, int_exit);
+       signal(SIGTERM, int_exit);
+
+       while (1) {
+               cpu_stat_update(map_fd[1], map_fd[2]);
+               cpu_stat_print();
+               sleep(5);
+       }
+
+       return 0;
+}
index efdc16d195ff6e889f66104c60003a9801843831..9a8db7bd6db4899591835ebfb4dffc32b1bb876b 100644 (file)
@@ -52,7 +52,8 @@ int _gre_set_tunnel(struct __sk_buff *skb)
        key.tunnel_tos = 0;
        key.tunnel_ttl = 64;
 
-       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+                                    BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
        if (ret < 0) {
                ERROR(ret);
                return TC_ACT_SHOT;
@@ -92,7 +93,8 @@ int _ip6gretap_set_tunnel(struct __sk_buff *skb)
        key.tunnel_label = 0xabcde;
 
        ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
-                                    BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX);
+                                    BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+                                    BPF_F_SEQ_NUMBER);
        if (ret < 0) {
                ERROR(ret);
                return TC_ACT_SHOT;
index 8ee0371a100af1f82741ff09e49f29b950ccd074..9f617423685618fab136aaf41743578f1d4f2bd2 100755 (executable)
@@ -61,6 +61,7 @@ cleanup_and_exit()
 
        [ -n "$msg" ] && echo "ERROR: $msg"
 
+       test_cgrp2_sock -d ${CGRP_MNT}/sockopts
        ip li del cgrp2_sock
        umount ${CGRP_MNT}
 
index fc4e64d00cb3063a065cc07ec6c02c7184d19626..0f396a86e0cbebd68e9ddf2d5c8557ce40028f38 100755 (executable)
@@ -28,6 +28,9 @@ function attach_bpf {
 }
 
 function cleanup {
+       if [ -d /tmp/cgroupv2/foo ]; then
+               test_cgrp2_sock -d /tmp/cgroupv2/foo
+       fi
        ip link del veth0b
        ip netns delete at_ns0
        umount /tmp/cgroupv2
index 43ce049996eeb5b4d81394705861a3f11715d3b4..c265863ccdf924df375eab36230292df47d36f42 100755 (executable)
@@ -23,7 +23,8 @@ function config_device {
 function add_gre_tunnel {
        # in namespace
        ip netns exec at_ns0 \
-               ip link add dev $DEV_NS type $TYPE key 2 local 172.16.1.100 remote 172.16.1.200
+        ip link add dev $DEV_NS type $TYPE seq key 2 \
+               local 172.16.1.100 remote 172.16.1.200
        ip netns exec at_ns0 ip link set dev $DEV_NS up
        ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
 
@@ -43,7 +44,7 @@ function add_ip6gretap_tunnel {
 
        # in namespace
        ip netns exec at_ns0 \
-               ip link add dev $DEV_NS type $TYPE flowlabel 0xbcdef key 2 \
+               ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
                local ::11 remote ::22
 
        ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
index a77a583d94d42ca375a7519a066ff4f648e05226..7068fbdde951f1ad7b64ff6ba28ff50c382a14bf 100644 (file)
@@ -39,6 +39,7 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
 {
        char time_fmt1[] = "Time Enabled: %llu, Time Running: %llu";
        char time_fmt2[] = "Get Time Failed, ErrCode: %d";
+       char addr_fmt[] = "Address recorded on event: %llx";
        char fmt[] = "CPU-%d period %lld ip %llx";
        u32 cpu = bpf_get_smp_processor_id();
        struct bpf_perf_event_value value_buf;
@@ -64,6 +65,9 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
        else
          bpf_trace_printk(time_fmt2, sizeof(time_fmt2), ret);
 
+       if (ctx->addr != 0)
+         bpf_trace_printk(addr_fmt, sizeof(addr_fmt), ctx->addr);
+
        val = bpf_map_lookup_elem(&counts, &key);
        if (val)
                (*val)++;
index bf4f1b6d9a52e0d33e17189be30900f90defd266..56f7a259a7c92500c41a850b45ff66ab63e47158 100644 (file)
@@ -215,6 +215,17 @@ static void test_bpf_perf_event(void)
                /* Intel Instruction Retired */
                .config = 0xc0,
        };
+       struct perf_event_attr attr_type_raw_lock_load = {
+               .sample_freq = SAMPLE_FREQ,
+               .freq = 1,
+               .type = PERF_TYPE_RAW,
+               /* Intel MEM_UOPS_RETIRED.LOCK_LOADS */
+               .config = 0x21d0,
+               /* Request to record lock address from PEBS */
+               .sample_type = PERF_SAMPLE_ADDR,
+               /* Record address value requires precise event */
+               .precise_ip = 2,
+       };
 
        printf("Test HW_CPU_CYCLES\n");
        test_perf_event_all_cpu(&attr_type_hw);
@@ -236,6 +247,10 @@ static void test_bpf_perf_event(void)
        test_perf_event_all_cpu(&attr_type_raw);
        test_perf_event_task(&attr_type_raw);
 
+       printf("Test Lock Load\n");
+       test_perf_event_all_cpu(&attr_type_raw_lock_load);
+       test_perf_event_task(&attr_type_raw_lock_load);
+
        printf("*** PASS ***\n");
 }
 
index d54e91eb6cbf38b55c503190e90dee4a54f1e993..b701b5c21342a78105fdf7e1e5beac6a049788c0 100644 (file)
@@ -20,6 +20,7 @@
 #include <string.h>
 #include <unistd.h>
 #include <libgen.h>
+#include <sys/resource.h>
 
 #include "bpf_load.h"
 #include "bpf_util.h"
@@ -75,6 +76,7 @@ static void usage(const char *prog)
 
 int main(int argc, char **argv)
 {
+       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
        const char *optstr = "SN";
        char filename[256];
        int ret, opt, key = 0;
@@ -98,6 +100,11 @@ int main(int argc, char **argv)
                return 1;
        }
 
+       if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+               perror("setrlimit(RLIMIT_MEMLOCK)");
+               return 1;
+       }
+
        ifindex_in = strtoul(argv[optind], NULL, 0);
        ifindex_out = strtoul(argv[optind + 1], NULL, 0);
        printf("input: %d output: %d\n", ifindex_in, ifindex_out);
index 0e349b80686e76a421759b931ec04d194e446768..ba942e3ead890de9f4632d25a5fc1fd000676296 100644 (file)
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
+ifndef CROSS_COMPILE
 hostprogs-$(CONFIG_SAMPLE_SECCOMP) := bpf-fancy dropper bpf-direct
 
 HOSTCFLAGS_bpf-fancy.o += -I$(objtree)/usr/include
@@ -16,7 +17,6 @@ HOSTCFLAGS_bpf-direct.o += -idirafter $(objtree)/include
 bpf-direct-objs := bpf-direct.o
 
 # Try to match the kernel target.
-ifndef CROSS_COMPILE
 ifndef CONFIG_64BIT
 
 # s390 has -m31 flag to build 31 bit binaries
@@ -35,12 +35,4 @@ HOSTLOADLIBES_bpf-fancy += $(MFLAG)
 HOSTLOADLIBES_dropper += $(MFLAG)
 endif
 always := $(hostprogs-m)
-else
-# MIPS system calls are defined based on the -mabi that is passed
-# to the toolchain which may or may not be a valid option
-# for the host toolchain. So disable tests if target architecture
-# is MIPS but the host isn't.
-ifndef CONFIG_MIPS
-always := $(hostprogs-m)
-endif
 endif
index 73f1da4d116cf9a78a01cb3293d11f8a27a589cc..9bf2881bd11b44d777554994e308e36f64ae2556 100644 (file)
@@ -2,7 +2,7 @@
 hostprogs-y := sockmap
 
 # Libbpf dependencies
-LIBBPF := ../../tools/lib/bpf/bpf.o
+LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
index 52b0053274f425a6a07eb70dfbce89ca9c32d4fb..9ad5ba79c85ae1a16710a337e32d4aaf08bfe776 100644 (file)
@@ -43,6 +43,42 @@ struct bpf_map_def SEC("maps") sock_map = {
        .max_entries = 20,
 };
 
+struct bpf_map_def SEC("maps") sock_map_txmsg = {
+       .type = BPF_MAP_TYPE_SOCKMAP,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_redir = {
+       .type = BPF_MAP_TYPE_SOCKMAP,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") sock_apply_bytes = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_cork_bytes = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_pull_bytes = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 2
+};
+
+
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
 {
@@ -105,4 +141,165 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 
        return 0;
 }
+
+/* sk_msg1: quiet pass-through verdict program.
+ *
+ * Hands slot 0 of sock_apply_bytes and of sock_cork_bytes to
+ * bpf_msg_apply_bytes() and bpf_msg_cork_bytes(), pulls the range taken
+ * from slots 0 and 1 of sock_pull_bytes, ignores every helper return code
+ * and always answers SK_PASS.
+ */
+SEC("sk_msg1")
+int bpf_prog4(struct sk_msg_md *msg)
+{
+       int key0 = 0, key1 = 1;
+       int *apply, *cork, *from, *to;
+
+       apply = bpf_map_lookup_elem(&sock_apply_bytes, &key0);
+       if (apply)
+               bpf_msg_apply_bytes(msg, *apply);
+
+       cork = bpf_map_lookup_elem(&sock_cork_bytes, &key0);
+       if (cork)
+               bpf_msg_cork_bytes(msg, *cork);
+
+       from = bpf_map_lookup_elem(&sock_pull_bytes, &key0);
+       to = bpf_map_lookup_elem(&sock_pull_bytes, &key1);
+       if (from && to)
+               bpf_msg_pull_data(msg, *from, *to, 0);
+
+       return SK_PASS;
+}
+
+/* sk_msg2: noisy pass-through verdict program.
+ *
+ * Performs the same apply/cork/pull sequence as sk_msg1 but records the
+ * helper return codes and the message length before and after the pull
+ * with bpf_printk(). Always answers SK_PASS.
+ */
+SEC("sk_msg2")
+int bpf_prog5(struct sk_msg_md *msg)
+{
+       int apply_err = -1, cork_err = -1;
+       int key0 = 0, key1 = 1;
+       int *val, *from, *to;
+       int len_before;
+
+       val = bpf_map_lookup_elem(&sock_apply_bytes, &key0);
+       if (val)
+               apply_err = bpf_msg_apply_bytes(msg, *val);
+
+       val = bpf_map_lookup_elem(&sock_cork_bytes, &key0);
+       if (val)
+               cork_err = bpf_msg_cork_bytes(msg, *val);
+
+       len_before = (__u64)msg->data_end - (__u64)msg->data;
+
+       from = bpf_map_lookup_elem(&sock_pull_bytes, &key0);
+       to = bpf_map_lookup_elem(&sock_pull_bytes, &key1);
+       if (from && to) {
+               int pull_err, len_after;
+
+               /* both pointers were just checked, dereference directly */
+               bpf_printk("sk_msg2: pull(%i:%i)\n", *from, *to);
+               pull_err = bpf_msg_pull_data(msg, *from, *to, 0);
+               if (pull_err)
+                       bpf_printk("sk_msg2: pull_data err %i\n", pull_err);
+               len_after = (__u64)msg->data_end - (__u64)msg->data;
+               bpf_printk("sk_msg2: length update %i->%i\n",
+                          len_before, len_after);
+       }
+       bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
+                  len_before, apply_err, cork_err);
+       return SK_PASS;
+}
+
+/* sk_msg3: quiet redirect verdict program.
+ *
+ * Runs the apply/cork/pull sequence driven by sock_apply_bytes,
+ * sock_cork_bytes and sock_pull_bytes (helper return codes ignored) and
+ * returns whatever bpf_msg_redirect_map() yields for key 0 of
+ * sock_map_redir.
+ */
+SEC("sk_msg3")
+int bpf_prog6(struct sk_msg_md *msg)
+{
+       int key0 = 0, key1 = 1;
+       int *apply, *cork, *from, *to;
+
+       apply = bpf_map_lookup_elem(&sock_apply_bytes, &key0);
+       if (apply)
+               bpf_msg_apply_bytes(msg, *apply);
+
+       cork = bpf_map_lookup_elem(&sock_cork_bytes, &key0);
+       if (cork)
+               bpf_msg_cork_bytes(msg, *cork);
+
+       from = bpf_map_lookup_elem(&sock_pull_bytes, &key0);
+       to = bpf_map_lookup_elem(&sock_pull_bytes, &key1);
+       if (from && to)
+               bpf_msg_pull_data(msg, *from, *to, 0);
+
+       return bpf_msg_redirect_map(msg, &sock_map_redir, key0, 0);
+}
+
+/* sk_msg4: noisy redirect verdict program.
+ *
+ * Same apply/cork/pull sequence as sk_msg3, but the helper return codes
+ * and the message length before/after the pull are reported through
+ * bpf_printk(). Every trace line is tagged "sk_msg4" so the output can be
+ * told apart from the sk_msg2 pass-through program. Returns the result of
+ * bpf_msg_redirect_map() for key 0 of sock_map_redir.
+ */
+SEC("sk_msg4")
+int bpf_prog7(struct sk_msg_md *msg)
+{
+       int err1 = 0, err2 = 0, zero = 0, one = 1;
+       int *bytes, *start, *end, len1, len2;
+
+       bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+       if (bytes)
+               err1 = bpf_msg_apply_bytes(msg, *bytes);
+       bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+       if (bytes)
+               err2 = bpf_msg_cork_bytes(msg, *bytes);
+       len1 = (__u64)msg->data_end - (__u64)msg->data;
+       start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+       end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+       if (start && end) {
+               int err;
+
+               /* start and end are known non-NULL inside this branch */
+               bpf_printk("sk_msg4: pull(%i:%i)\n", *start, *end);
+               err = bpf_msg_pull_data(msg, *start, *end, 0);
+               if (err)
+                       bpf_printk("sk_msg4: pull_data err %i\n",
+                                  err);
+               len2 = (__u64)msg->data_end - (__u64)msg->data;
+               bpf_printk("sk_msg4: length update %i->%i\n",
+                          len1, len2);
+       }
+       bpf_printk("sk_msg4: redirect(%iB) err1=%i err2=%i\n",
+                  len1, err1, err2);
+       return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
+}
+
+/* sk_msg5: apply-bytes verdict program.
+ *
+ * Returns SK_DROP when slot 0 of sock_apply_bytes cannot be looked up or
+ * when bpf_msg_apply_bytes() reports an error; SK_PASS otherwise.
+ */
+SEC("sk_msg5")
+int bpf_prog8(struct sk_msg_md *msg)
+{
+       int *bytes, zero = 0;
+
+       bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+       if (!bytes)
+               return SK_DROP;
+       if (bpf_msg_apply_bytes(msg, *bytes))
+               return SK_DROP;
+       return SK_PASS;
+}
+
+/* sk_msg6: cork-bytes verdict program.
+ *
+ * Passes the message straight through when slot 0 of sock_cork_bytes is
+ * missing or when the visible data (data_end - data) already covers the
+ * configured byte count. Otherwise calls bpf_msg_cork_bytes() and drops
+ * the message only if that helper fails.
+ */
+SEC("sk_msg6")
+int bpf_prog9(struct sk_msg_md *msg)
+{
+       void *tail = (void *)(long) msg->data_end;
+       void *head = (void *)(long) msg->data;
+       int key0 = 0;
+       int *cork;
+
+       cork = bpf_map_lookup_elem(&sock_cork_bytes, &key0);
+       if (!cork)
+               return SK_PASS;
+
+       if (((__u64)tail - (__u64)head) >= *cork)
+               return SK_PASS;
+
+       if (bpf_msg_cork_bytes(msg, *cork))
+               return SK_DROP;
+
+       return SK_PASS;
+}
+
+/* sk_msg7: drop verdict program.
+ *
+ * Runs the same apply/cork/pull helper sequence as the pass-through
+ * program (return codes ignored) and then unconditionally answers
+ * SK_DROP.
+ */
+SEC("sk_msg7")
+int bpf_prog10(struct sk_msg_md *msg)
+{
+       int key0 = 0, key1 = 1;
+       int *apply, *cork, *from, *to;
+
+       apply = bpf_map_lookup_elem(&sock_apply_bytes, &key0);
+       if (apply)
+               bpf_msg_apply_bytes(msg, *apply);
+
+       cork = bpf_map_lookup_elem(&sock_cork_bytes, &key0);
+       if (cork)
+               bpf_msg_cork_bytes(msg, *cork);
+
+       from = bpf_map_lookup_elem(&sock_pull_bytes, &key0);
+       to = bpf_map_lookup_elem(&sock_pull_bytes, &key1);
+       if (from && to)
+               bpf_msg_pull_data(msg, *from, *to, 0);
+
+       return SK_DROP;
+}
+
+
 char _license[] SEC("license") = "GPL";
diff --git a/samples/sockmap/sockmap_test.sh b/samples/sockmap/sockmap_test.sh
new file mode 100755 (executable)
index 0000000..6d8cc40
--- /dev/null
@@ -0,0 +1,450 @@
+#!/bin/sh
+# Test a bunch of positive cases to verify basic functionality.
+# Every combination of verdict program, send method, rate (-r),
+# iov count (-i) and iov length (-l) is run once.
+for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
+for t in "sendmsg" "sendpage"; do
+for r in 1 10 100; do
+       for i in 1 10 100; do
+               for l in 1 10 100; do
+                       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+                       echo $TEST
+                       $TEST
+                       sleep 2
+               done
+       done
+done
+done
+done
+
+#Test max iov
+t="sendmsg"
+r=1
+i=1024
+l=1
+prog="--txmsg"
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+prog="--txmsg_redir"
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+
+# Test max iov with 1k send
+
+t="sendmsg"
+r=1
+i=1024
+l=1024
+prog="--txmsg"
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+prog="--txmsg_redir"
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+
+# Test apply with 1B
+r=1
+i=1024
+l=1024
+prog="--txmsg_apply 1"
+
+for t in "sendmsg" "sendpage"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Test apply with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Test apply with apply that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Test apply and redirect with 1B
+r=1
+i=1024
+l=1024
+prog="--txmsg_redir --txmsg_apply 1"
+
+for t in "sendmsg" "sendpage"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Test apply and redirect with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_redir --txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Test apply and redirect with apply that never reaches limit
+# (--txmsg_redir is required here; without it this stanza only repeats
+# the plain "apply that never reaches limit" test above)
+r=1024
+i=1
+l=1
+prog="--txmsg_redir --txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Test cork with 1B not really useful but test it anyways
+r=1
+i=1024
+l=1024
+prog="--txmsg_cork 1"
+
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Test cork with a more reasonable 100B
+r=1
+i=1000
+l=1000
+prog="--txmsg_cork 100"
+
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Test cork with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Test cork with cork that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Test cork and redirect with 1B
+r=1
+i=1024
+l=1024
+prog="--txmsg_redir --txmsg_cork 1"
+
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Test cork and redirect with a more reasonable 100B
+r=1
+i=1000
+l=1000
+prog="--txmsg_redir --txmsg_cork 100"
+
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Test cork and redirect with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_redir --txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Test cork and redirect with cork that never reaches limit
+# (--txmsg_redir is required here; without it this stanza only repeats
+# the plain "cork that never reaches limit" test above)
+r=1024
+i=1
+l=1
+prog="--txmsg_redir --txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+
+# mix and match cork and apply not really useful but valid programs
+
+# Test apply < cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 10 --txmsg_cork 100"
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Try again with larger sizes so we hit overflow case
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 4096 --txmsg_cork 8096"
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Test apply > cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 100 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Again with larger sizes so we hit overflow cases
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 8096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+
+# Test apply = cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 10 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 4096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Test apply < cork with redirect
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 100"
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Try again with redirect and larger sizes so we hit overflow case
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 8096"
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Test apply > cork with redirect
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 100 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Again with redirect and larger sizes so we hit overflow cases
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 8096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+
+# Test apply = cork with redirect
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+# Tests for bpf_msg_pull_data()
+for i in `seq 99 100 1600`; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+               --txmsg --txmsg_start 0 --txmsg_end $i --txmsg_cork 1600"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+for i in `seq 199 100 1600`; do
+       TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+               --txmsg --txmsg_start 100 --txmsg_end $i --txmsg_cork 1600"
+       echo $TEST
+       $TEST
+       sleep 2
+done
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+       --txmsg --txmsg_start 1500 --txmsg_end 1600 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+       --txmsg --txmsg_start 1111 --txmsg_end 1112 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+       --txmsg --txmsg_start 1111 --txmsg_end 0 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+       --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+       --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1602"
+echo $TEST
+$TEST
+sleep 2
+
+# Run through gamut again with start and end
+for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
+for t in "sendmsg" "sendpage"; do
+for r in 1 10 100; do
+       for i in 1 10 100; do
+               for l in 1 10 100; do
+                       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog --txmsg_start 1 --txmsg_end 2"
+                       echo $TEST
+                       $TEST
+                       sleep 2
+               done
+       done
+done
+done
+done
+
+# Some specific tests to cover specific code paths
+./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
+       -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
+./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
+       -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
+./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
+       -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
+./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
+       -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
index 7c25c0c112bc1578fef071cd31e235ec9eaa01bc..07aa237221d12c46bf3f07e5cb17cb7aebe9a4c3 100644 (file)
@@ -29,6 +29,7 @@
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <sys/types.h>
+#include <sys/sendfile.h>
 
 #include <linux/netlink.h>
 #include <linux/socket.h>
@@ -54,6 +55,16 @@ void running_handler(int a);
 /* global sockets */
 int s1, s2, c1, c2, p1, p2;
 
+/* sk_msg test knobs taken from the command line.
+ * txmsg_pass, txmsg_noisy, txmsg_redir, txmsg_redir_noisy and txmsg_drop
+ * are flags that getopt_long() sets to 1 through long_options[];
+ * txmsg_apply, txmsg_cork, txmsg_start and txmsg_end hold the
+ * atoi()-converted option arguments assigned in main().
+ */
+int txmsg_pass;
+int txmsg_noisy;
+int txmsg_redir;
+int txmsg_redir_noisy;
+int txmsg_drop;
+int txmsg_apply;
+int txmsg_cork;
+int txmsg_start;
+int txmsg_end;
+
 static const struct option long_options[] = {
        {"help",        no_argument,            NULL, 'h' },
        {"cgroup",      required_argument,      NULL, 'c' },
@@ -62,6 +73,16 @@ static const struct option long_options[] = {
        {"iov_count",   required_argument,      NULL, 'i' },
        {"length",      required_argument,      NULL, 'l' },
        {"test",        required_argument,      NULL, 't' },
+       {"data_test",   no_argument,            NULL, 'd' },
+       {"txmsg",               no_argument,    &txmsg_pass,  1  },
+       {"txmsg_noisy",         no_argument,    &txmsg_noisy, 1  },
+       {"txmsg_redir",         no_argument,    &txmsg_redir, 1  },
+       {"txmsg_redir_noisy",   no_argument,    &txmsg_redir_noisy, 1},
+       {"txmsg_drop",          no_argument,    &txmsg_drop, 1 },
+       {"txmsg_apply", required_argument,      NULL, 'a'},
+       {"txmsg_cork",  required_argument,      NULL, 'k'},
+       {"txmsg_start", required_argument,      NULL, 's'},
+       {"txmsg_end",   required_argument,      NULL, 'e'},
        {0, 0, NULL, 0 }
 };
 
@@ -195,19 +216,71 @@ struct msg_stats {
        struct timespec end;
 };
 
+/* Per-run settings handed from main() to the test routines. */
+struct sockmap_options {
+       int verbose;            /* set by -v; makes the ping-pong loop print progress */
+       bool base;              /* receive on p1 instead of p2 in sendmsg_test() */
+       bool sendpage;          /* transmit with msg_loop_sendpage() instead of msg_loop() */
+       bool data_test;         /* set by -d; fill tx buffers with a byte pattern and verify it on rx */
+       bool drop_expected;     /* set when --txmsg_drop is given; a successful send is then an error */
+};
+
+/* Transmit side of the sendpage tests.
+ *
+ * Fills the scratch file ".sendpage_tst.tmp" with iov_length * cnt bytes
+ * of an incrementing (wrapping) unsigned char pattern, then pushes it to
+ * @fd with @cnt sendfile() calls of @iov_length bytes each. The loop is
+ * timed in @s->start/@s->end and sent bytes are added to @s->bytes_sent.
+ *
+ * @opt->drop_expected inverts the success check: a sendfile() that
+ * succeeds is then reported as a failure.
+ *
+ * Returns 0 on success, 1 if the scratch file cannot be created or
+ * reopened, the negative sendfile() result on an unexpected send error,
+ * or -EIO if a drop was expected but the send went through.
+ */
+static int msg_loop_sendpage(int fd, int iov_length, int cnt,
+                            struct msg_stats *s,
+                            struct sockmap_options *opt)
+{
+       bool drop = opt->drop_expected;
+       unsigned char k = 0;
+       FILE *file;
+       int i, fp;
+
+       file = fopen(".sendpage_tst.tmp", "w+");
+       if (!file) {
+               /* never hand a NULL stream to fwrite()/fclose() */
+               perror("create file for sendpage");
+               return 1;
+       }
+       for (i = 0; i < iov_length * cnt; i++, k++)
+               fwrite(&k, sizeof(char), 1, file);
+       fflush(file);
+       fseek(file, 0, SEEK_SET);
+       fclose(file);
+
+       fp = open(".sendpage_tst.tmp", O_RDONLY);
+       if (fp < 0) {
+               /* sendfile() on fd -1 would only report a confusing EBADF */
+               perror("reopen file for sendpage");
+               return 1;
+       }
+       clock_gettime(CLOCK_MONOTONIC, &s->start);
+       for (i = 0; i < cnt; i++) {
+               int sent = sendfile(fd, fp, NULL, iov_length);
+
+               if (!drop && sent < 0) {
+                       perror("send loop error:");
+                       close(fp);
+                       return sent;
+               } else if (drop && sent >= 0) {
+                       printf("sendpage loop error expected: %i\n", sent);
+                       close(fp);
+                       return -EIO;
+               }
+
+               if (sent > 0)
+                       s->bytes_sent += sent;
+       }
+       clock_gettime(CLOCK_MONOTONIC, &s->end);
+       close(fp);
+       return 0;
+}
+
 static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
-                   struct msg_stats *s, bool tx)
+                   struct msg_stats *s, bool tx,
+                   struct sockmap_options *opt)
 {
        struct msghdr msg = {0};
        int err, i, flags = MSG_NOSIGNAL;
        struct iovec *iov;
+       unsigned char k;
+       bool data_test = opt->data_test;
+       bool drop = opt->drop_expected;
 
        iov = calloc(iov_count, sizeof(struct iovec));
        if (!iov)
                return errno;
 
+       k = 0;
        for (i = 0; i < iov_count; i++) {
-               char *d = calloc(iov_length, sizeof(char));
+               unsigned char *d = calloc(iov_length, sizeof(char));
 
                if (!d) {
                        fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
@@ -215,21 +288,34 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
                }
                iov[i].iov_base = d;
                iov[i].iov_len = iov_length;
+
+               if (data_test && tx) {
+                       int j;
+
+                       for (j = 0; j < iov_length; j++)
+                               d[j] = k++;
+               }
        }
 
        msg.msg_iov = iov;
        msg.msg_iovlen = iov_count;
+       k = 0;
 
        if (tx) {
                clock_gettime(CLOCK_MONOTONIC, &s->start);
                for (i = 0; i < cnt; i++) {
                        int sent = sendmsg(fd, &msg, flags);
 
-                       if (sent < 0) {
+                       if (!drop && sent < 0) {
                                perror("send loop error:");
                                goto out_errno;
+                       } else if (drop && sent >= 0) {
+                               printf("send loop error expected: %i\n", sent);
+                               errno = -EIO;
+                               goto out_errno;
                        }
-                       s->bytes_sent += sent;
+                       if (sent > 0)
+                               s->bytes_sent += sent;
                }
                clock_gettime(CLOCK_MONOTONIC, &s->end);
        } else {
@@ -272,6 +358,26 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
                        }
 
                        s->bytes_recvd += recv;
+
+                       if (data_test) {
+                               int j;
+
+                               for (i = 0; i < msg.msg_iovlen; i++) {
+                                       unsigned char *d = iov[i].iov_base;
+
+                                       for (j = 0;
+                                            j < iov[i].iov_len && recv; j++) {
+                                               if (d[j] != k++) {
+                                                       errno = -EIO;
+                                                       fprintf(stderr,
+                                                               "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
+                                                               i, j, d[j], k - 1, d[j+1], k + 1);
+                                                       goto out_errno;
+                                               }
+                                               recv--;
+                                       }
+                               }
+                       }
                }
                clock_gettime(CLOCK_MONOTONIC, &s->end);
        }
@@ -300,7 +406,7 @@ static inline float recvdBps(struct msg_stats s)
 }
 
 static int sendmsg_test(int iov_count, int iov_buf, int cnt,
-                       int verbose, bool base)
+                       struct sockmap_options *opt)
 {
        float sent_Bps = 0, recvd_Bps = 0;
        int rx_fd, txpid, rxpid, err = 0;
@@ -309,14 +415,20 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
 
        errno = 0;
 
-       if (base)
+       if (opt->base)
                rx_fd = p1;
        else
                rx_fd = p2;
 
        rxpid = fork();
        if (rxpid == 0) {
-               err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false);
+               if (opt->drop_expected)
+                       exit(1);
+
+               if (opt->sendpage)
+                       iov_count = 1;
+               err = msg_loop(rx_fd, iov_count, iov_buf,
+                              cnt, &s, false, opt);
                if (err)
                        fprintf(stderr,
                                "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
@@ -339,7 +451,12 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
 
        txpid = fork();
        if (txpid == 0) {
-               err = msg_loop(c1, iov_count, iov_buf, cnt, &s, true);
+               if (opt->sendpage)
+                       err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
+               else
+                       err = msg_loop(c1, iov_count, iov_buf,
+                                      cnt, &s, true, opt);
+
                if (err)
                        fprintf(stderr,
                                "msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
@@ -364,7 +481,7 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
        return err;
 }
 
-static int forever_ping_pong(int rate, int verbose)
+static int forever_ping_pong(int rate, struct sockmap_options *opt)
 {
        struct timeval timeout;
        char buf[1024] = {0};
@@ -429,7 +546,7 @@ static int forever_ping_pong(int rate, int verbose)
                if (rate)
                        sleep(rate);
 
-               if (verbose) {
+               if (opt->verbose) {
                        printf(".");
                        fflush(stdout);
 
@@ -443,20 +560,34 @@ enum {
        PING_PONG,
        SENDMSG,
        BASE,
+       BASE_SENDPAGE,
+       SENDPAGE,
 };
 
 int main(int argc, char **argv)
 {
-       int iov_count = 1, length = 1024, rate = 1, verbose = 0;
+       int iov_count = 1, length = 1024, rate = 1, tx_prog_fd;
        struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
        int opt, longindex, err, cg_fd = 0;
+       struct sockmap_options options = {0};
        int test = PING_PONG;
        char filename[256];
 
-       while ((opt = getopt_long(argc, argv, "hvc:r:i:l:t:",
+       while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
                                  long_options, &longindex)) != -1) {
                switch (opt) {
-               /* Cgroup configuration */
+               case 's':
+                       txmsg_start = atoi(optarg);
+                       break;
+               case 'e':
+                       txmsg_end = atoi(optarg);
+                       break;
+               case 'a':
+                       txmsg_apply = atoi(optarg);
+                       break;
+               case 'k':
+                       txmsg_cork = atoi(optarg);
+                       break;
                case 'c':
                        cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
                        if (cg_fd < 0) {
@@ -470,7 +601,7 @@ int main(int argc, char **argv)
                        rate = atoi(optarg);
                        break;
                case 'v':
-                       verbose = 1;
+                       options.verbose = 1;
                        break;
                case 'i':
                        iov_count = atoi(optarg);
@@ -478,6 +609,9 @@ int main(int argc, char **argv)
                case 'l':
                        length = atoi(optarg);
                        break;
+               case 'd':
+                       options.data_test = true;
+                       break;
                case 't':
                        if (strcmp(optarg, "ping") == 0) {
                                test = PING_PONG;
@@ -485,11 +619,17 @@ int main(int argc, char **argv)
                                test = SENDMSG;
                        } else if (strcmp(optarg, "base") == 0) {
                                test = BASE;
+                       } else if (strcmp(optarg, "base_sendpage") == 0) {
+                               test = BASE_SENDPAGE;
+                       } else if (strcmp(optarg, "sendpage") == 0) {
+                               test = SENDPAGE;
                        } else {
                                usage(argv);
                                return -1;
                        }
                        break;
+               case 0:
+                       break;
                case 'h':
                default:
                        usage(argv);
@@ -515,16 +655,16 @@ int main(int argc, char **argv)
        /* catch SIGINT */
        signal(SIGINT, running_handler);
 
-       /* If base test skip BPF setup */
-       if (test == BASE)
-               goto run;
-
        if (load_bpf_file(filename)) {
                fprintf(stderr, "load_bpf_file: (%s) %s\n",
                        filename, strerror(errno));
                return 1;
        }
 
+       /* If base test skip BPF setup */
+       if (test == BASE || test == BASE_SENDPAGE)
+               goto run;
+
        /* Attach programs to sockmap */
        err = bpf_prog_attach(prog_fd[0], map_fd[0],
                                BPF_SK_SKB_STREAM_PARSER, 0);
@@ -557,15 +697,129 @@ int main(int argc, char **argv)
                goto out;
        }
 
-       if (test == PING_PONG)
-               err = forever_ping_pong(rate, verbose);
-       else if (test == SENDMSG)
-               err = sendmsg_test(iov_count, length, rate, verbose, false);
-       else if (test == BASE)
-               err = sendmsg_test(iov_count, length, rate, verbose, true);
+       /* Attach txmsg program to sockmap */
+       if (txmsg_pass)
+               tx_prog_fd = prog_fd[3];
+       else if (txmsg_noisy)
+               tx_prog_fd = prog_fd[4];
+       else if (txmsg_redir)
+               tx_prog_fd = prog_fd[5];
+       else if (txmsg_redir_noisy)
+               tx_prog_fd = prog_fd[6];
+       else if (txmsg_drop)
+               tx_prog_fd = prog_fd[9];
+       /* apply and cork must be last */
+       else if (txmsg_apply)
+               tx_prog_fd = prog_fd[7];
+       else if (txmsg_cork)
+               tx_prog_fd = prog_fd[8];
        else
+               tx_prog_fd = 0;
+
+       if (tx_prog_fd) {
+               int redir_fd, i = 0;
+
+               err = bpf_prog_attach(tx_prog_fd,
+                                     map_fd[1], BPF_SK_MSG_VERDICT, 0);
+               if (err) {
+                       fprintf(stderr,
+                               "ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
+                               err, strerror(errno));
+                       return err;
+               }
+
+               err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
+               if (err) {
+                       fprintf(stderr,
+                               "ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
+                               err, strerror(errno));
+                       return err;
+               }
+
+               if (txmsg_redir || txmsg_redir_noisy)
+                       redir_fd = c2;
+               else
+                       redir_fd = c1;
+
+               err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
+               if (err) {
+                       fprintf(stderr,
+                               "ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
+                               err, strerror(errno));
+                       return err;
+               }
+
+               if (txmsg_apply) {
+                       err = bpf_map_update_elem(map_fd[3],
+                                                 &i, &txmsg_apply, BPF_ANY);
+                       if (err) {
+                               fprintf(stderr,
+                                       "ERROR: bpf_map_update_elem (apply_bytes):  %d (%s\n",
+                                       err, strerror(errno));
+                               return err;
+                       }
+               }
+
+               if (txmsg_cork) {
+                       err = bpf_map_update_elem(map_fd[4],
+                                                 &i, &txmsg_cork, BPF_ANY);
+                       if (err) {
+                               fprintf(stderr,
+                                       "ERROR: bpf_map_update_elem (cork_bytes):  %d (%s\n",
+                                       err, strerror(errno));
+                               return err;
+                       }
+               }
+
+               if (txmsg_start) {
+                       err = bpf_map_update_elem(map_fd[5],
+                                                 &i, &txmsg_start, BPF_ANY);
+                       if (err) {
+                               fprintf(stderr,
+                                       "ERROR: bpf_map_update_elem (txmsg_start):  %d (%s)\n",
+                                       err, strerror(errno));
+                               return err;
+                       }
+               }
+
+               if (txmsg_end) {
+                       i = 1;
+                       err = bpf_map_update_elem(map_fd[5],
+                                                 &i, &txmsg_end, BPF_ANY);
+                       if (err) {
+                               fprintf(stderr,
+                                       "ERROR: bpf_map_update_elem (txmsg_end):  %d (%s)\n",
+                                       err, strerror(errno));
+                               return err;
+                       }
+               }
+       }
+
+       if (txmsg_drop)
+               options.drop_expected = true;
+
+       if (test == PING_PONG)
+               err = forever_ping_pong(rate, &options);
+       else if (test == SENDMSG) {
+               options.base = false;
+               options.sendpage = false;
+               err = sendmsg_test(iov_count, length, rate, &options);
+       } else if (test == SENDPAGE) {
+               options.base = false;
+               options.sendpage = true;
+               err = sendmsg_test(iov_count, length, rate, &options);
+       } else if (test == BASE) {
+               options.base = true;
+               options.sendpage = false;
+               err = sendmsg_test(iov_count, length, rate, &options);
+       } else if (test == BASE_SENDPAGE) {
+               options.base = true;
+               options.sendpage = true;
+               err = sendmsg_test(iov_count, length, rate, &options);
+       } else
                fprintf(stderr, "unknown test\n");
 out:
+       bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
        close(s1);
        close(s2);
        close(p1);
index 47cddf32aeba025f2741dbc7f48d596f4f6653fa..4f2b25d43ec9b46923e52ae5d6f2bf789a6465b4 100644 (file)
@@ -256,6 +256,8 @@ __objtool_obj := $(objtree)/tools/objtool/objtool
 
 objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
 
+objtool_args += $(if $(part-of-module), --module,)
+
 ifndef CONFIG_FRAME_POINTER
 objtool_args += --no-fp
 endif
@@ -264,6 +266,12 @@ objtool_args += --no-unreachable
 else
 objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable)
 endif
+ifdef CONFIG_RETPOLINE
+ifneq ($(RETPOLINE_CFLAGS),)
+  objtool_args += --retpoline
+endif
+endif
+
 
 ifdef CONFIG_MODVERSIONS
 objtool_o = $(@D)/.tmp_$(@F)
index 5589bae34af6299ca26bc0ae7760dc1586949430..a6f538b31ad6c381d28f348707831a6dfd45a872 100644 (file)
@@ -297,11 +297,11 @@ cmd_dt_S_dtb=                                             \
        echo '\#include <asm-generic/vmlinux.lds.h>';   \
        echo '.section .dtb.init.rodata,"a"';           \
        echo '.balign STRUCT_ALIGNMENT';                \
-       echo '.global __dtb_$(*F)_begin';               \
-       echo '__dtb_$(*F)_begin:';                      \
+       echo '.global __dtb_$(subst -,_,$(*F))_begin';  \
+       echo '__dtb_$(subst -,_,$(*F))_begin:';         \
        echo '.incbin "$<" ';                           \
-       echo '__dtb_$(*F)_end:';                        \
-       echo '.global __dtb_$(*F)_end';                 \
+       echo '__dtb_$(subst -,_,$(*F))_end:';           \
+       echo '.global __dtb_$(subst -,_,$(*F))_end';    \
        echo '.balign STRUCT_ALIGNMENT';                \
 ) > $@
 
index fa3d39b6f23bbc0c3ea412b2e05584cddc8482a2..449b68c4c90cbecc6ee76b777e4fde603f3694b7 100644 (file)
  * (Note: it'd be easy to port over the complete mkdep state machine,
  *  but I don't think the added complexity is worth it)
  */
-/*
- * Note 2: if somebody writes HELLO_CONFIG_BOOM in a file, it will depend onto
- * CONFIG_BOOM. This could seem a bug (not too hard to fix), but please do not
- * fix it! Some UserModeLinux files (look at arch/um/) call CONFIG_BOOM as
- * UML_CONFIG_BOOM, to avoid conflicts with /usr/include/linux/autoconf.h,
- * through arch/um/include/uml-config.h; this fixdep "bug" makes sure that
- * those files will have correct dependencies.
- */
 
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -233,8 +225,13 @@ static int str_ends_with(const char *s, int slen, const char *sub)
 static void parse_config_file(const char *p)
 {
        const char *q, *r;
+       const char *start = p;
 
        while ((p = strstr(p, "CONFIG_"))) {
+               if (p > start && (isalnum(p[-1]) || p[-1] == '_')) {
+                       p += 7;
+                       continue;
+               }
                p += 7;
                q = p;
                while (*q && (isalnum(*q) || *q == '_'))
@@ -286,8 +283,6 @@ static int is_ignored_file(const char *s, int len)
 {
        return str_ends_with(s, len, "include/generated/autoconf.h") ||
               str_ends_with(s, len, "include/generated/autoksyms.h") ||
-              str_ends_with(s, len, "arch/um/include/uml-config.h") ||
-              str_ends_with(s, len, "include/linux/kconfig.h") ||
               str_ends_with(s, len, ".ver");
 }
 
index 94b664817ad91e2e48c8fef6361a20ab2a632763..d84a5674e95e3aeeb77a445db46096b3bdf31e3d 100755 (executable)
@@ -15,7 +15,7 @@ signal(SIGPIPE, SIG_DFL)
 if len(sys.argv) < 3:
     sys.stderr.write("usage: %s [option] file1 file2\n" % sys.argv[0])
     sys.stderr.write("The options are:\n")
-    sys.stderr.write("-c       cateogrize output based on symbole type\n")
+    sys.stderr.write("-c       categorize output based on symbol type\n")
     sys.stderr.write("-d       Show delta of Data Section\n")
     sys.stderr.write("-t       Show delta of text Section\n")
     sys.exit(-1)
index 1249b727644b742db1c215eda9691e3d5e69d877..8fd6437beda80778554cd43c47d3e120e0f0c4d2 100644 (file)
@@ -56,10 +56,10 @@ statement S;
 p << r.p;
 @@
 
-coccilib.org.print_todo(p[0], "WARNING opportunity for kmemdep")
+coccilib.org.print_todo(p[0], "WARNING opportunity for kmemdup")
 
 @script:python depends on report@
 p << r.p;
 @@
 
-coccilib.report.print_report(p[0], "WARNING opportunity for kmemdep")
+coccilib.report.print_report(p[0], "WARNING opportunity for kmemdup")
index 9ee9bf7fd1a2113bfb869cf53b6d26d2f0852455..65792650c63057f0ed7edf7223dad30c06ac2391 100644 (file)
@@ -595,7 +595,7 @@ static void optimize_result(void)
                 * original char code */
                if (!best_table_len[i]) {
 
-                       /* find the token with the breates profit value */
+                       /* find the token with the best profit value */
                        best = find_best_token();
                        if (token_profit[best] == 0)
                                break;
index 5c12dc91ef348ea5a64a31edeb98657cc93512b4..df26c7b0fe13b611087ad8fe73fb7b1ae2257971 100644 (file)
@@ -178,7 +178,7 @@ static int conf_set_sym_val(struct symbol *sym, int def, int def_flags, char *p)
        case S_HEX:
        done:
                if (sym_string_valid(sym, p)) {
-                       sym->def[def].val = strdup(p);
+                       sym->def[def].val = xstrdup(p);
                        sym->flags |= def_flags;
                } else {
                        if (def != S_DEF_AUTO)
index 2858738b22d5aeac27154f74af433540839d37a6..240880a89111df06e95da0b44b08fa8aa184fa58 100644 (file)
@@ -101,7 +101,7 @@ static struct message *message__new(const char *msg, char *option,
        if (self->files == NULL)
                goto out_fail;
 
-       self->msg = strdup(msg);
+       self->msg = xstrdup(msg);
        if (self->msg == NULL)
                goto out_fail_msg;
 
index 4e23febbe4b2836451ab91327c634b268555bc53..2d5ec2d0e95293c8293adacb229dc2ecb5e6f170 100644 (file)
@@ -115,6 +115,7 @@ int file_write_dep(const char *name);
 void *xmalloc(size_t size);
 void *xcalloc(size_t nmemb, size_t size);
 void *xrealloc(void *p, size_t size);
+char *xstrdup(const char *s);
 
 struct gstr {
        size_t len;
index a10bd9d6fafd003a57aeb2baf24d4bc6dd04c982..6c0bcd9c472d6bee0b9fdadab18e9d46e5b05cdb 100755 (executable)
@@ -55,7 +55,8 @@ EOF
            echo " *** required header files."                            1>&2
            echo " *** 'make menuconfig' requires the ncurses libraries." 1>&2
            echo " *** "                                                  1>&2
-           echo " *** Install ncurses (ncurses-devel) and try again."    1>&2
+           echo " *** Install ncurses (ncurses-devel or libncurses-dev " 1>&2
+           echo " *** depending on your distribution) and try again."    1>&2
            echo " *** "                                                  1>&2
            exit 1
        fi
index 99222855544c3a7c2f67b3c33b3796eb36e5ff80..36cd3e1f1c28895b1a64740d8bcac17454197d70 100644 (file)
@@ -212,6 +212,7 @@ void menu_add_option(int token, char *arg)
                        sym_defconfig_list = current_entry->sym;
                else if (sym_defconfig_list != current_entry->sym)
                        zconf_error("trying to redefine defconfig symbol");
+               sym_defconfig_list->flags |= SYMBOL_AUTO;
                break;
        case T_OPT_ENV:
                prop_add_env(arg);
index cca9663be5ddd918703d534ef95368ccc44db322..2220bc4b051bd914e34bd20beb351b98da5ecc86 100644 (file)
@@ -183,7 +183,7 @@ static void sym_validate_range(struct symbol *sym)
                sprintf(str, "%lld", val2);
        else
                sprintf(str, "0x%llx", val2);
-       sym->curr.val = strdup(str);
+       sym->curr.val = xstrdup(str);
 }
 
 static void sym_set_changed(struct symbol *sym)
@@ -849,7 +849,7 @@ struct symbol *sym_lookup(const char *name, int flags)
                                   : !(symbol->flags & (SYMBOL_CONST|SYMBOL_CHOICE))))
                                return symbol;
                }
-               new_name = strdup(name);
+               new_name = xstrdup(name);
        } else {
                new_name = NULL;
                hash = 0;
index b98a79e30e04a4017bd33264e85b2c40e46bc3af..c6f6e21b809ffe7a6f60acd2a7f016ee88971d5c 100644 (file)
@@ -154,3 +154,14 @@ void *xrealloc(void *p, size_t size)
        fprintf(stderr, "Out of memory.\n");
        exit(1);
 }
+
+char *xstrdup(const char *s)
+{
+       char *p;
+
+       p = strdup(s);
+       if (p)
+               return p;
+       fprintf(stderr, "Out of memory.\n");
+       exit(1);
+}
index 02de6fe302a9aec4fc747e5728f5d59f48946174..88b650eb9cc9141233d5da9be8702b9069de65e6 100644 (file)
@@ -332,16 +332,12 @@ void zconf_nextfile(const char *name)
                                "Inclusion path:\n  current file : '%s'\n",
                                zconf_curname(), zconf_lineno(),
                                zconf_curname());
-                       iter = current_file->parent;
-                       while (iter && \
-                              strcmp(iter->name,current_file->name)) {
-                               fprintf(stderr, "  included from: '%s:%d'\n",
-                                       iter->name, iter->lineno-1);
+                       iter = current_file;
+                       do {
                                iter = iter->parent;
-                       }
-                       if (iter)
                                fprintf(stderr, "  included from: '%s:%d'\n",
-                                       iter->name, iter->lineno+1);
+                                       iter->name, iter->lineno - 1);
+                       } while (strcmp(iter->name, current_file->name));
                        exit(1);
                }
        }
index 4be98050b961fe73df6bf6516b1e5a7f8135d149..ad6305b0f40cb962edf922ae73639c75b63c60c0 100644 (file)
@@ -127,7 +127,7 @@ no_mainmenu_stmt: /* empty */
         * later regardless of whether it comes from the 'prompt' in
         * mainmenu_stmt or here
         */
-       menu_add_prompt(P_MENU, strdup("Linux Kernel Configuration"), NULL);
+       menu_add_prompt(P_MENU, xstrdup("Linux Kernel Configuration"), NULL);
 };
 
 
@@ -276,6 +276,7 @@ choice: T_CHOICE word_opt T_EOL
        sym->flags |= SYMBOL_AUTO;
        menu_add_entry(sym);
        menu_add_expr(P_CHOICE, NULL, NULL);
+       free($2);
        printd(DEBUG_PARSE, "%s:%d:choice\n", zconf_curname(), zconf_lineno());
 };
 
index c0d129d7f4304abfac7f2e7d699000df02678f73..be56a1153014af54af0db2bf23c92306c4a8b4fb 100755 (executable)
@@ -246,7 +246,7 @@ else
 fi;
 
 # final build of init/
-${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init GCC_PLUGINS_CFLAGS="${GCC_PLUGINS_CFLAGS}"
+${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init
 
 archive_builtin
 
index 8644d864e3c196ca3dc8a10cb84c2daa97e3a596..b4d7b6242a404476b959948fbefa4a7e8c95cd24 100644 (file)
@@ -6743,6 +6743,7 @@ static void __net_exit selinux_nf_unregister(struct net *net)
 static struct pernet_operations selinux_net_ops = {
        .init = selinux_nf_register,
        .exit = selinux_nf_unregister,
+       .async = true,
 };
 
 static int __init selinux_nf_ip_init(void)
index e36d17835d4ff3dffff8bb42ed3e50bb7e1af571..3f29c03162ca571c7b1dcf90b15be21dc28fa43c 100644 (file)
@@ -89,6 +89,7 @@ static void __net_exit smack_nf_unregister(struct net *net)
 static struct pernet_operations smack_net_ops = {
        .init = smack_nf_register,
        .exit = smack_nf_unregister,
+       .async = true,
 };
 
 static int __init smack_nf_ip_init(void)
index 0b3026d937b101918581edab80aa3b2803ecec5a..8a77620a38548ef8ad36d5b9bf154a14a785f568 100644 (file)
@@ -889,7 +889,7 @@ static int snd_ctl_elem_read(struct snd_card *card,
 
        index_offset = snd_ctl_get_ioff(kctl, &control->id);
        vd = &kctl->vd[index_offset];
-       if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) && kctl->get == NULL)
+       if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL)
                return -EPERM;
 
        snd_ctl_build_ioff(&control->id, kctl, index_offset);
index b044c0a5a674b116e9441c13558deb0c0a4e70f9..02298c9c602046b56406b7da87d0c86ff89f2a24 100644 (file)
@@ -1762,10 +1762,9 @@ static int snd_pcm_oss_get_formats(struct snd_pcm_oss_file *pcm_oss_file)
                return -ENOMEM;
        _snd_pcm_hw_params_any(params);
        err = snd_pcm_hw_refine(substream, params);
-       format_mask = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT);
-       kfree(params);
        if (err < 0)
-               return err;
+               goto error;
+       format_mask = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT);
        for (fmt = 0; fmt < 32; ++fmt) {
                if (snd_mask_test(format_mask, fmt)) {
                        int f = snd_pcm_oss_format_to(fmt);
@@ -1773,7 +1772,10 @@ static int snd_pcm_oss_get_formats(struct snd_pcm_oss_file *pcm_oss_file)
                                formats |= f;
                }
        }
-       return formats;
+
+ error:
+       kfree(params);
+       return err < 0 ? err : formats;
 }
 
 static int snd_pcm_oss_set_format(struct snd_pcm_oss_file *pcm_oss_file, int format)
index 04d4db44fae5c9199754aa80066ae0c1220a41d5..61a07fe34cd271e60dc0c31a7dddae750c2532b1 100644 (file)
@@ -255,12 +255,12 @@ static int seq_free_client1(struct snd_seq_client *client)
 
        if (!client)
                return 0;
-       snd_seq_delete_all_ports(client);
-       snd_seq_queue_client_leave(client->number);
        spin_lock_irqsave(&clients_lock, flags);
        clienttablock[client->number] = 1;
        clienttab[client->number] = NULL;
        spin_unlock_irqrestore(&clients_lock, flags);
+       snd_seq_delete_all_ports(client);
+       snd_seq_queue_client_leave(client->number);
        snd_use_lock_sync(&client->use_lock);
        snd_seq_queue_client_termination(client->number);
        if (client->pool)
@@ -910,7 +910,8 @@ int snd_seq_dispatch_event(struct snd_seq_event_cell *cell, int atomic, int hop)
 static int snd_seq_client_enqueue_event(struct snd_seq_client *client,
                                        struct snd_seq_event *event,
                                        struct file *file, int blocking,
-                                       int atomic, int hop)
+                                       int atomic, int hop,
+                                       struct mutex *mutexp)
 {
        struct snd_seq_event_cell *cell;
        int err;
@@ -948,7 +949,8 @@ static int snd_seq_client_enqueue_event(struct snd_seq_client *client,
                return -ENXIO; /* queue is not allocated */
 
        /* allocate an event cell */
-       err = snd_seq_event_dup(client->pool, event, &cell, !blocking || atomic, file);
+       err = snd_seq_event_dup(client->pool, event, &cell, !blocking || atomic,
+                               file, mutexp);
        if (err < 0)
                return err;
 
@@ -1017,12 +1019,11 @@ static ssize_t snd_seq_write(struct file *file, const char __user *buf,
                return -ENXIO;
 
        /* allocate the pool now if the pool is not allocated yet */ 
+       mutex_lock(&client->ioctl_mutex);
        if (client->pool->size > 0 && !snd_seq_write_pool_allocated(client)) {
-               mutex_lock(&client->ioctl_mutex);
                err = snd_seq_pool_init(client->pool);
-               mutex_unlock(&client->ioctl_mutex);
                if (err < 0)
-                       return -ENOMEM;
+                       goto out;
        }
 
        /* only process whole events */
@@ -1073,7 +1074,7 @@ static ssize_t snd_seq_write(struct file *file, const char __user *buf,
                /* ok, enqueue it */
                err = snd_seq_client_enqueue_event(client, &event, file,
                                                   !(file->f_flags & O_NONBLOCK),
-                                                  0, 0);
+                                                  0, 0, &client->ioctl_mutex);
                if (err < 0)
                        break;
 
@@ -1084,6 +1085,8 @@ static ssize_t snd_seq_write(struct file *file, const char __user *buf,
                written += len;
        }
 
+ out:
+       mutex_unlock(&client->ioctl_mutex);
        return written ? written : err;
 }
 
@@ -1838,9 +1841,11 @@ static int snd_seq_ioctl_set_client_pool(struct snd_seq_client *client,
            (! snd_seq_write_pool_allocated(client) ||
             info->output_pool != client->pool->size)) {
                if (snd_seq_write_pool_allocated(client)) {
+                       /* is the pool in use? */
+                       if (atomic_read(&client->pool->counter))
+                               return -EBUSY;
                        /* remove all existing cells */
                        snd_seq_pool_mark_closing(client->pool);
-                       snd_seq_queue_client_leave_cells(client->number);
                        snd_seq_pool_done(client->pool);
                }
                client->pool->size = info->output_pool;
@@ -2260,7 +2265,8 @@ static int kernel_client_enqueue(int client, struct snd_seq_event *ev,
        if (! cptr->accept_output)
                result = -EPERM;
        else /* send it */
-               result = snd_seq_client_enqueue_event(cptr, ev, file, blocking, atomic, hop);
+               result = snd_seq_client_enqueue_event(cptr, ev, file, blocking,
+                                                     atomic, hop, NULL);
 
        snd_seq_client_unlock(cptr);
        return result;
index a8c2822e01984ff207c8a46b5ebd67546ee8b2eb..72c0302a55d23c05720d6062bef600b40fec6971 100644 (file)
@@ -125,7 +125,7 @@ int snd_seq_fifo_event_in(struct snd_seq_fifo *f,
                return -EINVAL;
 
        snd_use_lock_use(&f->use_lock);
-       err = snd_seq_event_dup(f->pool, event, &cell, 1, NULL); /* always non-blocking */
+       err = snd_seq_event_dup(f->pool, event, &cell, 1, NULL, NULL); /* always non-blocking */
        if (err < 0) {
                if ((err == -ENOMEM) || (err == -EAGAIN))
                        atomic_inc(&f->overflow);
index f763682584a8f09837240b683a2da5413149b728..ab1112e90f88dbd29bae5eea8edd175175504edc 100644 (file)
@@ -220,7 +220,8 @@ void snd_seq_cell_free(struct snd_seq_event_cell * cell)
  */
 static int snd_seq_cell_alloc(struct snd_seq_pool *pool,
                              struct snd_seq_event_cell **cellp,
-                             int nonblock, struct file *file)
+                             int nonblock, struct file *file,
+                             struct mutex *mutexp)
 {
        struct snd_seq_event_cell *cell;
        unsigned long flags;
@@ -244,7 +245,11 @@ static int snd_seq_cell_alloc(struct snd_seq_pool *pool,
                set_current_state(TASK_INTERRUPTIBLE);
                add_wait_queue(&pool->output_sleep, &wait);
                spin_unlock_irq(&pool->lock);
+               if (mutexp)
+                       mutex_unlock(mutexp);
                schedule();
+               if (mutexp)
+                       mutex_lock(mutexp);
                spin_lock_irq(&pool->lock);
                remove_wait_queue(&pool->output_sleep, &wait);
                /* interrupted? */
@@ -287,7 +292,7 @@ static int snd_seq_cell_alloc(struct snd_seq_pool *pool,
  */
 int snd_seq_event_dup(struct snd_seq_pool *pool, struct snd_seq_event *event,
                      struct snd_seq_event_cell **cellp, int nonblock,
-                     struct file *file)
+                     struct file *file, struct mutex *mutexp)
 {
        int ncells, err;
        unsigned int extlen;
@@ -304,7 +309,7 @@ int snd_seq_event_dup(struct snd_seq_pool *pool, struct snd_seq_event *event,
        if (ncells >= pool->total_elements)
                return -ENOMEM;
 
-       err = snd_seq_cell_alloc(pool, &cell, nonblock, file);
+       err = snd_seq_cell_alloc(pool, &cell, nonblock, file, mutexp);
        if (err < 0)
                return err;
 
@@ -330,7 +335,8 @@ int snd_seq_event_dup(struct snd_seq_pool *pool, struct snd_seq_event *event,
                        int size = sizeof(struct snd_seq_event);
                        if (len < size)
                                size = len;
-                       err = snd_seq_cell_alloc(pool, &tmp, nonblock, file);
+                       err = snd_seq_cell_alloc(pool, &tmp, nonblock, file,
+                                                mutexp);
                        if (err < 0)
                                goto __error;
                        if (cell->event.data.ext.ptr == NULL)
index 32f959c17786d9ac8c071ba0e6fd070dc06da78b..3abe306c394af95c8dbdb99475f7829a17efc33c 100644 (file)
@@ -66,7 +66,8 @@ struct snd_seq_pool {
 void snd_seq_cell_free(struct snd_seq_event_cell *cell);
 
 int snd_seq_event_dup(struct snd_seq_pool *pool, struct snd_seq_event *event,
-                     struct snd_seq_event_cell **cellp, int nonblock, struct file *file);
+                     struct snd_seq_event_cell **cellp, int nonblock,
+                     struct file *file, struct mutex *mutexp);
 
 /* return number of unused (free) cells */
 static inline int snd_seq_unused_cells(struct snd_seq_pool *pool)
index bc1c8488fc2a1508d9572617e9030ba180477fd0..2bc6759e4adcf6a794efc22e4707b8f228d6b362 100644 (file)
@@ -87,7 +87,7 @@ void snd_seq_prioq_delete(struct snd_seq_prioq **fifo)
        if (f->cells > 0) {
                /* drain prioQ */
                while (f->cells > 0)
-                       snd_seq_cell_free(snd_seq_prioq_cell_out(f));
+                       snd_seq_cell_free(snd_seq_prioq_cell_out(f, NULL));
        }
        
        kfree(f);
@@ -214,8 +214,18 @@ int snd_seq_prioq_cell_in(struct snd_seq_prioq * f,
        return 0;
 }
 
+/* return 1 if the current time >= event timestamp */
+static int event_is_ready(struct snd_seq_event *ev, void *current_time)
+{
+       if ((ev->flags & SNDRV_SEQ_TIME_STAMP_MASK) == SNDRV_SEQ_TIME_STAMP_TICK)
+               return snd_seq_compare_tick_time(current_time, &ev->time.tick);
+       else
+               return snd_seq_compare_real_time(current_time, &ev->time.time);
+}
+
 /* dequeue cell from prioq */
-struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f)
+struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f,
+                                                 void *current_time)
 {
        struct snd_seq_event_cell *cell;
        unsigned long flags;
@@ -227,6 +237,8 @@ struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f)
        spin_lock_irqsave(&f->lock, flags);
 
        cell = f->head;
+       if (cell && current_time && !event_is_ready(&cell->event, current_time))
+               cell = NULL;
        if (cell) {
                f->head = cell->next;
 
@@ -252,18 +264,6 @@ int snd_seq_prioq_avail(struct snd_seq_prioq * f)
        return f->cells;
 }
 
-
-/* peek at cell at the head of the prioq */
-struct snd_seq_event_cell *snd_seq_prioq_cell_peek(struct snd_seq_prioq * f)
-{
-       if (f == NULL) {
-               pr_debug("ALSA: seq: snd_seq_prioq_cell_in() called with NULL prioq\n");
-               return NULL;
-       }
-       return f->head;
-}
-
-
 static inline int prioq_match(struct snd_seq_event_cell *cell,
                              int client, int timestamp)
 {
index d38bb78d934545b56e87f248ac1b6b46f34be8e7..2c315ca10fc4c1a8ef5eddd20e3731705fa8097c 100644 (file)
@@ -44,14 +44,12 @@ void snd_seq_prioq_delete(struct snd_seq_prioq **fifo);
 int snd_seq_prioq_cell_in(struct snd_seq_prioq *f, struct snd_seq_event_cell *cell);
 
 /* dequeue cell from prioq */ 
-struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f);
+struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f,
+                                                 void *current_time);
 
 /* return number of events available in prioq */
 int snd_seq_prioq_avail(struct snd_seq_prioq *f);
 
-/* peek at cell at the head of the prioq */
-struct snd_seq_event_cell *snd_seq_prioq_cell_peek(struct snd_seq_prioq *f);
-
 /* client left queue */
 void snd_seq_prioq_leave(struct snd_seq_prioq *f, int client, int timestamp);        
 
index 0428e9061b47c63d9c4d414d98b6f633515f79ef..b377f50483529e969dbd65cdea65f0ab80e01863 100644 (file)
@@ -277,30 +277,20 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop)
 
       __again:
        /* Process tick queue... */
-       while ((cell = snd_seq_prioq_cell_peek(q->tickq)) != NULL) {
-               if (snd_seq_compare_tick_time(&q->timer->tick.cur_tick,
-                                             &cell->event.time.tick)) {
-                       cell = snd_seq_prioq_cell_out(q->tickq);
-                       if (cell)
-                               snd_seq_dispatch_event(cell, atomic, hop);
-               } else {
-                       /* event remains in the queue */
+       for (;;) {
+               cell = snd_seq_prioq_cell_out(q->tickq,
+                                             &q->timer->tick.cur_tick);
+               if (!cell)
                        break;
-               }
+               snd_seq_dispatch_event(cell, atomic, hop);
        }
 
-
        /* Process time queue... */
-       while ((cell = snd_seq_prioq_cell_peek(q->timeq)) != NULL) {
-               if (snd_seq_compare_real_time(&q->timer->cur_time,
-                                             &cell->event.time.time)) {
-                       cell = snd_seq_prioq_cell_out(q->timeq);
-                       if (cell)
-                               snd_seq_dispatch_event(cell, atomic, hop);
-               } else {
-                       /* event remains in the queue */
+       for (;;) {
+               cell = snd_seq_prioq_cell_out(q->timeq, &q->timer->cur_time);
+               if (!cell)
                        break;
-               }
+               snd_seq_dispatch_event(cell, atomic, hop);
        }
 
        /* free lock */
index c71dcacea807bf0e0d11aa147401acd12280686c..d5017adf9febc4f591e54ef26447603975da8282 100644 (file)
@@ -186,6 +186,10 @@ module_param(power_save, xint, 0644);
 MODULE_PARM_DESC(power_save, "Automatic power-saving timeout "
                 "(in second, 0 = disable).");
 
+static bool pm_blacklist = true;
+module_param(pm_blacklist, bool, 0644);
+MODULE_PARM_DESC(pm_blacklist, "Enable power-management blacklist");
+
 /* reset the HD-audio controller in power save mode.
  * this may give more power-saving, but will take longer time to
  * wake up.
@@ -2186,6 +2190,24 @@ static int azx_probe(struct pci_dev *pci,
        return err;
 }
 
+#ifdef CONFIG_PM
+/* On some boards setting power_save to a non 0 value leads to clicking /
+ * popping sounds whenever we enter/leave powersaving mode. Ideally we would
+ * figure out how to avoid these sounds, but that is not always feasible.
+ * So we keep a list of devices where we disable powersaving as it is known
+ * to cause problems on these devices.
+ */
+static struct snd_pci_quirk power_save_blacklist[] = {
+       /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
+       SND_PCI_QUIRK(0x1849, 0x0c0c, "Asrock B85M-ITX", 0),
+       /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
+       SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0),
+       /* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */
+       SND_PCI_QUIRK(0x17aa, 0x2227, "Lenovo X1 Carbon 3rd Gen", 0),
+       {}
+};
+#endif /* CONFIG_PM */
+
 /* number of codec slots for each chipset: 0 = default slots (i.e. 4) */
 static unsigned int azx_max_codecs[AZX_NUM_DRIVERS] = {
        [AZX_DRIVER_NVIDIA] = 8,
@@ -2198,6 +2220,7 @@ static int azx_probe_continue(struct azx *chip)
        struct hdac_bus *bus = azx_bus(chip);
        struct pci_dev *pci = chip->pci;
        int dev = chip->dev_index;
+       int val;
        int err;
 
        hda->probe_continued = 1;
@@ -2278,7 +2301,21 @@ static int azx_probe_continue(struct azx *chip)
 
        chip->running = 1;
        azx_add_card_list(chip);
-       snd_hda_set_power_save(&chip->bus, power_save * 1000);
+
+       val = power_save;
+#ifdef CONFIG_PM
+       if (pm_blacklist) {
+               const struct snd_pci_quirk *q;
+
+               q = snd_pci_quirk_lookup(chip->pci, power_save_blacklist);
+               if (q && val) {
+                       dev_info(chip->card->dev, "device %04x:%04x is on the power_save blacklist, forcing power_save to 0\n",
+                                q->subvendor, q->subdevice);
+                       val = 0;
+               }
+       }
+#endif /* CONFIG_PM */
+       snd_hda_set_power_save(&chip->bus, val * 1000);
        if (azx_has_pm_runtime(chip) || hda->use_vga_switcheroo)
                pm_runtime_put_autosuspend(&pci->dev);
 
index 37e1cf8218ff0f864de4635d0188ed5b9b91d73c..5b4dbcec6de8dab957f045786d4808b4edfd573f 100644 (file)
@@ -957,6 +957,8 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
        SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC),
        SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC),
        SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK),
+       SND_PCI_QUIRK(0x103c, 0x807C, "HP EliteBook 820 G3", CXT_FIXUP_HP_DOCK),
+       SND_PCI_QUIRK(0x103c, 0x80FD, "HP ProBook 640 G2", CXT_FIXUP_HP_DOCK),
        SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE),
        SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC),
        SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO),
index ce28f7ce64e63774655a421bf1102b7c354786dc..9af301c6bba24af68e5f778427a8269f2f2e2730 100644 (file)
@@ -4997,13 +4997,14 @@ static void alc_fixup_tpt470_dock(struct hda_codec *codec,
 
        if (action == HDA_FIXUP_ACT_PRE_PROBE) {
                spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP;
+               snd_hda_apply_pincfgs(codec, pincfgs);
+       } else if (action == HDA_FIXUP_ACT_INIT) {
                /* Enable DOCK device */
                snd_hda_codec_write(codec, 0x17, 0,
                            AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0);
                /* Enable DOCK device */
                snd_hda_codec_write(codec, 0x19, 0,
                            AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0);
-               snd_hda_apply_pincfgs(codec, pincfgs);
        }
 }
 
@@ -5273,6 +5274,16 @@ static void alc298_fixup_speaker_volume(struct hda_codec *codec,
        }
 }
 
+/* disable DAC3 (0x06) selection on NID 0x17 as it has no volume amp control */
+static void alc295_fixup_disable_dac3(struct hda_codec *codec,
+                                     const struct hda_fixup *fix, int action)
+{
+       if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+               hda_nid_t conn[2] = { 0x02, 0x03 };
+               snd_hda_override_conn_list(codec, 0x17, 2, conn);
+       }
+}
+
 /* Hook to update amp GPIO4 for automute */
 static void alc280_hp_gpio4_automute_hook(struct hda_codec *codec,
                                          struct hda_jack_callback *jack)
@@ -5465,6 +5476,7 @@ enum {
        ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY,
        ALC255_FIXUP_DELL_SPK_NOISE,
        ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
+       ALC295_FIXUP_DISABLE_DAC3,
        ALC280_FIXUP_HP_HEADSET_MIC,
        ALC221_FIXUP_HP_FRONT_MIC,
        ALC292_FIXUP_TPT460,
@@ -5479,10 +5491,12 @@ enum {
        ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE,
        ALC233_FIXUP_LENOVO_MULTI_CODECS,
        ALC294_FIXUP_LENOVO_MIC_LOCATION,
+       ALC225_FIXUP_DELL_WYSE_MIC_NO_PRESENCE,
        ALC700_FIXUP_INTEL_REFERENCE,
        ALC274_FIXUP_DELL_BIND_DACS,
        ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
        ALC298_FIXUP_TPT470_DOCK,
+       ALC255_FIXUP_DUMMY_LINEOUT_VERB,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -6197,6 +6211,10 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
        },
+       [ALC295_FIXUP_DISABLE_DAC3] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc295_fixup_disable_dac3,
+       },
        [ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = {
                .type = HDA_FIXUP_PINS,
                .v.pins = (const struct hda_pintbl[]) {
@@ -6282,6 +6300,18 @@ static const struct hda_fixup alc269_fixups[] = {
                        { }
                },
        },
+       [ALC225_FIXUP_DELL_WYSE_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x16, 0x0101102f }, /* Rear Headset HP */
+                       { 0x19, 0x02a1913c }, /* use as Front headset mic, without its own jack detect */
+                       { 0x1a, 0x01a19030 }, /* Rear Headset MIC */
+                       { 0x1b, 0x02011020 },
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
+       },
        [ALC700_FIXUP_INTEL_REFERENCE] = {
                .type = HDA_FIXUP_VERBS,
                .v.verbs = (const struct hda_verb[]) {
@@ -6318,6 +6348,15 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC293_FIXUP_LENOVO_SPK_NOISE
        },
+       [ALC255_FIXUP_DUMMY_LINEOUT_VERB] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x14, 0x0201101f },
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6366,10 +6405,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE),
        SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
        SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME),
+       SND_PCI_QUIRK(0x1028, 0x07b0, "Dell Precision 7520", ALC295_FIXUP_DISABLE_DAC3),
        SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
+       SND_PCI_QUIRK(0x1028, 0x080c, "Dell WYSE", ALC225_FIXUP_DELL_WYSE_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
        SND_PCI_QUIRK(0x1028, 0x084b, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
        SND_PCI_QUIRK(0x1028, 0x084e, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
+       SND_PCI_QUIRK(0x1028, 0x0873, "Dell Precision 3930", ALC255_FIXUP_DUMMY_LINEOUT_VERB),
        SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -6507,9 +6549,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x2245, "Thinkpad T470", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x2246, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x2247, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+       SND_PCI_QUIRK(0x17aa, 0x2249, "Thinkpad", ALC292_FIXUP_TPT460),
        SND_PCI_QUIRK(0x17aa, 0x224b, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x224c, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x224d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
+       SND_PCI_QUIRK(0x17aa, 0x225d, "Thinkpad T480", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
@@ -6871,7 +6915,7 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x12, 0x90a60120},
                {0x14, 0x90170110},
                {0x21, 0x0321101f}),
-       SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
+       SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
                {0x12, 0xb7a60130},
                {0x14, 0x90170110},
                {0x21, 0x04211020}),
index c33a512283a48fecfe52a7ca3086dc7b30376e29..9fb356db3ab25af6d5c75026cee6887d87298918 100644 (file)
@@ -579,13 +579,6 @@ static int acp_init(void __iomem *acp_mmio, u32 asic_type)
                for (bank = 1; bank < 48; bank++)
                        acp_set_sram_bank_state(acp_mmio, bank, false);
        }
-
-       /* Stoney supports 16bit resolution */
-       if (asic_type == CHIP_STONEY) {
-               val = acp_reg_read(acp_mmio, mmACP_I2S_16BIT_RESOLUTION_EN);
-               val |= 0x03;
-               acp_reg_write(val, acp_mmio, mmACP_I2S_16BIT_RESOLUTION_EN);
-       }
        return 0;
 }
 
@@ -774,6 +767,7 @@ static int acp_dma_hw_params(struct snd_pcm_substream *substream,
 {
        int status;
        uint64_t size;
+       u32 val = 0;
        struct page *pg;
        struct snd_pcm_runtime *runtime;
        struct audio_substream_data *rtd;
@@ -786,6 +780,14 @@ static int acp_dma_hw_params(struct snd_pcm_substream *substream,
        if (WARN_ON(!rtd))
                return -EINVAL;
 
+       if (adata->asic_type == CHIP_STONEY) {
+               val = acp_reg_read(adata->acp_mmio, mmACP_I2S_16BIT_RESOLUTION_EN);
+               if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+                       val |= ACP_I2S_SP_16BIT_RESOLUTION_EN;
+               else
+                       val |= ACP_I2S_MIC_16BIT_RESOLUTION_EN;
+               acp_reg_write(val, adata->acp_mmio, mmACP_I2S_16BIT_RESOLUTION_EN);
+       }
        size = params_buffer_bytes(params);
        status = snd_pcm_lib_malloc_pages(substream, size);
        if (status < 0)
index ecb458935d1e82607f907c9105ccaac20411f8ce..9293f179f2721fc0ec9ea5f3acdeb63f076f89f4 100644 (file)
@@ -70,6 +70,8 @@
 #define CAPTURE_END_DMA_DESCR_CH15 7
 
 #define mmACP_I2S_16BIT_RESOLUTION_EN       0x5209
+#define ACP_I2S_MIC_16BIT_RESOLUTION_EN 0x01
+#define ACP_I2S_SP_16BIT_RESOLUTION_EN 0x02
 enum acp_dma_priority_level {
        /* 0x0 Specifies the DMA channel is given normal priority */
        ACP_DMA_PRIORITY_LEVEL_NORMAL = 0x0,
index 5672e516bec378c5d5437c8e63e8dac15e515c33..c1830ccd3bb8ecd640179818fb9c58924ae5d857 100644 (file)
@@ -798,12 +798,7 @@ static int hdmi_codec_probe(struct platform_device *pdev)
 
 static int hdmi_codec_remove(struct platform_device *pdev)
 {
-       struct device *dev = &pdev->dev;
-       struct hdmi_codec_priv *hcp;
-
-       hcp = dev_get_drvdata(dev);
-       kfree(hcp->chmap_info);
-       snd_soc_unregister_codec(dev);
+       snd_soc_unregister_codec(&pdev->dev);
 
        return 0;
 }
index 831b297978a485ff58fe4eea38373c6392cab7ae..45a73049cf648770421bfc4795e7dbc773f35981 100644 (file)
@@ -1722,6 +1722,7 @@ static const struct regmap_config rt5651_regmap = {
        .num_reg_defaults = ARRAY_SIZE(rt5651_reg),
        .ranges = rt5651_ranges,
        .num_ranges = ARRAY_SIZE(rt5651_ranges),
+       .use_single_rw = true,
 };
 
 #if defined(CONFIG_OF)
index e1ab5537d27a80f702a2f8934b79c21ddf514d87..c5c76ab8ccf1008ba306e2021de42c4d0443cc53 100644 (file)
@@ -529,10 +529,15 @@ static const struct snd_kcontrol_new sgtl5000_snd_controls[] = {
 static int sgtl5000_digital_mute(struct snd_soc_dai *codec_dai, int mute)
 {
        struct snd_soc_codec *codec = codec_dai->codec;
-       u16 adcdac_ctrl = SGTL5000_DAC_MUTE_LEFT | SGTL5000_DAC_MUTE_RIGHT;
+       u16 i2s_pwr = SGTL5000_I2S_IN_POWERUP;
 
-       snd_soc_update_bits(codec, SGTL5000_CHIP_ADCDAC_CTRL,
-                       adcdac_ctrl, mute ? adcdac_ctrl : 0);
+       /*
+        * During 'digital mute' do not mute DAC
+        * because LINE_IN would be muted as well. We want to mute
+        * only I2S block - this can be done by powering it off
+        */
+       snd_soc_update_bits(codec, SGTL5000_CHIP_DIG_POWER,
+                       i2s_pwr, mute ? 0 : i2s_pwr);
 
        return 0;
 }
@@ -871,15 +876,26 @@ static int sgtl5000_pcm_hw_params(struct snd_pcm_substream *substream,
 static int sgtl5000_set_bias_level(struct snd_soc_codec *codec,
                                   enum snd_soc_bias_level level)
 {
+       struct sgtl5000_priv *sgtl = snd_soc_codec_get_drvdata(codec);
+       int ret;
+
        switch (level) {
        case SND_SOC_BIAS_ON:
        case SND_SOC_BIAS_PREPARE:
        case SND_SOC_BIAS_STANDBY:
+               regcache_cache_only(sgtl->regmap, false);
+               ret = regcache_sync(sgtl->regmap);
+               if (ret) {
+                       regcache_cache_only(sgtl->regmap, true);
+                       return ret;
+               }
+
                snd_soc_update_bits(codec, SGTL5000_CHIP_ANA_POWER,
                                    SGTL5000_REFTOP_POWERUP,
                                    SGTL5000_REFTOP_POWERUP);
                break;
        case SND_SOC_BIAS_OFF:
+               regcache_cache_only(sgtl->regmap, true);
                snd_soc_update_bits(codec, SGTL5000_CHIP_ANA_POWER,
                                    SGTL5000_REFTOP_POWERUP, 0);
                break;
@@ -1237,6 +1253,10 @@ static int sgtl5000_probe(struct snd_soc_codec *codec)
         */
        snd_soc_write(codec, SGTL5000_DAP_CTRL, 0);
 
+       /* Unmute DAC after start */
+       snd_soc_update_bits(codec, SGTL5000_CHIP_ADCDAC_CTRL,
+               SGTL5000_DAC_MUTE_LEFT | SGTL5000_DAC_MUTE_RIGHT, 0);
+
        return 0;
 
 err:
index 66e32f5d2917f2f0b958c124b5f4a0eeca296c10..989d093abda7e6c25c7f812e62596e0005d74d42 100644 (file)
@@ -1204,12 +1204,14 @@ static int wmfw_add_ctl(struct wm_adsp *dsp, struct wm_coeff_ctl *ctl)
                kcontrol->put = wm_coeff_put_acked;
                break;
        default:
-               kcontrol->get = wm_coeff_get;
-               kcontrol->put = wm_coeff_put;
-
-               ctl->bytes_ext.max = ctl->len;
-               ctl->bytes_ext.get = wm_coeff_tlv_get;
-               ctl->bytes_ext.put = wm_coeff_tlv_put;
+               if (kcontrol->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) {
+                       ctl->bytes_ext.max = ctl->len;
+                       ctl->bytes_ext.get = wm_coeff_tlv_get;
+                       ctl->bytes_ext.put = wm_coeff_tlv_put;
+               } else {
+                       kcontrol->get = wm_coeff_get;
+                       kcontrol->put = wm_coeff_put;
+               }
                break;
        }
 
index dca1143c1150ac58aa148a49db54c33226e366c8..a4aa931ebfaef4c081dc5594c4ffe06fba6aeb38 100644 (file)
 
 #define SUN8I_I2S_CHAN_CFG_REG         0x30
 #define SUN8I_I2S_CHAN_CFG_RX_SLOT_NUM_MASK    GENMASK(6, 4)
-#define SUN8I_I2S_CHAN_CFG_RX_SLOT_NUM(chan)   (chan - 1)
+#define SUN8I_I2S_CHAN_CFG_RX_SLOT_NUM(chan)   ((chan - 1) << 4)
 #define SUN8I_I2S_CHAN_CFG_TX_SLOT_NUM_MASK    GENMASK(2, 0)
 #define SUN8I_I2S_CHAN_CFG_TX_SLOT_NUM(chan)   (chan - 1)
 
index 50252046b01df7cdb376e1c8ae8c59559befdd1f..754e632a27bd2e8c15931e821fa158bf50bfdb78 100644 (file)
@@ -3325,4 +3325,51 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"),
        }
 },
 
+{
+       /*
+        * Bowers & Wilkins PX headphones only support the 48 kHz sample rate
+        * even though they advertise more. The capture interface doesn't work
+        * even on Windows.
+        */
+       USB_DEVICE(0x19b5, 0x0021),
+       .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+               .ifnum = QUIRK_ANY_INTERFACE,
+               .type = QUIRK_COMPOSITE,
+               .data = (const struct snd_usb_audio_quirk[]) {
+                       {
+                               .ifnum = 0,
+                               .type = QUIRK_AUDIO_STANDARD_MIXER,
+                       },
+                       /* Capture */
+                       {
+                               .ifnum = 1,
+                               .type = QUIRK_IGNORE_INTERFACE,
+                       },
+                       /* Playback */
+                       {
+                               .ifnum = 2,
+                               .type = QUIRK_AUDIO_FIXED_ENDPOINT,
+                               .data = &(const struct audioformat) {
+                                       .formats = SNDRV_PCM_FMTBIT_S16_LE,
+                                       .channels = 2,
+                                       .iface = 2,
+                                       .altsetting = 1,
+                                       .altset_idx = 1,
+                                       .attributes = UAC_EP_CS_ATTR_FILL_MAX |
+                                               UAC_EP_CS_ATTR_SAMPLE_RATE,
+                                       .endpoint = 0x03,
+                                       .ep_attr = USB_ENDPOINT_XFER_ISOC,
+                                       .rates = SNDRV_PCM_RATE_48000,
+                                       .rate_min = 48000,
+                                       .rate_max = 48000,
+                                       .nr_rates = 1,
+                                       .rate_table = (unsigned int[]) {
+                                               48000
+                                       }
+                               }
+                       },
+               }
+       }
+},
+
 #undef USB_DEVICE_VENDOR_SPEC
index a0951505c7f5b2804c3203fe7eeb0dea7d4935b1..4ed9d0c41843888d6e9c4aa7f79b980c3e0ac74b 100644 (file)
@@ -50,6 +50,7 @@
 /*standard module options for ALSA. This module supports only one card*/
 static int hdmi_card_index = SNDRV_DEFAULT_IDX1;
 static char *hdmi_card_id = SNDRV_DEFAULT_STR1;
+static bool single_port;
 
 module_param_named(index, hdmi_card_index, int, 0444);
 MODULE_PARM_DESC(index,
@@ -57,6 +58,9 @@ MODULE_PARM_DESC(index,
 module_param_named(id, hdmi_card_id, charp, 0444);
 MODULE_PARM_DESC(id,
                "ID string for INTEL Intel HDMI Audio controller.");
+module_param(single_port, bool, 0444);
+MODULE_PARM_DESC(single_port,
+               "Single-port mode (for compatibility)");
 
 /*
  * ELD SA bits in the CEA Speaker Allocation data block
@@ -1579,7 +1583,11 @@ static irqreturn_t display_pipe_interrupt_handler(int irq, void *dev_id)
 static void notify_audio_lpe(struct platform_device *pdev, int port)
 {
        struct snd_intelhad_card *card_ctx = platform_get_drvdata(pdev);
-       struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
+       struct snd_intelhad *ctx;
+
+       ctx = &card_ctx->pcm_ctx[single_port ? 0 : port];
+       if (single_port)
+               ctx->port = port;
 
        schedule_work(&ctx->hdmi_audio_wq);
 }
@@ -1743,6 +1751,7 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
 {
        struct snd_card *card;
        struct snd_intelhad_card *card_ctx;
+       struct snd_intelhad *ctx;
        struct snd_pcm *pcm;
        struct intel_hdmi_lpe_audio_pdata *pdata;
        int irq;
@@ -1787,6 +1796,21 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, card_ctx);
 
+       card_ctx->num_pipes = pdata->num_pipes;
+       card_ctx->num_ports = single_port ? 1 : pdata->num_ports;
+
+       for_each_port(card_ctx, port) {
+               ctx = &card_ctx->pcm_ctx[port];
+               ctx->card_ctx = card_ctx;
+               ctx->dev = card_ctx->dev;
+               ctx->port = single_port ? -1 : port;
+               ctx->pipe = -1;
+
+               spin_lock_init(&ctx->had_spinlock);
+               mutex_init(&ctx->mutex);
+               INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq);
+       }
+
        dev_dbg(&pdev->dev, "%s: mmio_start = 0x%x, mmio_end = 0x%x\n",
                __func__, (unsigned int)res_mmio->start,
                (unsigned int)res_mmio->end);
@@ -1816,19 +1840,12 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
        init_channel_allocations();
 
        card_ctx->num_pipes = pdata->num_pipes;
-       card_ctx->num_ports = pdata->num_ports;
+       card_ctx->num_ports = single_port ? 1 : pdata->num_ports;
 
        for_each_port(card_ctx, port) {
-               struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
                int i;
 
-               ctx->card_ctx = card_ctx;
-               ctx->dev = card_ctx->dev;
-               ctx->port = port;
-               ctx->pipe = -1;
-
-               INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq);
-
+               ctx = &card_ctx->pcm_ctx[port];
                ret = snd_pcm_new(card, INTEL_HAD, port, MAX_PB_STREAMS,
                                  MAX_CAP_STREAMS, &pcm);
                if (ret)
index 0dfe4d3f74e24d6655fc40f0460b9e489fb9ef69..f41079da38c55f8a2e891b044ac9ffb73919c37e 100644 (file)
 #define X86_FEATURE_SEV                        ( 7*32+20) /* AMD Secure Encrypted Virtualization */
 
 #define X86_FEATURE_USE_IBPB           ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW                ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW         ( 8*32+ 0) /* Intel TPR Shadow */
index c8ec0ae16bf03af4c40f08b06dcea2b7e24e4b9e..1ea545965ee36c3a8dca16fd0e34f19aa4894e60 100644 (file)
@@ -1,19 +1,28 @@
 # SPDX-License-Identifier: GPL-2.0
-prefix = /usr
+include ../scripts/Makefile.include
+
+prefix ?= /usr/local
 
 CC = gcc
 LEX = flex
 YACC = bison
 MAKE = make
+INSTALL ?= install
 
 CFLAGS += -Wall -O2
-CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/include/uapi -I$(srctree)/include
 
 ifeq ($(srctree),)
 srctree := $(patsubst %/,%,$(dir $(CURDIR)))
 srctree := $(patsubst %/,%,$(dir $(srctree)))
 endif
 
+ifeq ($(V),1)
+  Q =
+else
+  Q = @
+endif
+
 FEATURE_USER = .bpf
 FEATURE_TESTS = libbfd disassembler-four-args
 FEATURE_DISPLAY = libbfd disassembler-four-args
@@ -38,40 +47,59 @@ ifeq ($(feature-disassembler-four-args), 1)
 CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
 endif
 
-%.yacc.c: %.y
-       $(YACC) -o $@ -d $<
+$(OUTPUT)%.yacc.c: $(srctree)/tools/bpf/%.y
+       $(QUIET_BISON)$(YACC) -o $@ -d $<
 
-%.lex.c: %.l
-       $(LEX) -o $@ $<
+$(OUTPUT)%.lex.c: $(srctree)/tools/bpf/%.l
+       $(QUIET_FLEX)$(LEX) -o $@ $<
 
-all: bpf_jit_disasm bpf_dbg bpf_asm bpftool
+$(OUTPUT)%.o: $(srctree)/tools/bpf/%.c
+       $(QUIET_CC)$(COMPILE.c) -o $@ $<
 
-bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm'
-bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
-bpf_jit_disasm : bpf_jit_disasm.o
+$(OUTPUT)%.yacc.o: $(OUTPUT)%.yacc.c
+       $(QUIET_CC)$(COMPILE.c) -o $@ $<
+$(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c
+       $(QUIET_CC)$(COMPILE.c) -o $@ $<
 
-bpf_dbg : LDLIBS = -lreadline
-bpf_dbg : bpf_dbg.o
+PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm
 
-bpf_asm : LDLIBS =
-bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o
-bpf_exp.lex.o : bpf_exp.yacc.c
+all: $(PROGS) bpftool
 
-clean: bpftool_clean
-       rm -rf *.o bpf_jit_disasm bpf_dbg bpf_asm bpf_exp.yacc.* bpf_exp.lex.*
+$(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm'
+$(OUTPUT)bpf_jit_disasm: $(OUTPUT)bpf_jit_disasm.o
+       $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ -lopcodes -lbfd -ldl
 
-install: bpftool_install
-       install bpf_jit_disasm $(prefix)/bin/bpf_jit_disasm
-       install bpf_dbg $(prefix)/bin/bpf_dbg
-       install bpf_asm $(prefix)/bin/bpf_asm
+$(OUTPUT)bpf_dbg: $(OUTPUT)bpf_dbg.o
+       $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ -lreadline
+
+$(OUTPUT)bpf_asm: $(OUTPUT)bpf_asm.o $(OUTPUT)bpf_exp.yacc.o $(OUTPUT)bpf_exp.lex.o
+       $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^
+
+$(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
+
+clean: bpftool_clean
+       $(call QUIET_CLEAN, bpf-progs)
+       $(Q)rm -rf $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
+              $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
+       $(call QUIET_CLEAN, core-gen)
+       $(Q)rm -f $(OUTPUT)FEATURE-DUMP.bpf
+
+install: $(PROGS) bpftool_install
+       $(call QUIET_INSTALL, bpf_jit_disasm)
+       $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin
+       $(Q)$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm
+       $(call QUIET_INSTALL, bpf_dbg)
+       $(Q)$(INSTALL) $(OUTPUT)bpf_dbg $(DESTDIR)$(prefix)/bin/bpf_dbg
+       $(call QUIET_INSTALL, bpf_asm)
+       $(Q)$(INSTALL) $(OUTPUT)bpf_asm $(DESTDIR)$(prefix)/bin/bpf_asm
 
 bpftool:
-       $(MAKE) -C bpftool
+       $(call descend,bpftool)
 
 bpftool_install:
-       $(MAKE) -C bpftool install
+       $(call descend,bpftool,install)
 
 bpftool_clean:
-       $(MAKE) -C bpftool clean
+       $(call descend,bpftool,clean)
 
-.PHONY: bpftool FORCE
+.PHONY: all install clean bpftool bpftool_install bpftool_clean
index e4ceee7f2dff4ca0b594511b405a181c75c28bfd..67ca6c69376cd95a030142ee11f2d66778ddc0ec 100644 (file)
@@ -21,7 +21,7 @@ MAP COMMANDS
 =============
 
 |      **bpftool** **prog { show | list }** [*PROG*]
-|      **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}]
+|      **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}]
 |      **bpftool** **prog dump jited**  *PROG* [{**file** *FILE* | **opcodes**}]
 |      **bpftool** **prog pin** *PROG* *FILE*
 |      **bpftool** **prog load** *OBJ* *FILE*
@@ -39,12 +39,18 @@ DESCRIPTION
                  Output will start with program ID followed by program type and
                  zero or more named attributes (depending on kernel version).
 
-       **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** }]
-                 Dump eBPF instructions of the program from the kernel.
-                 If *FILE* is specified image will be written to a file,
-                 otherwise it will be disassembled and printed to stdout.
+       **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** }]
+                 Dump eBPF instructions of the program from the kernel. By
+                 default, eBPF will be disassembled and printed to standard
+                 output in human-readable format. In this case, **opcodes**
+                 controls if raw opcodes should be printed as well.
 
-                 **opcodes** controls if raw opcodes will be printed.
+                 If **file** is specified, the binary image will instead be
+                 written to *FILE*.
+
+                 If **visual** is specified, a control flow graph (CFG) will
+                 be built instead, and eBPF instructions will be presented
+                 with the CFG in DOT format, on standard output.
 
        **bpftool prog dump jited**  *PROG* [{ **file** *FILE* | **opcodes** }]
                  Dump jited image (host machine code) of the program.
index 26901ec87361ed1b999d863e7e4c8ed6b7380123..4e69782c4a793f0860e6532aac5b6aaaecc873e0 100644 (file)
@@ -38,7 +38,7 @@ bash_compdir ?= /usr/share/bash-completion/completions
 CC = gcc
 
 CFLAGS += -O2
-CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow
+CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers
 CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
 CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
 LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
@@ -70,7 +70,7 @@ ifeq ($(feature-disassembler-four-args), 1)
 CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
 endif
 
-include $(wildcard *.d)
+include $(wildcard $(OUTPUT)*.d)
 
 all: $(OUTPUT)bpftool
 
@@ -89,6 +89,8 @@ $(OUTPUT)%.o: %.c
 clean: $(LIBBPF)-clean
        $(call QUIET_CLEAN, bpftool)
        $(Q)$(RM) $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+       $(call QUIET_CLEAN, core-gen)
+       $(Q)$(RM) $(OUTPUT)FEATURE-DUMP.bpftool
 
 install: $(OUTPUT)bpftool
        $(call QUIET_INSTALL, bpftool)
index 08719c54a614a19a9eeafd9048d3bd21dd90b706..490811b45fa75c60a470ee5f626df844d23f9e17 100644 (file)
@@ -147,7 +147,7 @@ _bpftool()
 
     # Deal with simplest keywords
     case $prev in
-        help|key|opcodes)
+        help|key|opcodes|visual)
             return 0
             ;;
         tag)
@@ -223,11 +223,16 @@ _bpftool()
                             return 0
                             ;;
                     *)
-                            _bpftool_once_attr 'file'
+                        _bpftool_once_attr 'file'
+                        if _bpftool_search_list 'xlated'; then
+                            COMPREPLY+=( $( compgen -W 'opcodes visual' -- \
+                                "$cur" ) )
+                        else
                             COMPREPLY+=( $( compgen -W 'opcodes' -- \
                                 "$cur" ) )
-                            return 0
-                            ;;
+                        fi
+                        return 0
+                        ;;
                     esac
                     ;;
                 pin)
diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c
new file mode 100644 (file)
index 0000000..f30b3a4
--- /dev/null
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/list.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cfg.h"
+#include "main.h"
+#include "xlated_dumper.h"
+
+struct cfg {
+       struct list_head funcs;
+       int func_num;
+};
+
+struct func_node {
+       struct list_head l;
+       struct list_head bbs;
+       struct bpf_insn *start;
+       struct bpf_insn *end;
+       int idx;
+       int bb_num;
+};
+
+struct bb_node {
+       struct list_head l;
+       struct list_head e_prevs;
+       struct list_head e_succs;
+       struct bpf_insn *head;
+       struct bpf_insn *tail;
+       int idx;
+};
+
+#define EDGE_FLAG_EMPTY                0x0
+#define EDGE_FLAG_FALLTHROUGH  0x1
+#define EDGE_FLAG_JUMP         0x2
+struct edge_node {
+       struct list_head l;
+       struct bb_node *src;
+       struct bb_node *dst;
+       int flags;
+};
+
+#define ENTRY_BLOCK_INDEX      0
+#define EXIT_BLOCK_INDEX       1
+#define NUM_FIXED_BLOCKS       2
+#define func_prev(func)                list_prev_entry(func, l)
+#define func_next(func)                list_next_entry(func, l)
+#define bb_prev(bb)            list_prev_entry(bb, l)
+#define bb_next(bb)            list_next_entry(bb, l)
+#define entry_bb(func)         func_first_bb(func)
+#define exit_bb(func)          func_last_bb(func)
+#define cfg_first_func(cfg)    \
+       list_first_entry(&cfg->funcs, struct func_node, l)
+#define cfg_last_func(cfg)     \
+       list_last_entry(&cfg->funcs, struct func_node, l)
+#define func_first_bb(func)    \
+       list_first_entry(&func->bbs, struct bb_node, l)
+#define func_last_bb(func)     \
+       list_last_entry(&func->bbs, struct bb_node, l)
+
+static struct func_node *cfg_append_func(struct cfg *cfg, struct bpf_insn *insn)
+{
+       struct func_node *new_func, *func;
+
+       list_for_each_entry(func, &cfg->funcs, l) {
+               if (func->start == insn)
+                       return func;
+               else if (func->start > insn)
+                       break;
+       }
+
+       func = func_prev(func);
+       new_func = calloc(1, sizeof(*new_func));
+       if (!new_func) {
+               p_err("OOM when allocating FUNC node");
+               return NULL;
+       }
+       new_func->start = insn;
+       new_func->idx = cfg->func_num;
+       list_add(&new_func->l, &func->l);
+       cfg->func_num++;
+
+       return new_func;
+}
+
+static struct bb_node *func_append_bb(struct func_node *func,
+                                     struct bpf_insn *insn)
+{
+       struct bb_node *new_bb, *bb;
+
+       list_for_each_entry(bb, &func->bbs, l) {
+               if (bb->head == insn)
+                       return bb;
+               else if (bb->head > insn)
+                       break;
+       }
+
+       bb = bb_prev(bb);
+       new_bb = calloc(1, sizeof(*new_bb));
+       if (!new_bb) {
+               p_err("OOM when allocating BB node");
+               return NULL;
+       }
+       new_bb->head = insn;
+       INIT_LIST_HEAD(&new_bb->e_prevs);
+       INIT_LIST_HEAD(&new_bb->e_succs);
+       list_add(&new_bb->l, &bb->l);
+
+       return new_bb;
+}
+
+static struct bb_node *func_insert_dummy_bb(struct list_head *after)
+{
+       struct bb_node *bb;
+
+       bb = calloc(1, sizeof(*bb));
+       if (!bb) {
+               p_err("OOM when allocating BB node");
+               return NULL;
+       }
+
+       INIT_LIST_HEAD(&bb->e_prevs);
+       INIT_LIST_HEAD(&bb->e_succs);
+       list_add(&bb->l, after);
+
+       return bb;
+}
+
+static bool cfg_partition_funcs(struct cfg *cfg, struct bpf_insn *cur,
+                               struct bpf_insn *end)
+{
+       struct func_node *func, *last_func;
+
+       func = cfg_append_func(cfg, cur);
+       if (!func)
+               return true;
+
+       for (; cur < end; cur++) {
+               if (cur->code != (BPF_JMP | BPF_CALL))
+                       continue;
+               if (cur->src_reg != BPF_PSEUDO_CALL)
+                       continue;
+               func = cfg_append_func(cfg, cur + cur->off + 1);
+               if (!func)
+                       return true;
+       }
+
+       last_func = cfg_last_func(cfg);
+       last_func->end = end - 1;
+       func = cfg_first_func(cfg);
+       list_for_each_entry_from(func, &last_func->l, l) {
+               func->end = func_next(func)->start - 1;
+       }
+
+       return false;
+}
+
+/* Record where the basic blocks of @func start.
+ *
+ * A block head is registered at the first instruction of the function, at
+ * the target of every jump, and, for jumps other than BPF_JA, at the
+ * instruction following the jump. BPF_EXIT and BPF_CALL do not open a new
+ * block.
+ *
+ * Returns true when func_append_bb() fails, false otherwise.
+ */
+static bool func_partition_bb_head(struct func_node *func)
+{
+       struct bpf_insn *first = func->start;
+       struct bpf_insn *last = func->end;
+       struct bpf_insn *insn;
+
+       INIT_LIST_HEAD(&func->bbs);
+       if (!func_append_bb(func, first))
+               return true;
+
+       for (insn = first; insn <= last; insn++) {
+               u8 op;
+
+               if (BPF_CLASS(insn->code) != BPF_JMP)
+                       continue;
+
+               op = BPF_OP(insn->code);
+               if (op == BPF_EXIT || op == BPF_CALL)
+                       continue;
+
+               /* Jump target always starts a block. */
+               if (!func_append_bb(func, insn + insn->off + 1))
+                       return true;
+
+               if (op == BPF_JA)
+                       continue;
+
+               /* Not an unconditional jump: so does the next instruction. */
+               if (!func_append_bb(func, insn + 1))
+                       return true;
+       }
+
+       return false;
+}
+
+/* Fill in the tail instruction and the index of every basic block of @func.
+ *
+ * Each block ends on the instruction preceding the head of the next block;
+ * the last block ends on the last instruction of the function. Indexes are
+ * handed out in list order starting from NUM_FIXED_BLOCKS, and the first
+ * unused index is stored in func->bb_num.
+ */
+static void func_partition_bb_tail(struct func_node *func)
+{
+       struct bb_node *final_bb = func_last_bb(func);
+       struct bb_node *cur_bb = func_first_bb(func);
+       unsigned int next_idx = NUM_FIXED_BLOCKS;
+
+       /* Every block but the last one. */
+       list_for_each_entry_from(cur_bb, &final_bb->l, l) {
+               cur_bb->idx = next_idx;
+               next_idx++;
+               cur_bb->tail = bb_next(cur_bb)->head - 1;
+       }
+
+       final_bb->tail = func->end;
+       final_bb->idx = next_idx;
+       func->bb_num = next_idx + 1;
+}
+
+/* Add the two dummy blocks of @func: one inserted right after the head of
+ * the block list and indexed ENTRY_BLOCK_INDEX, one inserted after the
+ * current last block and indexed EXIT_BLOCK_INDEX.
+ *
+ * Returns true if either block cannot be allocated, false otherwise.
+ */
+static bool func_add_special_bb(struct func_node *func)
+{
+       struct bb_node *dummy_entry, *dummy_exit;
+
+       dummy_entry = func_insert_dummy_bb(&func->bbs);
+       if (!dummy_entry)
+               return true;
+       dummy_entry->idx = ENTRY_BLOCK_INDEX;
+
+       dummy_exit = func_insert_dummy_bb(&func_last_bb(func)->l);
+       if (!dummy_exit)
+               return true;
+       dummy_exit->idx = EXIT_BLOCK_INDEX;
+
+       return false;
+}
+
+/* Partition @func into basic blocks: find the block heads first and, if that
+ * succeeded, derive the block tails and indexes.
+ *
+ * Returns true on failure, false on success.
+ */
+static bool func_partition_bb(struct func_node *func)
+{
+       bool failed = func_partition_bb_head(func);
+
+       if (!failed)
+               func_partition_bb_tail(func);
+
+       return failed;
+}
+
+/* Look up the basic block of @func whose first instruction is @insn.
+ *
+ * Returns the first matching block in list order, or NULL if there is none.
+ */
+static struct bb_node *func_search_bb_with_head(struct func_node *func,
+                                               struct bpf_insn *insn)
+{
+       struct bb_node *found = NULL;
+       struct bb_node *it;
+
+       list_for_each_entry(it, &func->bbs, l) {
+               if (it->head != insn)
+                       continue;
+
+               found = it;
+               break;
+       }
+
+       return found;
+}
+
+/* Allocate an edge going from @src to @dst (either may be NULL) carrying
+ * @flags. The edge is not linked into any list.
+ *
+ * Returns the new edge, or NULL (after reporting through p_err()) when the
+ * allocation fails.
+ */
+static struct edge_node *new_edge(struct bb_node *src, struct bb_node *dst,
+                                 int flags)
+{
+       struct edge_node *edge = calloc(1, sizeof(*edge));
+
+       if (!edge) {
+               p_err("OOM when allocating edge node");
+               return NULL;
+       }
+
+       /* The node comes back zeroed from calloc(), so plain stores are
+        * enough for the end points and for the flags.
+        */
+       edge->src = src;
+       edge->dst = dst;
+       edge->flags = flags;
+
+       return edge;
+}
+
+/* Build the control flow edges between the basic blocks of @func.
+ *
+ * The ENTRY block gets a fallthrough edge to the block following it, and the
+ * EXIT block gets a predecessor edge from the block preceding it. Every block
+ * in between then receives its successor edges depending on the instruction
+ * found at its tail:
+ *  - not a jump, or BPF_EXIT / BPF_CALL: one fallthrough edge to the next
+ *    block in the list;
+ *  - BPF_JA: one jump edge to the block starting at the jump target;
+ *  - any other jump: a fallthrough edge followed by a jump edge.
+ *
+ * Returns true if an edge cannot be allocated, false on success.
+ */
+static bool func_add_bb_edges(struct func_node *func)
+{
+       struct bpf_insn *insn;
+       struct edge_node *e;
+       struct bb_node *bb;
+       u8 opcode;
+
+       bb = entry_bb(func);
+       e = new_edge(bb, bb_next(bb), EDGE_FLAG_FALLTHROUGH);
+       if (!e)
+               return true;
+       list_add_tail(&e->l, &bb->e_succs);
+
+       bb = exit_bb(func);
+       e = new_edge(bb_prev(bb), bb, EDGE_FLAG_FALLTHROUGH);
+       if (!e)
+               return true;
+       list_add_tail(&e->l, &bb->e_prevs);
+
+       /* Walk the real blocks, i.e. from the one after ENTRY up to, but not
+        * including, EXIT.
+        */
+       bb = entry_bb(func);
+       bb = bb_next(bb);
+       list_for_each_entry_from(bb, &exit_bb(func)->l, l) {
+               /* new_edge() already records bb as the source. */
+               e = new_edge(bb, NULL, EDGE_FLAG_EMPTY);
+               if (!e)
+                       return true;
+
+               insn = bb->tail;
+               opcode = BPF_OP(insn->code);
+
+               /* A call does not end a block in func_partition_bb_head(),
+                * but it can still sit at a block tail when the following
+                * instruction is a jump target. Its off field is not a branch
+                * inside this function (for BPF_PSEUDO_CALL it locates the
+                * callee, see cfg_partition_funcs()), so looking it up may
+                * find no block at all. Control simply continues with the
+                * next block.
+                */
+               if (BPF_CLASS(insn->code) != BPF_JMP ||
+                   opcode == BPF_EXIT || opcode == BPF_CALL) {
+                       e->dst = bb_next(bb);
+                       e->flags |= EDGE_FLAG_FALLTHROUGH;
+                       list_add_tail(&e->l, &bb->e_succs);
+                       continue;
+               } else if (opcode == BPF_JA) {
+                       e->dst = func_search_bb_with_head(func,
+                                                         insn + insn->off + 1);
+                       e->flags |= EDGE_FLAG_JUMP;
+                       list_add_tail(&e->l, &bb->e_succs);
+                       continue;
+               }
+
+               /* Remaining jumps: the not-taken path comes first... */
+               e->dst = bb_next(bb);
+               e->flags |= EDGE_FLAG_FALLTHROUGH;
+               list_add_tail(&e->l, &bb->e_succs);
+
+               /* ... then the taken path. */
+               e = new_edge(bb, NULL, EDGE_FLAG_JUMP);
+               if (!e)
+                       return true;
+               e->dst = func_search_bb_with_head(func, insn + insn->off + 1);
+               list_add_tail(&e->l, &bb->e_succs);
+       }
+
+       return false;
+}
+
+/* Build the control flow graph of the program made of the @len bytes of
+ * instructions at @insn: split it into functions, split each function into
+ * basic blocks, add the dummy ENTRY/EXIT blocks and connect the blocks with
+ * edges.
+ *
+ * Returns true on failure, false on success. On failure, nodes that were
+ * already allocated are left linked in @cfg.
+ */
+static bool cfg_build(struct cfg *cfg, struct bpf_insn *insn, unsigned int len)
+{
+       int cnt = len / sizeof(*insn);
+       struct func_node *func;
+
+       INIT_LIST_HEAD(&cfg->funcs);
+
+       /* Without any instruction, cfg_partition_funcs() would make the
+        * function end one instruction before its start, and that pointer
+        * would later be dereferenced as a block tail when adding edges.
+        */
+       if (!cnt) {
+               p_err("no instruction to build the CFG from");
+               return true;
+       }
+
+       if (cfg_partition_funcs(cfg, insn, insn + cnt))
+               return true;
+
+       list_for_each_entry(func, &cfg->funcs, l) {
+               if (func_partition_bb(func) || func_add_special_bb(func))
+                       return true;
+
+               if (func_add_bb_edges(func))
+                       return true;
+       }
+
+       return false;
+}
+
+/* Unlink and free every edge queued on @edges. */
+static void cfg_free_edge_list(struct list_head *edges)
+{
+       struct edge_node *edge, *edge_tmp;
+
+       list_for_each_entry_safe(edge, edge_tmp, edges, l) {
+               list_del(&edge->l);
+               free(edge);
+       }
+}
+
+/* Release everything hanging off @cfg: for each function, free the
+ * predecessor and successor edges of each basic block, then the block, then
+ * the function node itself.
+ */
+static void cfg_destroy(struct cfg *cfg)
+{
+       struct func_node *fn, *fn_tmp;
+       struct bb_node *blk, *blk_tmp;
+
+       list_for_each_entry_safe(fn, fn_tmp, &cfg->funcs, l) {
+               list_for_each_entry_safe(blk, blk_tmp, &fn->bbs, l) {
+                       cfg_free_edge_list(&blk->e_prevs);
+                       cfg_free_edge_list(&blk->e_succs);
+
+                       list_del(&blk->l);
+                       free(blk);
+               }
+
+               list_del(&fn->l);
+               free(fn);
+       }
+}
+
+/* Print the DOT node statement for basic block @bb of @func.
+ *
+ * The ENTRY and EXIT blocks are drawn as "Mdiamond" nodes labelled with
+ * their name. Any other block is drawn as a "record" node whose label is
+ * produced by dump_xlated_for_graph() for the instructions from bb->head to
+ * bb->tail; kernel symbols are loaded and released around that call.
+ */
+static void draw_bb_node(struct func_node *func, struct bb_node *bb)
+{
+       bool is_entry = bb->idx == ENTRY_BLOCK_INDEX;
+       bool is_exit = bb->idx == EXIT_BLOCK_INDEX;
+       const char *shape = (is_entry || is_exit) ? "Mdiamond" : "record";
+
+       printf("\tfn_%d_bb_%d [shape=%s,style=filled,label=\"",
+              func->idx, bb->idx, shape);
+
+       if (is_entry) {
+               printf("ENTRY");
+       } else if (is_exit) {
+               printf("EXIT");
+       } else {
+               struct dump_data dd = {};
+               unsigned int first_insn_idx = bb->head - func->start;
+
+               printf("{");
+               kernel_syms_load(&dd);
+               dump_xlated_for_graph(&dd, bb->head, bb->tail, first_insn_idx);
+               kernel_syms_destroy(&dd);
+               printf("}");
+       }
+
+       printf("\"];\n\n");
+}
+
+/* Print one DOT edge statement per successor edge of @bb, going from the
+ * south port of the source block to the north port of the destination
+ * block. All edges share the same style, color and weight.
+ */
+static void draw_bb_succ_edges(struct func_node *func, struct bb_node *bb)
+{
+       const char *edge_style = "\"solid,bold\"";
+       const char *edge_color = "black";
+       const int edge_weight = 10;
+       int fn_idx = func->idx;
+       struct edge_node *succ;
+
+       /* Nothing is printed when the block has no successor edge. */
+       list_for_each_entry(succ, &bb->e_succs, l) {
+               printf("\tfn_%d_bb_%d:s -> fn_%d_bb_%d:n [style=%s, color=%s, weight=%d, constraint=true",
+                      fn_idx, succ->src->idx, fn_idx, succ->dst->idx,
+                      edge_style, edge_color, edge_weight);
+               printf("];\n");
+       }
+}
+
+/* Print the DOT node statements for all the basic blocks of @func, in list
+ * order.
+ */
+static void func_output_bb_def(struct func_node *func)
+{
+       struct bb_node *blk;
+
+       list_for_each_entry(blk, &func->bbs, l)
+               draw_bb_node(func, blk);
+}
+
+/* Print the DOT edge statements of @func: the successor edges of each of
+ * its basic blocks, followed by one invisible ENTRY -> EXIT edge.
+ */
+static void func_output_edges(struct func_node *func)
+{
+       struct bb_node *blk;
+       int fn_idx;
+
+       list_for_each_entry(blk, &func->bbs, l)
+               draw_bb_succ_edges(func, blk);
+
+       /* The invisible edge from ENTRY to EXIT is only there to improve
+        * the graph layout.
+        */
+       fn_idx = func->idx;
+       printf("\tfn_%d_bb_%d:s -> fn_%d_bb_%d:n [style=\"invis\", constraint=true];\n",
+              fn_idx, ENTRY_BLOCK_INDEX, fn_idx, EXIT_BLOCK_INDEX);
+}
+
+/* Print @cfg on stdout as a DOT digraph, with one dashed cluster subgraph
+ * per function holding that function's nodes and edges.
+ */
+static void cfg_dump(struct cfg *cfg)
+{
+       struct func_node *fn;
+
+       printf("digraph \"DOT graph for eBPF program\" {\n");
+
+       list_for_each_entry(fn, &cfg->funcs, l) {
+               /* Open the cluster of this function. */
+               printf("subgraph \"cluster_%d\" {\n\tstyle=\"dashed\";\n\tcolor=\"black\";\n\tlabel=\"func_%d ()\";\n",
+                      fn->idx, fn->idx);
+
+               func_output_bb_def(fn);
+               func_output_edges(fn);
+
+               /* Close the cluster. */
+               printf("}\n");
+       }
+
+       printf("}\n");
+}
+
+/* Build the control flow graph of the instructions stored in @buf (@len
+ * bytes long), print it as a DOT graph and free it.
+ *
+ * Nothing is printed, and cfg_destroy() is not called, when building the
+ * graph fails.
+ */
+void dump_xlated_cfg(void *buf, unsigned int len)
+{
+       struct cfg cfg;
+
+       memset(&cfg, 0, sizeof(cfg));
+
+       if (!cfg_build(&cfg, buf, len)) {
+               cfg_dump(&cfg);
+               cfg_destroy(&cfg);
+       }
+}
diff --git a/tools/bpf/bpftool/cfg.h b/tools/bpf/bpftool/cfg.h
new file mode 100644 (file)
index 0000000..2cc9bd9
--- /dev/null
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __BPF_TOOL_CFG_H
+#define __BPF_TOOL_CFG_H
+
+/* Print the control flow graph of the eBPF instructions stored in @buf
+ * (@len bytes long) on stdout, as a DOT digraph.
+ */
+void dump_xlated_cfg(void *buf, unsigned int len);
+
+#endif /* __BPF_TOOL_CFG_H */
index 0b482c0070e04fb06b65eb503959afc06170b472..465995281dcd36f2b25e97aef7316ebd0e441ff5 100644 (file)
 
 #include "main.h"
 
+#ifndef BPF_FS_MAGIC
+#define BPF_FS_MAGIC           0xcafe4a11
+#endif
+
 void p_err(const char *fmt, ...)
 {
        va_list ap;
index 185acfa229b592f0bf0b62d972e90c3746959283..1ec852d21d441b7015830def2ee8fdabba564bfe 100644 (file)
@@ -46,6 +46,9 @@
 
 #include "main.h"
 
+#define BATCH_LINE_LEN_MAX 65536
+#define BATCH_ARG_NB_MAX 4096
+
 const char *bin_name;
 static int last_argc;
 static char **last_argv;
@@ -157,6 +160,54 @@ void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep)
        }
 }
 
+/* Split @line, in place, into a NULL-terminated argument vector.
+ *
+ * Words are separated by spaces, tabs, carriage returns or newlines. A word
+ * starting with a single or double quote runs up to the next identical
+ * quote, and the quotes themselves are dropped. Separators are overwritten
+ * with NUL bytes, and the entries stored in @n_argv point into @line.
+ *
+ * @maxargs is the capacity of @n_argv, terminating NULL included, and
+ * @cmd_nb is only used in error messages.
+ *
+ * Returns the number of arguments found, or -1 (after reporting through
+ * p_err()) on too many arguments or on an unterminated quoted string.
+ */
+static int make_args(char *line, char *n_argv[], int maxargs, int cmd_nb)
+{
+       static const char ws[] = " \t\r\n";
+       int n_argc = 0;
+       char *cp;
+
+       /* Each iteration starts on the first character of a word. */
+       for (cp = line + strspn(line, ws); *cp; cp += strspn(cp, ws)) {
+               char *word_end;
+
+               if (n_argc >= maxargs - 1) {
+                       p_err("too many arguments to command %d", cmd_nb);
+                       return -1;
+               }
+
+               if (*cp == '\'' || *cp == '"') {
+                       /* Quoted word: starts after the quote and ends on
+                        * the matching one.
+                        */
+                       char quote = *cp;
+
+                       n_argv[n_argc++] = cp + 1;
+                       word_end = strchr(cp + 1, quote);
+                       if (!word_end) {
+                               p_err("unterminated quoted string in command %d",
+                                     cmd_nb);
+                               return -1;
+                       }
+               } else {
+                       n_argv[n_argc++] = cp;
+
+                       /* Plain word: ends on whitespace or end of line. */
+                       word_end = cp + strcspn(cp, ws);
+                       if (*word_end == '\0')
+                               break;
+               }
+
+               /* Terminate the word and move past the separator. */
+               *word_end = '\0';
+               cp = word_end + 1;
+       }
+       n_argv[n_argc] = NULL;
+
+       return n_argc;
+}
+
 static int do_batch(int argc, char **argv);
 
 static const struct cmd cmds[] = {
@@ -171,11 +222,12 @@ static const struct cmd cmds[] = {
 
 static int do_batch(int argc, char **argv)
 {
+       char buf[BATCH_LINE_LEN_MAX], contline[BATCH_LINE_LEN_MAX];
+       char *n_argv[BATCH_ARG_NB_MAX];
        unsigned int lines = 0;
-       char *n_argv[4096];
-       char buf[65536];
        int n_argc;
        FILE *fp;
+       char *cp;
        int err;
        int i;
 
@@ -191,7 +243,10 @@ static int do_batch(int argc, char **argv)
        }
        NEXT_ARG();
 
-       fp = fopen(*argv, "r");
+       if (!strcmp(*argv, "-"))
+               fp = stdin;
+       else
+               fp = fopen(*argv, "r");
        if (!fp) {
                p_err("Can't open file (%s): %s", *argv, strerror(errno));
                return -1;
@@ -200,27 +255,45 @@ static int do_batch(int argc, char **argv)
        if (json_output)
                jsonw_start_array(json_wtr);
        while (fgets(buf, sizeof(buf), fp)) {
+               cp = strchr(buf, '#');
+               if (cp)
+                       *cp = '\0';
+
                if (strlen(buf) == sizeof(buf) - 1) {
                        errno = E2BIG;
                        break;
                }
 
-               n_argc = 0;
-               n_argv[n_argc] = strtok(buf, " \t\n");
-
-               while (n_argv[n_argc]) {
-                       n_argc++;
-                       if (n_argc == ARRAY_SIZE(n_argv)) {
-                               p_err("line %d has too many arguments, skip",
+               /* Append continuation lines if any (coming after a line ending
+                * with '\' in the batch file).
+                */
+               while ((cp = strstr(buf, "\\\n")) != NULL) {
+                       if (!fgets(contline, sizeof(contline), fp) ||
+                           strlen(contline) == 0) {
+                               p_err("missing continuation line on command %d",
                                      lines);
-                               n_argc = 0;
-                               break;
+                               err = -1;
+                               goto err_close;
+                       }
+
+                       cp = strchr(contline, '#');
+                       if (cp)
+                               *cp = '\0';
+
+                       if (strlen(buf) + strlen(contline) + 1 > sizeof(buf)) {
+                               p_err("command %d is too long", lines);
+                               err = -1;
+                               goto err_close;
                        }
-                       n_argv[n_argc] = strtok(NULL, " \t\n");
+                       buf[strlen(buf) - 2] = '\0';
+                       strcat(buf, contline);
                }
 
+               n_argc = make_args(buf, n_argv, BATCH_ARG_NB_MAX, lines);
                if (!n_argc)
                        continue;
+               if (n_argc < 0)
+                       goto err_close;
 
                if (json_output) {
                        jsonw_start_object(json_wtr);
@@ -247,11 +320,12 @@ static int do_batch(int argc, char **argv)
                p_err("reading batch file failed: %s", strerror(errno));
                err = -1;
        } else {
-               p_info("processed %d lines", lines);
+               p_info("processed %d commands", lines);
                err = 0;
        }
 err_close:
-       fclose(fp);
+       if (fp != stdin)
+               fclose(fp);
 
        if (json_output)
                jsonw_end_array(json_wtr);
index e549e329be8216646dbab3c11a6485d85d02f1c6..f7a810897eac4cbfd342eac8f75f1f062a6301f8 100644 (file)
@@ -47,8 +47,9 @@
 #include <bpf.h>
 #include <libbpf.h>
 
+#include "cfg.h"
 #include "main.h"
-#include "disasm.h"
+#include "xlated_dumper.h"
 
 static const char * const prog_type_name[] = {
        [BPF_PROG_TYPE_UNSPEC]          = "unspec",
@@ -407,259 +408,6 @@ static int do_show(int argc, char **argv)
        return err;
 }
 
-#define SYM_MAX_NAME   256
-
-struct kernel_sym {
-       unsigned long address;
-       char name[SYM_MAX_NAME];
-};
-
-struct dump_data {
-       unsigned long address_call_base;
-       struct kernel_sym *sym_mapping;
-       __u32 sym_count;
-       char scratch_buff[SYM_MAX_NAME];
-};
-
-static int kernel_syms_cmp(const void *sym_a, const void *sym_b)
-{
-       return ((struct kernel_sym *)sym_a)->address -
-              ((struct kernel_sym *)sym_b)->address;
-}
-
-static void kernel_syms_load(struct dump_data *dd)
-{
-       struct kernel_sym *sym;
-       char buff[256];
-       void *tmp, *address;
-       FILE *fp;
-
-       fp = fopen("/proc/kallsyms", "r");
-       if (!fp)
-               return;
-
-       while (!feof(fp)) {
-               if (!fgets(buff, sizeof(buff), fp))
-                       break;
-               tmp = realloc(dd->sym_mapping,
-                             (dd->sym_count + 1) *
-                             sizeof(*dd->sym_mapping));
-               if (!tmp) {
-out:
-                       free(dd->sym_mapping);
-                       dd->sym_mapping = NULL;
-                       fclose(fp);
-                       return;
-               }
-               dd->sym_mapping = tmp;
-               sym = &dd->sym_mapping[dd->sym_count];
-               if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2)
-                       continue;
-               sym->address = (unsigned long)address;
-               if (!strcmp(sym->name, "__bpf_call_base")) {
-                       dd->address_call_base = sym->address;
-                       /* sysctl kernel.kptr_restrict was set */
-                       if (!sym->address)
-                               goto out;
-               }
-               if (sym->address)
-                       dd->sym_count++;
-       }
-
-       fclose(fp);
-
-       qsort(dd->sym_mapping, dd->sym_count,
-             sizeof(*dd->sym_mapping), kernel_syms_cmp);
-}
-
-static void kernel_syms_destroy(struct dump_data *dd)
-{
-       free(dd->sym_mapping);
-}
-
-static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
-                                            unsigned long key)
-{
-       struct kernel_sym sym = {
-               .address = key,
-       };
-
-       return dd->sym_mapping ?
-              bsearch(&sym, dd->sym_mapping, dd->sym_count,
-                      sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL;
-}
-
-static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
-{
-       va_list args;
-
-       va_start(args, fmt);
-       vprintf(fmt, args);
-       va_end(args);
-}
-
-static const char *print_call_pcrel(struct dump_data *dd,
-                                   struct kernel_sym *sym,
-                                   unsigned long address,
-                                   const struct bpf_insn *insn)
-{
-       if (sym)
-               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-                        "%+d#%s", insn->off, sym->name);
-       else
-               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-                        "%+d#0x%lx", insn->off, address);
-       return dd->scratch_buff;
-}
-
-static const char *print_call_helper(struct dump_data *dd,
-                                    struct kernel_sym *sym,
-                                    unsigned long address)
-{
-       if (sym)
-               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-                        "%s", sym->name);
-       else
-               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-                        "0x%lx", address);
-       return dd->scratch_buff;
-}
-
-static const char *print_call(void *private_data,
-                             const struct bpf_insn *insn)
-{
-       struct dump_data *dd = private_data;
-       unsigned long address = dd->address_call_base + insn->imm;
-       struct kernel_sym *sym;
-
-       sym = kernel_syms_search(dd, address);
-       if (insn->src_reg == BPF_PSEUDO_CALL)
-               return print_call_pcrel(dd, sym, address, insn);
-       else
-               return print_call_helper(dd, sym, address);
-}
-
-static const char *print_imm(void *private_data,
-                            const struct bpf_insn *insn,
-                            __u64 full_imm)
-{
-       struct dump_data *dd = private_data;
-
-       if (insn->src_reg == BPF_PSEUDO_MAP_FD)
-               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-                        "map[id:%u]", insn->imm);
-       else
-               snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-                        "0x%llx", (unsigned long long)full_imm);
-       return dd->scratch_buff;
-}
-
-static void dump_xlated_plain(struct dump_data *dd, void *buf,
-                             unsigned int len, bool opcodes)
-{
-       const struct bpf_insn_cbs cbs = {
-               .cb_print       = print_insn,
-               .cb_call        = print_call,
-               .cb_imm         = print_imm,
-               .private_data   = dd,
-       };
-       struct bpf_insn *insn = buf;
-       bool double_insn = false;
-       unsigned int i;
-
-       for (i = 0; i < len / sizeof(*insn); i++) {
-               if (double_insn) {
-                       double_insn = false;
-                       continue;
-               }
-
-               double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
-
-               printf("% 4d: ", i);
-               print_bpf_insn(&cbs, NULL, insn + i, true);
-
-               if (opcodes) {
-                       printf("       ");
-                       fprint_hex(stdout, insn + i, 8, " ");
-                       if (double_insn && i < len - 1) {
-                               printf(" ");
-                               fprint_hex(stdout, insn + i + 1, 8, " ");
-                       }
-                       printf("\n");
-               }
-       }
-}
-
-static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...)
-{
-       unsigned int l = strlen(fmt);
-       char chomped_fmt[l];
-       va_list args;
-
-       va_start(args, fmt);
-       if (l > 0) {
-               strncpy(chomped_fmt, fmt, l - 1);
-               chomped_fmt[l - 1] = '\0';
-       }
-       jsonw_vprintf_enquote(json_wtr, chomped_fmt, args);
-       va_end(args);
-}
-
-static void dump_xlated_json(struct dump_data *dd, void *buf,
-                            unsigned int len, bool opcodes)
-{
-       const struct bpf_insn_cbs cbs = {
-               .cb_print       = print_insn_json,
-               .cb_call        = print_call,
-               .cb_imm         = print_imm,
-               .private_data   = dd,
-       };
-       struct bpf_insn *insn = buf;
-       bool double_insn = false;
-       unsigned int i;
-
-       jsonw_start_array(json_wtr);
-       for (i = 0; i < len / sizeof(*insn); i++) {
-               if (double_insn) {
-                       double_insn = false;
-                       continue;
-               }
-               double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
-
-               jsonw_start_object(json_wtr);
-               jsonw_name(json_wtr, "disasm");
-               print_bpf_insn(&cbs, NULL, insn + i, true);
-
-               if (opcodes) {
-                       jsonw_name(json_wtr, "opcodes");
-                       jsonw_start_object(json_wtr);
-
-                       jsonw_name(json_wtr, "code");
-                       jsonw_printf(json_wtr, "\"0x%02hhx\"", insn[i].code);
-
-                       jsonw_name(json_wtr, "src_reg");
-                       jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].src_reg);
-
-                       jsonw_name(json_wtr, "dst_reg");
-                       jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].dst_reg);
-
-                       jsonw_name(json_wtr, "off");
-                       print_hex_data_json((uint8_t *)(&insn[i].off), 2);
-
-                       jsonw_name(json_wtr, "imm");
-                       if (double_insn && i < len - 1)
-                               print_hex_data_json((uint8_t *)(&insn[i].imm),
-                                                   12);
-                       else
-                               print_hex_data_json((uint8_t *)(&insn[i].imm),
-                                                   4);
-                       jsonw_end_object(json_wtr);
-               }
-               jsonw_end_object(json_wtr);
-       }
-       jsonw_end_array(json_wtr);
-}
-
 static int do_dump(int argc, char **argv)
 {
        struct bpf_prog_info info = {};
@@ -668,6 +416,7 @@ static int do_dump(int argc, char **argv)
        unsigned int buf_size;
        char *filepath = NULL;
        bool opcodes = false;
+       bool visual = false;
        unsigned char *buf;
        __u32 *member_len;
        __u64 *member_ptr;
@@ -706,6 +455,9 @@ static int do_dump(int argc, char **argv)
        } else if (is_prefix(*argv, "opcodes")) {
                opcodes = true;
                NEXT_ARG();
+       } else if (is_prefix(*argv, "visual")) {
+               visual = true;
+               NEXT_ARG();
        }
 
        if (argc) {
@@ -777,27 +529,30 @@ static int do_dump(int argc, char **argv)
 
                if (json_output)
                        jsonw_null(json_wtr);
-       } else {
-               if (member_len == &info.jited_prog_len) {
-                       const char *name = NULL;
-
-                       if (info.ifindex) {
-                               name = ifindex_to_bfd_name_ns(info.ifindex,
-                                                             info.netns_dev,
-                                                             info.netns_ino);
-                               if (!name)
-                                       goto err_free;
-                       }
-
-                       disasm_print_insn(buf, *member_len, opcodes, name);
-               } else {
-                       kernel_syms_load(&dd);
-                       if (json_output)
-                               dump_xlated_json(&dd, buf, *member_len, opcodes);
-                       else
-                               dump_xlated_plain(&dd, buf, *member_len, opcodes);
-                       kernel_syms_destroy(&dd);
+       } else if (member_len == &info.jited_prog_len) {
+               const char *name = NULL;
+
+               if (info.ifindex) {
+                       name = ifindex_to_bfd_name_ns(info.ifindex,
+                                                     info.netns_dev,
+                                                     info.netns_ino);
+                       if (!name)
+                               goto err_free;
                }
+
+               disasm_print_insn(buf, *member_len, opcodes, name);
+       } else if (visual) {
+               if (json_output)
+                       jsonw_null(json_wtr);
+               else
+                       dump_xlated_cfg(buf, *member_len);
+       } else {
+               kernel_syms_load(&dd);
+               if (json_output)
+                       dump_xlated_json(&dd, buf, *member_len, opcodes);
+               else
+                       dump_xlated_plain(&dd, buf, *member_len, opcodes);
+               kernel_syms_destroy(&dd);
        }
 
        free(buf);
@@ -851,7 +606,7 @@ static int do_help(int argc, char **argv)
 
        fprintf(stderr,
                "Usage: %s %s { show | list } [PROG]\n"
-               "       %s %s dump xlated PROG [{ file FILE | opcodes }]\n"
+               "       %s %s dump xlated PROG [{ file FILE | opcodes | visual }]\n"
                "       %s %s dump jited  PROG [{ file FILE | opcodes }]\n"
                "       %s %s pin   PROG FILE\n"
                "       %s %s load  OBJ  FILE\n"
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
new file mode 100644 (file)
index 0000000..20da835
--- /dev/null
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "disasm.h"
+#include "json_writer.h"
+#include "main.h"
+#include "xlated_dumper.h"
+
+/*
+ * qsort()/bsearch() comparator ordering kernel symbols by ascending address.
+ *
+ * The addresses are unsigned long, so returning their difference would be
+ * truncated to int and could yield the wrong sign (or zero) for symbols that
+ * are far apart.  Compare explicitly instead.
+ *
+ * Returns a negative value, zero or a positive value when sym_a sorts
+ * before, equal to or after sym_b.
+ */
+static int kernel_syms_cmp(const void *sym_a, const void *sym_b)
+{
+       const struct kernel_sym *a = sym_a;
+       const struct kernel_sym *b = sym_b;
+
+       return (a->address > b->address) - (a->address < b->address);
+}
+
+/*
+ * Populate dd->sym_mapping from /proc/kallsyms and sort it by address.
+ *
+ * Each parsed line with a non-zero address is appended to the table.  The
+ * address of "__bpf_call_base" is additionally stored in
+ * dd->address_call_base.
+ *
+ * The function is best effort and reports no error: if the file cannot be
+ * opened the table is left untouched, and if memory runs out or
+ * "__bpf_call_base" reads as zero the table is released and dd is left with
+ * sym_mapping == NULL and sym_count == 0.
+ */
+void kernel_syms_load(struct dump_data *dd)
+{
+       struct kernel_sym *sym;
+       char buff[256];
+       void *tmp, *address;
+       FILE *fp;
+
+       fp = fopen("/proc/kallsyms", "r");
+       if (!fp)
+               return;
+
+       while (fgets(buff, sizeof(buff), fp)) {
+               /* Grow by one slot; the slot is only kept if the line parses. */
+               tmp = realloc(dd->sym_mapping,
+                             (dd->sym_count + 1) *
+                             sizeof(*dd->sym_mapping));
+               if (!tmp)
+                       goto err_free;
+               dd->sym_mapping = tmp;
+               sym = &dd->sym_mapping[dd->sym_count];
+               if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2)
+                       continue;
+               sym->address = (unsigned long)address;
+               if (!strcmp(sym->name, "__bpf_call_base")) {
+                       dd->address_call_base = sym->address;
+                       /* sysctl kernel.kptr_restrict was set */
+                       if (!sym->address)
+                               goto err_free;
+               }
+               if (sym->address)
+                       dd->sym_count++;
+       }
+
+       fclose(fp);
+
+       /* Nothing to sort (and possibly a NULL table) when no symbol was kept. */
+       if (dd->sym_count)
+               qsort(dd->sym_mapping, dd->sym_count,
+                     sizeof(*dd->sym_mapping), kernel_syms_cmp);
+       return;
+
+err_free:
+       free(dd->sym_mapping);
+       dd->sym_mapping = NULL;
+       /* Keep the count consistent with the now empty table. */
+       dd->sym_count = 0;
+       fclose(fp);
+}
+
+/*
+ * Release the symbol table held by dd.
+ *
+ * The pointer and count are reset so that dd can be destroyed twice or
+ * passed to kernel_syms_load() again without touching freed memory.
+ */
+void kernel_syms_destroy(struct dump_data *dd)
+{
+       free(dd->sym_mapping);
+       dd->sym_mapping = NULL;
+       dd->sym_count = 0;
+}
+
+/*
+ * Look up the symbol whose address equals key.
+ *
+ * Returns the matching table entry, or NULL when no table is loaded or no
+ * entry has exactly that address.
+ */
+static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
+                                            unsigned long key)
+{
+       struct kernel_sym needle = {
+               .address = key,
+       };
+
+       if (!dd->sym_mapping)
+               return NULL;
+
+       return bsearch(&needle, dd->sym_mapping, dd->sym_count,
+                      sizeof(*dd->sym_mapping), kernel_syms_cmp);
+}
+
+/*
+ * cb_print callback for the plain dump: forward the formatted text to
+ * stdout unchanged.  The env argument is not used.
+ */
+static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+       va_list ap;
+
+       va_start(ap, fmt);
+       vprintf(fmt, ap);
+       va_end(ap);
+}
+
+/*
+ * cb_print callback for the graph dump: print the formatted text to stdout
+ * with the escaping applied below.  The env argument is not used.
+ *
+ * The text is formatted into a 64 byte buffer (longer output is truncated by
+ * vsnprintf) and then escaped while it is written out.  Escaping on output,
+ * rather than expanding the buffer in place, means the added characters can
+ * never overflow the buffer.
+ */
+static void
+print_insn_for_graph(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+       char buf[64];
+       const char *p;
+       va_list args;
+
+       va_start(args, fmt);
+       vsnprintf(buf, sizeof(buf), fmt, args);
+       va_end(args);
+
+       for (p = buf; *p != '\0'; p++) {
+               if (*p == '\n') {
+                       /*
+                        * Align each instruction dump row left ("\l"), then
+                        * output multiline concatenation ("\" + newline).
+                        */
+                       fputs("\\l\\", stdout);
+               } else if (*p == '<' || *p == '>' || *p == '|' || *p == '&') {
+                       /* Escape special character. */
+                       putchar('\\');
+               }
+
+               putchar(*p);
+       }
+}
+
+/*
+ * cb_print callback for the JSON dump: emit the formatted text as a quoted
+ * JSON string through json_wtr.  The env argument is not used.
+ *
+ * A trailing newline in fmt is removed before formatting.  The copy always
+ * has room for the terminator, so an empty fmt yields an empty string
+ * instead of a zero-length array with indeterminate contents.
+ */
+static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+       size_t l = strlen(fmt);
+       char chomped_fmt[l + 1];
+       va_list args;
+
+       /* Copy including the terminating NUL, then chomp the newline. */
+       memcpy(chomped_fmt, fmt, l + 1);
+       if (l > 0 && chomped_fmt[l - 1] == '\n')
+               chomped_fmt[l - 1] = '\0';
+
+       va_start(args, fmt);
+       jsonw_vprintf_enquote(json_wtr, chomped_fmt, args);
+       va_end(args);
+}
+
+/*
+ * Format a call printed as "<signed insn->off>#<target>" into
+ * dd->scratch_buff.  The target is the symbol name when sym is non-NULL,
+ * otherwise the hexadecimal address.  Returns dd->scratch_buff.
+ */
+static const char *print_call_pcrel(struct dump_data *dd,
+                                   struct kernel_sym *sym,
+                                   unsigned long address,
+                                   const struct bpf_insn *insn)
+{
+       char *out = dd->scratch_buff;
+
+       if (!sym) {
+               snprintf(out, sizeof(dd->scratch_buff),
+                        "%+d#0x%lx", insn->off, address);
+               return out;
+       }
+
+       snprintf(out, sizeof(dd->scratch_buff),
+                "%+d#%s", insn->off, sym->name);
+       return out;
+}
+
+/*
+ * Format a call target into dd->scratch_buff: the symbol name when sym is
+ * non-NULL, otherwise the hexadecimal address.  Returns dd->scratch_buff.
+ */
+static const char *print_call_helper(struct dump_data *dd,
+                                    struct kernel_sym *sym,
+                                    unsigned long address)
+{
+       char *out = dd->scratch_buff;
+
+       if (!sym) {
+               snprintf(out, sizeof(dd->scratch_buff), "0x%lx", address);
+               return out;
+       }
+
+       snprintf(out, sizeof(dd->scratch_buff), "%s", sym->name);
+       return out;
+}
+
+/*
+ * cb_call callback: describe the target of a call instruction.
+ *
+ * The target address is dd->address_call_base + insn->imm.  It is resolved
+ * against the loaded symbol table and formatted by print_call_pcrel() for
+ * BPF_PSEUDO_CALL instructions, or by print_call_helper() otherwise.
+ * Returns the scratch buffer of the dump_data passed as private_data.
+ */
+static const char *print_call(void *private_data,
+                             const struct bpf_insn *insn)
+{
+       struct dump_data *dd = private_data;
+       unsigned long target = dd->address_call_base + insn->imm;
+       struct kernel_sym *match = kernel_syms_search(dd, target);
+
+       if (insn->src_reg != BPF_PSEUDO_CALL)
+               return print_call_helper(dd, match, target);
+
+       return print_call_pcrel(dd, match, target, insn);
+}
+
+/*
+ * cb_imm callback: format a 64-bit immediate into the scratch buffer.
+ *
+ * Instructions whose src_reg is BPF_PSEUDO_MAP_FD are printed as
+ * "map[id:<imm>]"; any other instruction prints full_imm in hexadecimal.
+ * Returns the scratch buffer of the dump_data passed as private_data.
+ */
+static const char *print_imm(void *private_data,
+                            const struct bpf_insn *insn,
+                            __u64 full_imm)
+{
+       struct dump_data *dd = private_data;
+       char *out = dd->scratch_buff;
+
+       if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
+               snprintf(out, sizeof(dd->scratch_buff),
+                        "0x%llx", (unsigned long long)full_imm);
+               return out;
+       }
+
+       snprintf(out, sizeof(dd->scratch_buff),
+                "map[id:%u]", insn->imm);
+       return out;
+}
+
+/*
+ * Dump translated instructions as a JSON array with one object per
+ * instruction.
+ *
+ * @dd:      symbol data handed to the print callbacks
+ * @buf:     instruction buffer, read as an array of struct bpf_insn
+ * @len:     size of @buf in bytes
+ * @opcodes: when true, add an "opcodes" object with the raw fields
+ *
+ * A BPF_LD | BPF_IMM | BPF_DW instruction occupies two slots; the second
+ * slot is skipped and, when present, folded into the "imm" hex dump.
+ */
+void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
+                     bool opcodes)
+{
+       const struct bpf_insn_cbs cbs = {
+               .cb_print       = print_insn_json,
+               .cb_call        = print_call,
+               .cb_imm         = print_imm,
+               .private_data   = dd,
+       };
+       unsigned int nr_insns = len / sizeof(struct bpf_insn);
+       struct bpf_insn *insn = buf;
+       bool double_insn = false;
+       unsigned int i;
+
+       jsonw_start_array(json_wtr);
+       for (i = 0; i < nr_insns; i++) {
+               if (double_insn) {
+                       double_insn = false;
+                       continue;
+               }
+               double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
+
+               jsonw_start_object(json_wtr);
+               jsonw_name(json_wtr, "disasm");
+               print_bpf_insn(&cbs, NULL, insn + i, true);
+
+               if (opcodes) {
+                       jsonw_name(json_wtr, "opcodes");
+                       jsonw_start_object(json_wtr);
+
+                       jsonw_name(json_wtr, "code");
+                       jsonw_printf(json_wtr, "\"0x%02hhx\"", insn[i].code);
+
+                       jsonw_name(json_wtr, "src_reg");
+                       jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].src_reg);
+
+                       jsonw_name(json_wtr, "dst_reg");
+                       jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].dst_reg);
+
+                       jsonw_name(json_wtr, "off");
+                       print_hex_data_json((uint8_t *)(&insn[i].off), 2);
+
+                       jsonw_name(json_wtr, "imm");
+                       /*
+                        * Only span into the second slot when it really is
+                        * inside the buffer: compare instruction indexes,
+                        * not an index against the byte length.
+                        */
+                       if (double_insn && i + 1 < nr_insns)
+                               print_hex_data_json((uint8_t *)(&insn[i].imm),
+                                                   12);
+                       else
+                               print_hex_data_json((uint8_t *)(&insn[i].imm),
+                                                   4);
+                       jsonw_end_object(json_wtr);
+               }
+               jsonw_end_object(json_wtr);
+       }
+       jsonw_end_array(json_wtr);
+}
+
+/*
+ * Dump translated instructions as text on stdout, one numbered line per
+ * instruction.
+ *
+ * @dd:      symbol data handed to the print callbacks
+ * @buf:     instruction buffer, read as an array of struct bpf_insn
+ * @len:     size of @buf in bytes
+ * @opcodes: when true, follow each instruction with its raw bytes in hex
+ *
+ * A BPF_LD | BPF_IMM | BPF_DW instruction occupies two slots; the second
+ * slot is skipped and, when present, appended to the hex dump of the first.
+ */
+void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
+                      bool opcodes)
+{
+       const struct bpf_insn_cbs cbs = {
+               .cb_print       = print_insn,
+               .cb_call        = print_call,
+               .cb_imm         = print_imm,
+               .private_data   = dd,
+       };
+       unsigned int nr_insns = len / sizeof(struct bpf_insn);
+       struct bpf_insn *insn = buf;
+       bool double_insn = false;
+       unsigned int i;
+
+       for (i = 0; i < nr_insns; i++) {
+               if (double_insn) {
+                       double_insn = false;
+                       continue;
+               }
+
+               double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
+
+               printf("% 4d: ", i);
+               print_bpf_insn(&cbs, NULL, insn + i, true);
+
+               if (opcodes) {
+                       printf("       ");
+                       fprint_hex(stdout, insn + i, 8, " ");
+                       /*
+                        * Only dump the second slot when it really is inside
+                        * the buffer: compare instruction indexes, not an
+                        * index against the byte length.
+                        */
+                       if (double_insn && i + 1 < nr_insns) {
+                               printf(" ");
+                               fprint_hex(stdout, insn + i + 1, 8, " ");
+                       }
+                       printf("\n");
+               }
+       }
+}
+
+/*
+ * Print the instructions from buf_start to buf_end (both inclusive) on
+ * stdout, using the graph print callback.
+ *
+ * Each instruction is prefixed with its index, counted from start_idx, and
+ * consecutive instructions are separated by " | ".
+ */
+void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end,
+                          unsigned int start_idx)
+{
+       const struct bpf_insn_cbs cbs = {
+               .cb_print       = print_insn_for_graph,
+               .cb_call        = print_call,
+               .cb_imm         = print_imm,
+               .private_data   = dd,
+       };
+       struct bpf_insn *first = buf_start;
+       struct bpf_insn *last = buf_end;
+       struct bpf_insn *it;
+
+       for (it = first; it <= last; it++) {
+               printf("% 4d: ", (int)(it - first + start_idx));
+               print_bpf_insn(&cbs, NULL, it, true);
+               /* No separator after the final instruction. */
+               if (it == last)
+                       break;
+               printf(" | ");
+       }
+}
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
new file mode 100644 (file)
index 0000000..b34affa
--- /dev/null
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __BPF_TOOL_XLATED_DUMPER_H
+#define __BPF_TOOL_XLATED_DUMPER_H
+
+#define SYM_MAX_NAME   256
+
+/* One kernel symbol: its address and its NUL-terminated name. */
+struct kernel_sym {
+       unsigned long address;
+       char name[SYM_MAX_NAME];
+};
+
+/* State shared by the xlated dump routines and their print callbacks. */
+struct dump_data {
+       /* Address of "__bpf_call_base"; call targets are computed from it. */
+       unsigned long address_call_base;
+       /* Symbol table (heap array), sorted by address once loaded. */
+       struct kernel_sym *sym_mapping;
+       /* Number of valid entries in sym_mapping. */
+       __u32 sym_count;
+       /* Buffer the call/imm callbacks format into and return. */
+       char scratch_buff[SYM_MAX_NAME + 8];
+};
+
+void kernel_syms_load(struct dump_data *dd);
+void kernel_syms_destroy(struct dump_data *dd);
+void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
+                     bool opcodes);
+void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
+                      bool opcodes);
+void dump_xlated_for_graph(struct dump_data *dd, void *buf, void *buf_end,
+                          unsigned int start_index);
+
+#endif
index db6bdc3751268351da3126f57566639fce355b12..d245c41213ac20850a1b9b58f1925b827899ff04 100644 (file)
@@ -133,6 +133,7 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_SOCK_OPS,
        BPF_PROG_TYPE_SK_SKB,
        BPF_PROG_TYPE_CGROUP_DEVICE,
+       BPF_PROG_TYPE_SK_MSG,
 };
 
 enum bpf_attach_type {
@@ -143,6 +144,7 @@ enum bpf_attach_type {
        BPF_SK_SKB_STREAM_PARSER,
        BPF_SK_SKB_STREAM_VERDICT,
        BPF_CGROUP_DEVICE,
+       BPF_SK_MSG_VERDICT,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -231,6 +233,28 @@ enum bpf_attach_type {
 #define BPF_F_RDONLY           (1U << 3)
 #define BPF_F_WRONLY           (1U << 4)
 
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID   (1U << 5)
+
+enum bpf_stack_build_id_status {
+       /* user space needs an empty entry to identify the end of a trace */
+       BPF_STACK_BUILD_ID_EMPTY = 0,
+       /* with valid build_id and offset */
+       BPF_STACK_BUILD_ID_VALID = 1,
+       /* couldn't get build_id, fall back to ip */
+       BPF_STACK_BUILD_ID_IP = 2,
+};
+
+/* Size in bytes of the build_id array below. */
+#define BPF_BUILD_ID_SIZE 20
+/*
+ * One stack entry stored as build_id+offset instead of a pointer.
+ * NOTE(review): status presumably holds an enum bpf_stack_build_id_status
+ * value that selects which union member is meaningful - confirm.
+ */
+struct bpf_stack_build_id {
+       __s32           status;
+       unsigned char   build_id[BPF_BUILD_ID_SIZE];
+       union {
+               __u64   offset;
+               __u64   ip;
+       };
+};
+
 union bpf_attr {
        struct { /* anonymous struct used by BPF_MAP_CREATE command */
                __u32   map_type;       /* one of enum bpf_map_type */
@@ -696,6 +720,15 @@ union bpf_attr {
  * int bpf_override_return(pt_regs, rc)
  *     @pt_regs: pointer to struct pt_regs
  *     @rc: the return value to set
+ *
+ * int bpf_msg_redirect_map(map, key, flags)
+ *     Redirect msg to a sock in map using key as a lookup key for the
+ *     sock in map.
+ *     @map: pointer to sockmap
+ *     @key: key to lookup sock in map
+ *     @flags: reserved for future use
+ *     Return: SK_PASS
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -757,7 +790,11 @@ union bpf_attr {
        FN(perf_prog_read_value),       \
        FN(getsockopt),                 \
        FN(override_return),            \
-       FN(sock_ops_cb_flags_set),
+       FN(sock_ops_cb_flags_set),      \
+       FN(msg_redirect_map),           \
+       FN(msg_apply_bytes),            \
+       FN(msg_cork_bytes),             \
+       FN(msg_pull_data),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -919,6 +956,14 @@ enum sk_action {
        SK_PASS,
 };
 
+/* user accessible metadata for SK_MSG packet hook, new fields must
+ * be added to the end of this structure
+ */
+struct sk_msg_md {
+       void *data;
+       void *data_end;
+};
+
 #define BPF_TAG_SIZE   8
 
 struct bpf_prog_info {
index 0fb5ef939732517293f222f2c85d88f2b4c1e973..7b26d4b0b0529649816ec1523d225eec7fa8ee26 100644 (file)
@@ -761,6 +761,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_TRACE_PAUSE           __KVM_DEPRECATED_MAIN_0x07
 #define KVM_TRACE_DISABLE         __KVM_DEPRECATED_MAIN_0x08
 #define KVM_GET_EMULATED_CPUID   _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
+#define KVM_GET_MSR_FEATURE_INDEX_LIST    _IOWR(KVMIO, 0x0a, struct kvm_msr_list)
 
 /*
  * Extension capability list.
@@ -934,6 +935,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_AIS_MIGRATION 150
 #define KVM_CAP_PPC_GET_CPU_CHAR 151
 #define KVM_CAP_S390_BPB 152
+#define KVM_CAP_GET_MSR_FEATURES 153
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
index a5684d0968b4fd087905e659c0ce80bd170434c2..5898c22ba310bdf27f626a2dfb7bc45e5cf2a505 100755 (executable)
@@ -33,7 +33,7 @@ import resource
 import struct
 import re
 import subprocess
-from collections import defaultdict
+from collections import defaultdict, namedtuple
 
 VMX_EXIT_REASONS = {
     'EXCEPTION_NMI':        0,
@@ -228,6 +228,7 @@ IOCTL_NUMBERS = {
 }
 
 ENCODING = locale.getpreferredencoding(False)
+TRACE_FILTER = re.compile(r'^[^\(]*$')
 
 
 class Arch(object):
@@ -260,6 +261,11 @@ class Arch(object):
                     return ArchX86(SVM_EXIT_REASONS)
                 return
 
+    def tracepoint_is_child(self, field):
+        """ Returns name of parent if 'field' is a child, None otherwise """
+        # TRACE_FILTER only matches names without a '(': those have no parent.
+        if (TRACE_FILTER.match(field)):
+            return None
+        # Otherwise the parent is the text before the first '('.
+        return field.split('(', 1)[0]
+
 
 class ArchX86(Arch):
     def __init__(self, exit_reasons):
@@ -267,6 +273,10 @@ class ArchX86(Arch):
         self.ioctl_numbers = IOCTL_NUMBERS
         self.exit_reasons = exit_reasons
 
+    def debugfs_is_child(self, field):
+        """ Returns name of parent if 'field' is a child, None otherwise """
+        return None
+
 
 class ArchPPC(Arch):
     def __init__(self):
@@ -282,6 +292,10 @@ class ArchPPC(Arch):
         self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
         self.exit_reasons = {}
 
+    def debugfs_is_child(self, field):
+        """ Returns name of parent if 'field' is a child, None otherwise """
+        return None
+
 
 class ArchA64(Arch):
     def __init__(self):
@@ -289,6 +303,10 @@ class ArchA64(Arch):
         self.ioctl_numbers = IOCTL_NUMBERS
         self.exit_reasons = AARCH64_EXIT_REASONS
 
+    def debugfs_is_child(self, field):
+        """ Returns name of parent if 'field' is a child, None otherwise """
+        return None
+
 
 class ArchS390(Arch):
     def __init__(self):
@@ -296,6 +314,12 @@ class ArchS390(Arch):
         self.ioctl_numbers = IOCTL_NUMBERS
         self.exit_reasons = None
 
+    def debugfs_is_child(self, field):
+        """ Returns name of parent if 'field' is a child, None otherwise """
+        # Every 'instruction_*' field is reported under 'exit_instruction'.
+        if field.startswith('instruction_'):
+            return 'exit_instruction'
+        # Any other field falls off the end, which yields None implicitly.
+
+
 ARCH = Arch.get_arch()
 
 
@@ -331,9 +355,6 @@ class perf_event_attr(ctypes.Structure):
 PERF_TYPE_TRACEPOINT = 2
 PERF_FORMAT_GROUP = 1 << 3
 
-PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
-PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
-
 
 class Group(object):
     """Represents a perf event group."""
@@ -376,8 +397,8 @@ class Event(object):
         self.syscall = self.libc.syscall
         self.name = name
         self.fd = None
-        self.setup_event(group, trace_cpu, trace_pid, trace_point,
-                         trace_filter, trace_set)
+        self._setup_event(group, trace_cpu, trace_pid, trace_point,
+                          trace_filter, trace_set)
 
     def __del__(self):
         """Closes the event's file descriptor.
@@ -390,7 +411,7 @@ class Event(object):
         if self.fd:
             os.close(self.fd)
 
-    def perf_event_open(self, attr, pid, cpu, group_fd, flags):
+    def _perf_event_open(self, attr, pid, cpu, group_fd, flags):
         """Wrapper for the sys_perf_evt_open() syscall.
 
         Used to set up performance events, returns a file descriptor or -1
@@ -409,7 +430,7 @@ class Event(object):
                             ctypes.c_int(pid), ctypes.c_int(cpu),
                             ctypes.c_int(group_fd), ctypes.c_long(flags))
 
-    def setup_event_attribute(self, trace_set, trace_point):
+    def _setup_event_attribute(self, trace_set, trace_point):
         """Returns an initialized ctype perf_event_attr struct."""
 
         id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
@@ -419,8 +440,8 @@ class Event(object):
         event_attr.config = int(open(id_path).read())
         return event_attr
 
-    def setup_event(self, group, trace_cpu, trace_pid, trace_point,
-                    trace_filter, trace_set):
+    def _setup_event(self, group, trace_cpu, trace_pid, trace_point,
+                     trace_filter, trace_set):
         """Sets up the perf event in Linux.
 
         Issues the syscall to register the event in the kernel and
@@ -428,7 +449,7 @@ class Event(object):
 
         """
 
-        event_attr = self.setup_event_attribute(trace_set, trace_point)
+        event_attr = self._setup_event_attribute(trace_set, trace_point)
 
         # First event will be group leader.
         group_leader = -1
@@ -437,8 +458,8 @@ class Event(object):
         if group.events:
             group_leader = group.events[0].fd
 
-        fd = self.perf_event_open(event_attr, trace_pid,
-                                  trace_cpu, group_leader, 0)
+        fd = self._perf_event_open(event_attr, trace_pid,
+                                   trace_cpu, group_leader, 0)
         if fd == -1:
             err = ctypes.get_errno()
             raise OSError(err, os.strerror(err),
@@ -475,6 +496,10 @@ class Event(object):
 
 class Provider(object):
     """Encapsulates functionalities used by all providers."""
+    def __init__(self, pid):
+        """Initialise child_events to False and store 'pid'."""
+        self.child_events = False
+        self.pid = pid
+
     @staticmethod
     def is_field_wanted(fields_filter, field):
         """Indicate whether field is valid according to fields_filter."""
@@ -500,12 +525,12 @@ class TracepointProvider(Provider):
     """
     def __init__(self, pid, fields_filter):
         self.group_leaders = []
-        self.filters = self.get_filters()
+        self.filters = self._get_filters()
         self.update_fields(fields_filter)
-        self.pid = pid
+        super(TracepointProvider, self).__init__(pid)
 
     @staticmethod
-    def get_filters():
+    def _get_filters():
         """Returns a dict of trace events, their filter ids and
         the values that can be filtered.
 
@@ -521,8 +546,8 @@ class TracepointProvider(Provider):
             filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
         return filters
 
-    def get_available_fields(self):
-        """Returns a list of available event's of format 'event name(filter
+    def _get_available_fields(self):
+        """Returns a list of available events of format 'event name(filter
         name)'.
 
         All available events have directories under
@@ -549,11 +574,12 @@ class TracepointProvider(Provider):
 
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
-        self.fields = [field for field in self.get_available_fields()
-                       if self.is_field_wanted(fields_filter, field)]
+        self.fields = [field for field in self._get_available_fields()
+                       if self.is_field_wanted(fields_filter, field) or
+                       ARCH.tracepoint_is_child(field)]
 
     @staticmethod
-    def get_online_cpus():
+    def _get_online_cpus():
         """Returns a list of cpu id integers."""
         def parse_int_list(list_string):
             """Returns an int list from a string of comma separated integers and
@@ -575,17 +601,17 @@ class TracepointProvider(Provider):
             cpu_string = cpu_list.readline()
             return parse_int_list(cpu_string)
 
-    def setup_traces(self):
+    def _setup_traces(self):
         """Creates all event and group objects needed to be able to retrieve
         data."""
-        fields = self.get_available_fields()
+        fields = self._get_available_fields()
         if self._pid > 0:
             # Fetch list of all threads of the monitored pid, as qemu
             # starts a thread for each vcpu.
             path = os.path.join('/proc', str(self._pid), 'task')
             groupids = self.walkdir(path)[1]
         else:
-            groupids = self.get_online_cpus()
+            groupids = self._get_online_cpus()
 
         # The constant is needed as a buffer for python libs, std
         # streams and other files that the script opens.
@@ -663,7 +689,7 @@ class TracepointProvider(Provider):
         # The garbage collector will get rid of all Event/Group
         # objects and open files after removing the references.
         self.group_leaders = []
-        self.setup_traces()
+        self._setup_traces()
         self.fields = self._fields
 
     def read(self, by_guest=0):
@@ -671,8 +697,12 @@ class TracepointProvider(Provider):
         ret = defaultdict(int)
         for group in self.group_leaders:
             for name, val in group.read().items():
-                if name in self._fields:
-                    ret[name] += val
+                if name not in self._fields:
+                    continue
+                parent = ARCH.tracepoint_is_child(name)
+                if parent:
+                    name += ' ' + parent
+                ret[name] += val
         return ret
 
     def reset(self):
@@ -690,11 +720,11 @@ class DebugfsProvider(Provider):
         self._baseline = {}
         self.do_read = True
         self.paths = []
-        self.pid = pid
+        super(DebugfsProvider, self).__init__(pid)
         if include_past:
-            self.restore()
+            self._restore()
 
-    def get_available_fields(self):
+    def _get_available_fields(self):
         """"Returns a list of available fields.
 
         The fields are all available KVM debugfs files
@@ -704,8 +734,9 @@ class DebugfsProvider(Provider):
 
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
-        self._fields = [field for field in self.get_available_fields()
-                        if self.is_field_wanted(fields_filter, field)]
+        self._fields = [field for field in self._get_available_fields()
+                        if self.is_field_wanted(fields_filter, field) or
+                        ARCH.debugfs_is_child(field)]
 
     @property
     def fields(self):
@@ -758,7 +789,7 @@ class DebugfsProvider(Provider):
                     paths.append(dir)
         for path in paths:
             for field in self._fields:
-                value = self.read_field(field, path)
+                value = self._read_field(field, path)
                 key = path + field
                 if reset == 1:
                     self._baseline[key] = value
@@ -766,20 +797,21 @@ class DebugfsProvider(Provider):
                     self._baseline[key] = 0
                 if self._baseline.get(key, -1) == -1:
                     self._baseline[key] = value
-                increment = (results.get(field, 0) + value -
-                             self._baseline.get(key, 0))
-                if by_guest:
-                    pid = key.split('-')[0]
-                    if pid in results:
-                        results[pid] += increment
-                    else:
-                        results[pid] = increment
+                parent = ARCH.debugfs_is_child(field)
+                if parent:
+                    field = field + ' ' + parent
+                else:
+                    if by_guest:
+                        field = key.split('-')[0]    # set 'field' to 'pid'
+                increment = value - self._baseline.get(key, 0)
+                if field in results:
+                    results[field] += increment
                 else:
                     results[field] = increment
 
         return results
 
-    def read_field(self, field, path):
+    def _read_field(self, field, path):
         """Returns the value of a single field from a specific VM."""
         try:
             return int(open(os.path.join(PATH_DEBUGFS_KVM,
@@ -794,12 +826,15 @@ class DebugfsProvider(Provider):
         self._baseline = {}
         self.read(1)
 
-    def restore(self):
+    def _restore(self):
         """Reset field counters"""
         self._baseline = {}
         self.read(2)
 
 
+EventStat = namedtuple('EventStat', ['value', 'delta'])
+
+
 class Stats(object):
     """Manages the data providers and the data they provide.
 
@@ -808,13 +843,13 @@ class Stats(object):
 
     """
     def __init__(self, options):
-        self.providers = self.get_providers(options)
+        self.providers = self._get_providers(options)
         self._pid_filter = options.pid
         self._fields_filter = options.fields
         self.values = {}
+        self._child_events = False
 
-    @staticmethod
-    def get_providers(options):
+    def _get_providers(self, options):
         """Returns a list of data providers depending on the passed options."""
         providers = []
 
@@ -826,7 +861,7 @@ class Stats(object):
 
         return providers
 
-    def update_provider_filters(self):
+    def _update_provider_filters(self):
         """Propagates fields filters to providers."""
         # As we reset the counters when updating the fields we can
         # also clear the cache of old values.
@@ -847,7 +882,7 @@ class Stats(object):
     def fields_filter(self, fields_filter):
         if fields_filter != self._fields_filter:
             self._fields_filter = fields_filter
-            self.update_provider_filters()
+            self._update_provider_filters()
 
     @property
     def pid_filter(self):
@@ -861,16 +896,33 @@ class Stats(object):
             for provider in self.providers:
                 provider.pid = self._pid_filter
 
+    @property
+    def child_events(self):
+        return self._child_events
+
+    @child_events.setter
+    def child_events(self, val):
+        self._child_events = val
+        for provider in self.providers:
+            provider.child_events = val
+
     def get(self, by_guest=0):
         """Returns a dict with field -> (value, delta to last value) of all
-        provider data."""
+        provider data.
+        Key formats:
+          * plain: 'key' is event name
+          * child-parent: 'key' is in format '<child> <parent>'
+          * pid: 'key' is the pid of the guest, and the record contains the
+               aggregated event data
+        These formats are generated by the providers, and handled in class TUI.
+        """
         for provider in self.providers:
             new = provider.read(by_guest=by_guest)
-            for key in new if by_guest else provider.fields:
-                oldval = self.values.get(key, (0, 0))[0]
+            for key in new:
+                oldval = self.values.get(key, EventStat(0, 0)).value
                 newval = new.get(key, 0)
                 newdelta = newval - oldval
-                self.values[key] = (newval, newdelta)
+                self.values[key] = EventStat(newval, newdelta)
         return self.values
 
     def toggle_display_guests(self, to_pid):
@@ -899,10 +951,10 @@ class Stats(object):
         self.get(to_pid)
         return 0
 
+
 DELAY_DEFAULT = 3.0
 MAX_GUEST_NAME_LEN = 48
 MAX_REGEX_LEN = 44
-DEFAULT_REGEX = r'^[^\(]*$'
 SORT_DEFAULT = 0
 
 
@@ -969,7 +1021,7 @@ class Tui(object):
 
         return res
 
-    def print_all_gnames(self, row):
+    def _print_all_gnames(self, row):
         """Print a list of all running guests along with their pids."""
         self.screen.addstr(row, 2, '%8s  %-60s' %
                            ('Pid', 'Guest Name (fuzzy list, might be '
@@ -1032,19 +1084,13 @@ class Tui(object):
 
         return name
 
-    def update_drilldown(self):
-        """Sets or removes a filter that only allows fields without braces."""
-        if not self.stats.fields_filter:
-            self.stats.fields_filter = DEFAULT_REGEX
-
-        elif self.stats.fields_filter == DEFAULT_REGEX:
-            self.stats.fields_filter = None
-
-    def update_pid(self, pid):
+    def _update_pid(self, pid):
         """Propagates pid selection to stats object."""
+        self.screen.addstr(4, 1, 'Updating pid filter...')
+        self.screen.refresh()
         self.stats.pid_filter = pid
 
-    def refresh_header(self, pid=None):
+    def _refresh_header(self, pid=None):
         """Refreshes the header."""
         if pid is None:
             pid = self.stats.pid_filter
@@ -1059,8 +1105,7 @@ class Tui(object):
                                .format(pid, gname), curses.A_BOLD)
         else:
             self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
-        if self.stats.fields_filter and self.stats.fields_filter \
-           != DEFAULT_REGEX:
+        if self.stats.fields_filter:
             regex = self.stats.fields_filter
             if len(regex) > MAX_REGEX_LEN:
                 regex = regex[:MAX_REGEX_LEN] + '...'
@@ -1075,56 +1120,99 @@ class Tui(object):
         self.screen.addstr(4, 1, 'Collecting data...')
         self.screen.refresh()
 
-    def refresh_body(self, sleeptime):
+    def _refresh_body(self, sleeptime):
+        def is_child_field(field):
+            return field.find('(') != -1
+
+        def insert_child(sorted_items, child, values, parent):
+            num = len(sorted_items)
+            for i in range(0, num):
+                # only add child if parent is present
+                if parent.startswith(sorted_items[i][0]):
+                    sorted_items.insert(i + 1, ('  ' + child, values))
+
+        def get_sorted_events(self, stats):
+            """ separate parent and child events """
+            if self._sorting == SORT_DEFAULT:
+                def sortkey((_k, v)):
+                    # sort by (delta value, overall value)
+                    return (v.delta, v.value)
+            else:
+                def sortkey((_k, v)):
+                    # sort by overall value
+                    return v.value
+
+            childs = []
+            sorted_items = []
+            # we can't rule out child events to appear prior to parents even
+            # when sorted - separate out all children first, and add in later
+            for key, values in sorted(stats.items(), key=sortkey,
+                                      reverse=True):
+                if values == (0, 0):
+                    continue
+                if key.find(' ') != -1:
+                    if not self.stats.child_events:
+                        continue
+                    childs.insert(0, (key, values))
+                else:
+                    sorted_items.append((key, values))
+            if self.stats.child_events:
+                for key, values in childs:
+                    (child, parent) = key.split(' ')
+                    insert_child(sorted_items, child, values, parent)
+
+            return sorted_items
+
         row = 3
         self.screen.move(row, 0)
         self.screen.clrtobot()
         stats = self.stats.get(self._display_guests)
-
-        def sortCurAvg(x):
-            # sort by current events if available
-            if stats[x][1]:
-                return (-stats[x][1], -stats[x][0])
+        total = 0.
+        ctotal = 0.
+        for key, values in stats.items():
+            if self._display_guests:
+                if self.get_gname_from_pid(key):
+                    total += values.value
+                continue
+            if not key.find(' ') != -1:
+                total += values.value
             else:
-                return (0, -stats[x][0])
+                ctotal += values.value
+        if total == 0.:
+            # we don't have any fields, or all non-child events are filtered
+            total = ctotal
 
-        def sortTotal(x):
-            # sort by totals
-            return (0, -stats[x][0])
-        total = 0.
-        for key in stats.keys():
-            if key.find('(') is -1:
-                total += stats[key][0]
-        if self._sorting == SORT_DEFAULT:
-            sortkey = sortCurAvg
-        else:
-            sortkey = sortTotal
+        # print events
         tavg = 0
-        for key in sorted(stats.keys(), key=sortkey):
-            if row >= self.screen.getmaxyx()[0] - 1:
-                break
-            values = stats[key]
-            if not values[0] and not values[1]:
+        tcur = 0
+        for key, values in get_sorted_events(self, stats):
+            if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
                 break
-            if values[0] is not None:
-                cur = int(round(values[1] / sleeptime)) if values[1] else ''
-                if self._display_guests:
-                    key = self.get_gname_from_pid(key)
-                self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
-                                   (key, values[0], values[0] * 100 / total,
-                                    cur))
-                if cur is not '' and key.find('(') is -1:
-                    tavg += cur
+            if self._display_guests:
+                key = self.get_gname_from_pid(key)
+                if not key:
+                    continue
+            cur = int(round(values.delta / sleeptime)) if values.delta else ''
+            if key[0] != ' ':
+                if values.delta:
+                    tcur += values.delta
+                ptotal = values.value
+                ltotal = total
+            else:
+                ltotal = ptotal
+            self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key,
+                               values.value,
+                               values.value * 100 / float(ltotal), cur))
             row += 1
         if row == 3:
             self.screen.addstr(4, 1, 'No matching events reported yet')
-        else:
+        if row > 4:
+            tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
             self.screen.addstr(row, 1, '%-40s %10d        %8s' %
-                               ('Total', total, tavg if tavg else ''),
-                               curses.A_BOLD)
+                               ('Total', total, tavg), curses.A_BOLD)
         self.screen.refresh()
 
-    def show_msg(self, text):
+    def _show_msg(self, text):
         """Display message centered text and exit on key press"""
         hint = 'Press any key to continue'
         curses.cbreak()
@@ -1139,16 +1227,16 @@ class Tui(object):
                            curses.A_STANDOUT)
         self.screen.getkey()
 
-    def show_help_interactive(self):
+    def _show_help_interactive(self):
         """Display help with list of interactive commands"""
         msg = ('   b     toggle events by guests (debugfs only, honors'
                ' filters)',
                '   c     clear filter',
                '   f     filter by regular expression',
-               '   g     filter by guest name',
+               '   g     filter by guest name/PID',
                '   h     display interactive commands reference',
                '   o     toggle sorting order (Total vs CurAvg/s)',
-               '   p     filter by PID',
+               '   p     filter by guest name/PID',
                '   q     quit',
                '   r     reset stats',
                '   s     set update interval',
@@ -1165,14 +1253,15 @@ class Tui(object):
             self.screen.addstr(row, 0, line)
             row += 1
         self.screen.getkey()
-        self.refresh_header()
+        self._refresh_header()
 
-    def show_filter_selection(self):
+    def _show_filter_selection(self):
         """Draws filter selection mask.
 
         Asks for a valid regex and sets the fields filter accordingly.
 
         """
+        msg = ''
         while True:
             self.screen.erase()
             self.screen.addstr(0, 0,
@@ -1181,61 +1270,25 @@ class Tui(object):
             self.screen.addstr(2, 0,
                                "Current regex: {0}"
                                .format(self.stats.fields_filter))
+            self.screen.addstr(5, 0, msg)
             self.screen.addstr(3, 0, "New regex: ")
             curses.echo()
             regex = self.screen.getstr().decode(ENCODING)
             curses.noecho()
             if len(regex) == 0:
-                self.stats.fields_filter = DEFAULT_REGEX
-                self.refresh_header()
+                self.stats.fields_filter = ''
+                self._refresh_header()
                 return
             try:
                 re.compile(regex)
                 self.stats.fields_filter = regex
-                self.refresh_header()
+                self._refresh_header()
                 return
             except re.error:
+                msg = '"' + regex + '": Not a valid regular expression'
                 continue
 
-    def show_vm_selection_by_pid(self):
-        """Draws PID selection mask.
-
-        Asks for a pid until a valid pid or 0 has been entered.
-
-        """
-        msg = ''
-        while True:
-            self.screen.erase()
-            self.screen.addstr(0, 0,
-                               'Show statistics for specific pid.',
-                               curses.A_BOLD)
-            self.screen.addstr(1, 0,
-                               'This might limit the shown data to the trace '
-                               'statistics.')
-            self.screen.addstr(5, 0, msg)
-            self.print_all_gnames(7)
-
-            curses.echo()
-            self.screen.addstr(3, 0, "Pid [0 or pid]: ")
-            pid = self.screen.getstr().decode(ENCODING)
-            curses.noecho()
-
-            try:
-                if len(pid) > 0:
-                    pid = int(pid)
-                    if pid != 0 and not os.path.isdir(os.path.join('/proc/',
-                                                                   str(pid))):
-                        msg = '"' + str(pid) + '": Not a running process'
-                        continue
-                else:
-                    pid = 0
-                self.refresh_header(pid)
-                self.update_pid(pid)
-                break
-            except ValueError:
-                msg = '"' + str(pid) + '": Not a valid pid'
-
-    def show_set_update_interval(self):
+    def _show_set_update_interval(self):
         """Draws update interval selection mask."""
         msg = ''
         while True:
@@ -1265,60 +1318,67 @@ class Tui(object):
 
             except ValueError:
                 msg = '"' + str(val) + '": Invalid value'
-        self.refresh_header()
+        self._refresh_header()
 
-    def show_vm_selection_by_guest_name(self):
+    def _show_vm_selection_by_guest(self):
         """Draws guest selection mask.
 
-        Asks for a guest name until a valid guest name or '' is entered.
+        Asks for a guest name or pid until a valid guest name or '' is entered.
 
         """
         msg = ''
         while True:
             self.screen.erase()
             self.screen.addstr(0, 0,
-                               'Show statistics for specific guest.',
+                               'Show statistics for specific guest or pid.',
                                curses.A_BOLD)
             self.screen.addstr(1, 0,
                                'This might limit the shown data to the trace '
                                'statistics.')
             self.screen.addstr(5, 0, msg)
-            self.print_all_gnames(7)
+            self._print_all_gnames(7)
             curses.echo()
-            self.screen.addstr(3, 0, "Guest [ENTER or guest]: ")
-            gname = self.screen.getstr().decode(ENCODING)
+            curses.curs_set(1)
+            self.screen.addstr(3, 0, "Guest or pid [ENTER exits]: ")
+            guest = self.screen.getstr().decode(ENCODING)
             curses.noecho()
 
-            if not gname:
-                self.refresh_header(0)
-                self.update_pid(0)
+            pid = 0
+            if not guest or guest == '0':
                 break
-            else:
-                pids = []
-                try:
-                    pids = self.get_pid_from_gname(gname)
-                except:
-                    msg = '"' + gname + '": Internal error while searching, ' \
-                          'use pid filter instead'
-                    continue
-                if len(pids) == 0:
-                    msg = '"' + gname + '": Not an active guest'
+            if guest.isdigit():
+                if not os.path.isdir(os.path.join('/proc/', guest)):
+                    msg = '"' + guest + '": Not a running process'
                     continue
-                if len(pids) > 1:
-                    msg = '"' + gname + '": Multiple matches found, use pid ' \
-                          'filter instead'
-                    continue
-                self.refresh_header(pids[0])
-                self.update_pid(pids[0])
+                pid = int(guest)
                 break
+            pids = []
+            try:
+                pids = self.get_pid_from_gname(guest)
+            except:
+                msg = '"' + guest + '": Internal error while searching, ' \
+                      'use pid filter instead'
+                continue
+            if len(pids) == 0:
+                msg = '"' + guest + '": Not an active guest'
+                continue
+            if len(pids) > 1:
+                msg = '"' + guest + '": Multiple matches found, use pid ' \
+                      'filter instead'
+                continue
+            pid = pids[0]
+            break
+        curses.curs_set(0)
+        self._refresh_header(pid)
+        self._update_pid(pid)
 
     def show_stats(self):
         """Refreshes the screen and processes user input."""
         sleeptime = self._delay_initial
-        self.refresh_header()
+        self._refresh_header()
         start = 0.0  # result based on init value never appears on screen
         while True:
-            self.refresh_body(time.time() - start)
+            self._refresh_body(time.time() - start)
             curses.halfdelay(int(sleeptime * 10))
             start = time.time()
             sleeptime = self._delay_regular
@@ -1327,47 +1387,39 @@ class Tui(object):
                 if char == 'b':
                     self._display_guests = not self._display_guests
                     if self.stats.toggle_display_guests(self._display_guests):
-                        self.show_msg(['Command not available with tracepoints'
-                                       ' enabled', 'Restart with debugfs only '
-                                       '(see option \'-d\') and try again!'])
+                        self._show_msg(['Command not available with '
+                                        'tracepoints enabled', 'Restart with '
+                                        'debugfs only (see option \'-d\') and '
+                                        'try again!'])
                         self._display_guests = not self._display_guests
-                    self.refresh_header()
+                    self._refresh_header()
                 if char == 'c':
-                    self.stats.fields_filter = DEFAULT_REGEX
-                    self.refresh_header(0)
-                    self.update_pid(0)
+                    self.stats.fields_filter = ''
+                    self._refresh_header(0)
+                    self._update_pid(0)
                 if char == 'f':
                     curses.curs_set(1)
-                    self.show_filter_selection()
+                    self._show_filter_selection()
                     curses.curs_set(0)
                     sleeptime = self._delay_initial
-                if char == 'g':
-                    curses.curs_set(1)
-                    self.show_vm_selection_by_guest_name()
-                    curses.curs_set(0)
+                if char == 'g' or char == 'p':
+                    self._show_vm_selection_by_guest()
                     sleeptime = self._delay_initial
                 if char == 'h':
-                    self.show_help_interactive()
+                    self._show_help_interactive()
                 if char == 'o':
                     self._sorting = not self._sorting
-                if char == 'p':
-                    curses.curs_set(1)
-                    self.show_vm_selection_by_pid()
-                    curses.curs_set(0)
-                    sleeptime = self._delay_initial
                 if char == 'q':
                     break
                 if char == 'r':
                     self.stats.reset()
                 if char == 's':
                     curses.curs_set(1)
-                    self.show_set_update_interval()
+                    self._show_set_update_interval()
                     curses.curs_set(0)
                     sleeptime = self._delay_initial
                 if char == 'x':
-                    self.update_drilldown()
-                    # prevents display of current values on next refresh
-                    self.stats.get(self._display_guests)
+                    self.stats.child_events = not self.stats.child_events
             except KeyboardInterrupt:
                 break
             except curses.error:
@@ -1380,9 +1432,9 @@ def batch(stats):
         s = stats.get()
         time.sleep(1)
         s = stats.get()
-        for key in sorted(s.keys()):
-            values = s[key]
-            print('%-42s%10d%10d' % (key, values[0], values[1]))
+        for key, values in sorted(s.items()):
+            print('%-42s%10d%10d' % (key.split(' ')[0], values.value,
+                  values.delta))
     except KeyboardInterrupt:
         pass
 
@@ -1392,14 +1444,14 @@ def log(stats):
     keys = sorted(stats.get().keys())
 
     def banner():
-        for k in keys:
-            print(k, end=' ')
+        for key in keys:
+            print(key.split(' ')[0], end=' ')
         print()
 
     def statline():
         s = stats.get()
-        for k in keys:
-            print(' %9d' % s[k][1], end=' ')
+        for key in keys:
+            print(' %9d' % s[key].delta, end=' ')
         print()
     line = 0
     banner_repeat = 20
@@ -1504,7 +1556,7 @@ Press any other key to refresh statistics immediately.
                          )
     optparser.add_option('-f', '--fields',
                          action='store',
-                         default=DEFAULT_REGEX,
+                         default='',
                          dest='fields',
                          help='''fields to display (regex)
                                  "-f help" for a list of available events''',
@@ -1539,17 +1591,6 @@ Press any other key to refresh statistics immediately.
 
 def check_access(options):
     """Exits if the current user can't access all needed directories."""
-    if not os.path.exists('/sys/kernel/debug'):
-        sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.')
-        sys.exit(1)
-
-    if not os.path.exists(PATH_DEBUGFS_KVM):
-        sys.stderr.write("Please make sure, that debugfs is mounted and "
-                         "readable by the current user:\n"
-                         "('mount -t debugfs debugfs /sys/kernel/debug')\n"
-                         "Also ensure, that the kvm modules are loaded.\n")
-        sys.exit(1)
-
     if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or
                                                      not options.debugfs):
         sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
@@ -1567,7 +1608,33 @@ def check_access(options):
     return options
 
 
+def assign_globals():
+    global PATH_DEBUGFS_KVM
+    global PATH_DEBUGFS_TRACING
+
+    debugfs = ''
+    for line in file('/proc/mounts'):
+        if line.split(' ')[0] == 'debugfs':
+            debugfs = line.split(' ')[1]
+            break
+    if debugfs == '':
+        sys.stderr.write("Please make sure that CONFIG_DEBUG_FS is enabled in "
+                         "your kernel, mounted and\nreadable by the current "
+                         "user:\n"
+                         "('mount -t debugfs debugfs /sys/kernel/debug')\n")
+        sys.exit(1)
+
+    PATH_DEBUGFS_KVM = os.path.join(debugfs, 'kvm')
+    PATH_DEBUGFS_TRACING = os.path.join(debugfs, 'tracing')
+
+    if not os.path.exists(PATH_DEBUGFS_KVM):
+        sys.stderr.write("Please make sure that CONFIG_KVM is enabled in "
+                         "your kernel and that the modules are loaded.\n")
+        sys.exit(1)
+
+
 def main():
+    assign_globals()
     options = get_options()
     options = check_access(options)
 
index b5b3810c9e945d7f3a39568840fbc5b73f84983b..0811d860fe75000de852c569ccaaa3ae1f0aa944 100644 (file)
@@ -35,13 +35,13 @@ INTERACTIVE COMMANDS
 
 *f*::  filter by regular expression
 
-*g*::  filter by guest name
+*g*::  filter by guest name/PID
 
 *h*::  display interactive commands reference
 
 *o*::   toggle sorting order (Total vs CurAvg/s)
 
-*p*::  filter by PID
+*p*::  filter by guest name/PID
 
 *q*::  quit
 
index 5bbbf285af74a0afb01ae4dcaba9a7cdbb3a3e93..64a8fc38418610b8f80a0476e75a8024bc2167fa 100644 (file)
@@ -1857,6 +1857,7 @@ static const struct {
        BPF_PROG_SEC("lwt_xmit",        BPF_PROG_TYPE_LWT_XMIT),
        BPF_PROG_SEC("sockops",         BPF_PROG_TYPE_SOCK_OPS),
        BPF_PROG_SEC("sk_skb",          BPF_PROG_TYPE_SK_SKB),
+       BPF_PROG_SEC("sk_msg",          BPF_PROG_TYPE_SK_MSG),
 };
 #undef BPF_PROG_SEC
 
index 57254f5b2779fb02276f00eb82401a05eaaabeb0..694abc628e9b3060b2252c1e8c86af4d6176e518 100644 (file)
@@ -29,7 +29,7 @@
 #include "builtin.h"
 #include "check.h"
 
-bool no_fp, no_unreachable;
+bool no_fp, no_unreachable, retpoline, module;
 
 static const char * const check_usage[] = {
        "objtool check [<options>] file.o",
@@ -39,6 +39,8 @@ static const char * const check_usage[] = {
 const struct option check_options[] = {
        OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
        OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
+       OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
+       OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
        OPT_END(),
 };
 
@@ -53,5 +55,5 @@ int cmd_check(int argc, const char **argv)
 
        objname = argv[0];
 
-       return check(objname, no_fp, no_unreachable, false);
+       return check(objname, false);
 }
index 91e8e19ff5e06193adc55c455b4d97ad947da7ee..77ea2b97117d2fd586e52294593c4d0c23a55ecd 100644 (file)
@@ -25,7 +25,6 @@
  */
 
 #include <string.h>
-#include <subcmd/parse-options.h>
 #include "builtin.h"
 #include "check.h"
 
@@ -36,9 +35,6 @@ static const char *orc_usage[] = {
        NULL,
 };
 
-extern const struct option check_options[];
-extern bool no_fp, no_unreachable;
-
 int cmd_orc(int argc, const char **argv)
 {
        const char *objname;
@@ -54,7 +50,7 @@ int cmd_orc(int argc, const char **argv)
 
                objname = argv[0];
 
-               return check(objname, no_fp, no_unreachable, true);
+               return check(objname, true);
        }
 
        if (!strcmp(argv[0], "dump")) {
index dd526067fed5ebcacbb45b7ce8a686437bd83d1a..28ff40e19a1413823b9b06ae4d1f1b42d922e850 100644 (file)
 #ifndef _BUILTIN_H
 #define _BUILTIN_H
 
+#include <subcmd/parse-options.h>
+
+extern const struct option check_options[];
+extern bool no_fp, no_unreachable, retpoline, module;
+
 extern int cmd_check(int argc, const char **argv);
 extern int cmd_orc(int argc, const char **argv);
 
index a8cb69a2657658ec41c5877f1b4a47526c8079b0..92b6a2c21631d810cda14a79ef3fa2d906e6cb50 100644 (file)
@@ -18,6 +18,7 @@
 #include <string.h>
 #include <stdlib.h>
 
+#include "builtin.h"
 #include "check.h"
 #include "elf.h"
 #include "special.h"
@@ -33,7 +34,6 @@ struct alternative {
 };
 
 const char *objname;
-static bool no_fp;
 struct cfi_state initial_func_cfi;
 
 struct instruction *find_insn(struct objtool_file *file,
@@ -497,6 +497,7 @@ static int add_jump_destinations(struct objtool_file *file)
                         * disguise, so convert them accordingly.
                         */
                        insn->type = INSN_JUMP_DYNAMIC;
+                       insn->retpoline_safe = true;
                        continue;
                } else {
                        /* sibling call */
@@ -548,7 +549,8 @@ static int add_call_destinations(struct objtool_file *file)
                        if (!insn->call_dest && !insn->ignore) {
                                WARN_FUNC("unsupported intra-function call",
                                          insn->sec, insn->offset);
-                               WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
+                               if (retpoline)
+                                       WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
                                return -1;
                        }
 
@@ -923,7 +925,11 @@ static struct rela *find_switch_table(struct objtool_file *file,
                if (find_symbol_containing(file->rodata, text_rela->addend))
                        continue;
 
-               return find_rela_by_dest(file->rodata, text_rela->addend);
+               rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend);
+               if (!rodata_rela)
+                       continue;
+
+               return rodata_rela;
        }
 
        return NULL;
@@ -1108,6 +1114,41 @@ static int read_unwind_hints(struct objtool_file *file)
        return 0;
 }
 
+static int read_retpoline_hints(struct objtool_file *file)
+{
+       struct section *sec;
+       struct instruction *insn;
+       struct rela *rela;
+
+       sec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe");
+       if (!sec)
+               return 0;
+
+       list_for_each_entry(rela, &sec->rela_list, list) {
+               if (rela->sym->type != STT_SECTION) {
+                       WARN("unexpected relocation symbol type in %s", sec->name);
+                       return -1;
+               }
+
+               insn = find_insn(file, rela->sym->sec, rela->addend);
+               if (!insn) {
+                       WARN("bad .discard.retpoline_safe entry");
+                       return -1;
+               }
+
+               if (insn->type != INSN_JUMP_DYNAMIC &&
+                   insn->type != INSN_CALL_DYNAMIC) {
+                       WARN_FUNC("retpoline_safe hint not an indirect jump/call",
+                                 insn->sec, insn->offset);
+                       return -1;
+               }
+
+               insn->retpoline_safe = true;
+       }
+
+       return 0;
+}
+
 static int decode_sections(struct objtool_file *file)
 {
        int ret;
@@ -1146,6 +1187,10 @@ static int decode_sections(struct objtool_file *file)
        if (ret)
                return ret;
 
+       ret = read_retpoline_hints(file);
+       if (ret)
+               return ret;
+
        return 0;
 }
 
@@ -1891,6 +1936,38 @@ static int validate_unwind_hints(struct objtool_file *file)
        return warnings;
 }
 
+static int validate_retpoline(struct objtool_file *file)
+{
+       struct instruction *insn;
+       int warnings = 0;
+
+       for_each_insn(file, insn) {
+               if (insn->type != INSN_JUMP_DYNAMIC &&
+                   insn->type != INSN_CALL_DYNAMIC)
+                       continue;
+
+               if (insn->retpoline_safe)
+                       continue;
+
+               /*
+                * .init.text code is ran before userspace and thus doesn't
+                * strictly need retpolines, except for modules which are
+                * loaded late, they very much do need retpoline in their
+                * .init.text
+                */
+               if (!strcmp(insn->sec->name, ".init.text") && !module)
+                       continue;
+
+               WARN_FUNC("indirect %s found in RETPOLINE build",
+                         insn->sec, insn->offset,
+                         insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
+
+               warnings++;
+       }
+
+       return warnings;
+}
+
 static bool is_kasan_insn(struct instruction *insn)
 {
        return (insn->type == INSN_CALL &&
@@ -2022,13 +2099,12 @@ static void cleanup(struct objtool_file *file)
        elf_close(file->elf);
 }
 
-int check(const char *_objname, bool _no_fp, bool no_unreachable, bool orc)
+int check(const char *_objname, bool orc)
 {
        struct objtool_file file;
        int ret, warnings = 0;
 
        objname = _objname;
-       no_fp = _no_fp;
 
        file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY);
        if (!file.elf)
@@ -2052,6 +2128,13 @@ int check(const char *_objname, bool _no_fp, bool no_unreachable, bool orc)
        if (list_empty(&file.insn_list))
                goto out;
 
+       if (retpoline) {
+               ret = validate_retpoline(&file);
+               if (ret < 0)
+                       return ret;
+               warnings += ret;
+       }
+
        ret = validate_functions(&file);
        if (ret < 0)
                goto out;
index 23a1d065cae190c11432bd2c768f38c7f91e470b..c6b68fcb926ff76c6e44675ff636c16f3552a184 100644 (file)
@@ -45,6 +45,7 @@ struct instruction {
        unsigned char type;
        unsigned long immediate;
        bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
+       bool retpoline_safe;
        struct symbol *call_dest;
        struct instruction *jump_dest;
        struct instruction *first_jump_src;
@@ -63,7 +64,7 @@ struct objtool_file {
        bool ignore_unreachables, c_file, hints;
 };
 
-int check(const char *objname, bool no_fp, bool no_unreachable, bool orc);
+int check(const char *objname, bool orc);
 
 struct instruction *find_insn(struct objtool_file *file,
                              struct section *sec, unsigned long offset);
index 954ea9e21236dd6b96d6a1a5330ed410ba278ae0..cf9f4040ea5c7ae1b5890d973efa53592efbe56a 100644 (file)
@@ -8,7 +8,7 @@ perf-kallsyms - Searches running kernel for symbols
 SYNOPSIS
 --------
 [verse]
-'perf kallsyms <options> symbol_name[,symbol_name...]'
+'perf kallsyms' [<options>] symbol_name[,symbol_name...]
 
 DESCRIPTION
 -----------
index bf4ca749d1ac42db783446d4e36e7420dd461b95..a217623fec2ea626494dc046d35a0bda02f20f04 100644 (file)
@@ -881,6 +881,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                }
        }
 
+       /*
+        * If we have just single event and are sending data
+        * through pipe, we need to force the ids allocation,
+        * because we synthesize event name through the pipe
+        * and need the id for that.
+        */
+       if (data->is_pipe && rec->evlist->nr_entries == 1)
+               rec->opts.sample_id = true;
+
        if (record__open(rec) != 0) {
                err = -1;
                goto out_child;
index 98bf9d32f2222247bf2b39d98dab62d9cafe1770..54a4c152edb3917405dd064a1f79037acbb0d239 100644 (file)
@@ -917,7 +917,7 @@ static void print_metric_csv(void *ctx,
        char buf[64], *vals, *ends;
 
        if (unit == NULL || fmt == NULL) {
-               fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep);
+               fprintf(out, "%s%s", csv_sep, csv_sep);
                return;
        }
        snprintf(buf, sizeof(buf), fmt, val);
index b7c823ba8374fb702b4ae3d3033e79b852e4686b..35ac016fcb988997437230103987aaa59415e70a 100644 (file)
@@ -991,7 +991,7 @@ static int perf_top_overwrite_fallback(struct perf_top *top,
        evlist__for_each_entry(evlist, counter)
                counter->attr.write_backward = false;
        opts->overwrite = false;
-       ui__warning("fall back to non-overwrite mode\n");
+       pr_debug2("fall back to non-overwrite mode\n");
        return 1;
 }
 
index cfe46236a5e5b7d641523e3eb25a3d0acc11eaa4..57b9b342d533592ca698544172098c551c223f4b 100644 (file)
@@ -61,6 +61,7 @@ struct record_opts {
        bool         tail_synthesize;
        bool         overwrite;
        bool         ignore_missing_thread;
+       bool         sample_id;
        unsigned int freq;
        unsigned int mmap_pages;
        unsigned int auxtrace_mmap_pages;
index 2864279751122aa8efc518778759cc04589e12ba..fbf927cf775dce38511d864d81780191d2745444 100644 (file)
@@ -327,7 +327,32 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
        if (!disasm_line__is_valid_jump(cursor, sym))
                return;
 
+       /*
+        * This first was seen with a gcc function, _cpp_lex_token, that
+        * has the usual jumps:
+        *
+        *  │1159e6c: ↓ jne    115aa32 <_cpp_lex_token@@Base+0xf92>
+        *
+        * I.e. jumps to a label inside that function (_cpp_lex_token), and
+        * those work, but also this kind:
+        *
+        *  │1159e8b: ↓ jne    c469be <cpp_named_operator2name@@Base+0xa72>
+        *
+        *  I.e. jumps to another function, outside _cpp_lex_token, which
+        *  are not being correctly handled generating as a side effect references
+        *  to ab->offset[] entries that are set to NULL, so to make this code
+        *  more robust, check that here.
+        *
+        *  A proper fix will be put in place, looking at the function
+        *  name right after the '<' token and probably treating this like a
+        *  'call' instruction.
+        */
        target = ab->offsets[cursor->ops.target.offset];
+       if (target == NULL) {
+               ui_helpline__printf("WARN: jump target inconsistency, press 'o', ab->offsets[%#x] = NULL\n",
+                                   cursor->ops.target.offset);
+               return;
+       }
 
        bcursor = browser_line(&cursor->al);
        btarget = browser_line(target);
index 9faf3b5367db03babf42cba25d808bb6e7a5d0b4..6470ea2aa25eea686b4f37050140a66e4fabda1b 100644 (file)
 #include "sane_ctype.h"
 #include "symbol/kallsyms.h"
 
+/*
+ * Report whether decoding is to be skipped for @session: true when the
+ * session has no itrace synthesis options, otherwise the value of their
+ * dont_decode flag.
+ */
+static bool auxtrace__dont_decode(struct perf_session *session)
+{
+       if (!session->itrace_synth_opts)
+               return true;
+
+       return session->itrace_synth_opts->dont_decode;
+}
+
 int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
                        struct auxtrace_mmap_params *mp,
                        void *userpg, int fd)
@@ -762,6 +768,9 @@ int auxtrace_queues__process_index(struct auxtrace_queues *queues,
        size_t i;
        int err;
 
+       if (auxtrace__dont_decode(session))
+               return 0;
+
        list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
                for (i = 0; i < auxtrace_index->nr; i++) {
                        ent = &auxtrace_index->entries[i];
@@ -892,12 +901,6 @@ int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
        return err;
 }
 
-static bool auxtrace__dont_decode(struct perf_session *session)
-{
-       return !session->itrace_synth_opts ||
-              session->itrace_synth_opts->dont_decode;
-}
-
 int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
                                      union perf_event *event,
                                      struct perf_session *session)
index 1e97937b03a933890e7101ecad982c99e564836c..6f09e4962dade1e64512f64da99528ca1d649986 100644 (file)
@@ -137,6 +137,7 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
        struct perf_evsel *evsel;
        bool use_sample_identifier = false;
        bool use_comm_exec;
+       bool sample_id = opts->sample_id;
 
        /*
         * Set the evsel leader links before we configure attributes,
@@ -163,8 +164,7 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
                 * match the id.
                 */
                use_sample_identifier = perf_can_sample_identifier();
-               evlist__for_each_entry(evlist, evsel)
-                       perf_evsel__set_sample_id(evsel, use_sample_identifier);
+               sample_id = true;
        } else if (evlist->nr_entries > 1) {
                struct perf_evsel *first = perf_evlist__first(evlist);
 
@@ -174,6 +174,10 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
                        use_sample_identifier = perf_can_sample_identifier();
                        break;
                }
+               sample_id = true;
+       }
+
+       if (sample_id) {
                evlist__for_each_entry(evlist, evsel)
                        perf_evsel__set_sample_id(evsel, use_sample_identifier);
        }
index 370138e7e35ca3f4046516dfe8fc2c304fa7abc5..88223bc7c82b923e3cc4274899bf24bea5d23a64 100644 (file)
@@ -12,7 +12,7 @@
  * States and transits:
  *
  *
- *  OFF--(on)--> READY --(hit)--> HIT
+ *  OFF--> ON --> READY --(hit)--> HIT
  *                 ^               |
  *                 |            (ready)
  *                 |               |
@@ -27,8 +27,9 @@ struct trigger {
        volatile enum {
                TRIGGER_ERROR           = -2,
                TRIGGER_OFF             = -1,
-               TRIGGER_READY           = 0,
-               TRIGGER_HIT             = 1,
+               TRIGGER_ON              = 0,
+               TRIGGER_READY           = 1,
+               TRIGGER_HIT             = 2,
        } state;
        const char *name;
 };
@@ -50,7 +51,7 @@ static inline bool trigger_is_error(struct trigger *t)
 static inline void trigger_on(struct trigger *t)
 {
        TRIGGER_WARN_ONCE(t, TRIGGER_OFF);
-       t->state = TRIGGER_READY;
+       t->state = TRIGGER_ON;
 }
 
 static inline void trigger_ready(struct trigger *t)
index 44ef9eba5a7a2181004676cfa28f330cfb576b6a..6c645eb77d4218fd102968fb2a24b19350e3edd9 100644 (file)
@@ -178,6 +178,55 @@ void idr_get_next_test(int base)
        idr_destroy(&idr);
 }
 
+/*
+ * Per-entry callback passed to idr_for_each() by idr_u32_test1(): checks that
+ * the reported @id is non-negative and that the stored pointer is DUMMY_PTR.
+ * @data is not used.  Always returns 0.
+ */
+int idr_u32_cb(int id, void *ptr, void *data)
+{
+       BUG_ON(id < 0);
+       BUG_ON(ptr != DUMMY_PTR);
+       return 0;
+}
+
+/*
+ * Exercise one u32 id: allocate exactly @handle in @idr, check that a second
+ * allocation restricted to the same id returns -ENOSPC, look the entry up
+ * through idr_get_next() and idr_for_each(), then remove it and check that
+ * @idr is empty again.
+ */
+void idr_u32_test1(struct idr *idr, u32 handle)
+{
+       static bool warned = false;
+       u32 id = handle;
+       int sid = 0;
+       void *ptr;
+
+       BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL));
+       BUG_ON(id != handle);
+       /* Same request again (id is both start and max): expected to be refused. */
+       BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL) != -ENOSPC);
+       BUG_ON(id != handle);
+       /*
+        * Bracket the int-based lookups below with a banner, but only the
+        * first time an id above INT_MAX is tested.
+        */
+       if (!warned && id > INT_MAX)
+               printk("vvv Ignore these warnings\n");
+       ptr = idr_get_next(idr, &sid);
+       if (id > INT_MAX) {
+               /* Expect no entry to be reported and sid to stay at 0. */
+               BUG_ON(ptr != NULL);
+               BUG_ON(sid != 0);
+       } else {
+               /* Expect the entry itself, with sid advanced to its id. */
+               BUG_ON(ptr != DUMMY_PTR);
+               BUG_ON(sid != id);
+       }
+       idr_for_each(idr, idr_u32_cb, NULL);
+       if (!warned && id > INT_MAX) {
+               printk("^^^ Warnings over\n");
+               warned = true;
+       }
+       BUG_ON(idr_remove(idr, id) != DUMMY_PTR);
+       BUG_ON(!idr_is_empty(idr));
+}
+
+/*
+ * Run idr_u32_test1() on an IDR initialised with @base for a fixed list of
+ * handles on both sides of the INT_MAX boundary.
+ */
+void idr_u32_test(int base)
+{
+       static const u32 handles[] = {
+               10,
+               0x7fffffff,
+               0x80000000,
+               0x80000001,
+               0xffe00000,
+               0xffffffff,
+       };
+       unsigned int n;
+       DEFINE_IDR(idr);
+
+       idr_init_base(&idr, base);
+       for (n = 0; n < sizeof(handles) / sizeof(handles[0]); n++)
+               idr_u32_test1(&idr, handles[n]);
+}
+
 void idr_checks(void)
 {
        unsigned long i;
@@ -248,6 +297,9 @@ void idr_checks(void)
        idr_get_next_test(0);
        idr_get_next_test(1);
        idr_get_next_test(4);
+       idr_u32_test(4);
+       idr_u32_test(1);
+       idr_u32_test(0);
 }
 
 /*
index 6903ccf35595f560350a26d03e3eb4369fbb128b..44a0d1ad4408429d67378bf185773f77b3461ab8 100644 (file)
@@ -29,7 +29,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
 {
        struct radix_tree_node *node;
 
-       if (flags & __GFP_NOWARN)
+       if (!(flags & __GFP_DIRECT_RECLAIM))
                return NULL;
 
        pthread_mutex_lock(&cachep->lock);
@@ -73,10 +73,17 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 
+/*
+ * Test-harness kmalloc() built on malloc().  Requests without
+ * __GFP_DIRECT_RECLAIM are refused by returning NULL; requests carrying
+ * __GFP_ZERO get zeroed memory.  Returns the malloc() result, which may be
+ * NULL.
+ */
 void *kmalloc(size_t size, gfp_t gfp)
 {
-       void *ret = malloc(size);
+       void *ret;
+
+       if (!(gfp & __GFP_DIRECT_RECLAIM))
+               return NULL;
+
+       ret = malloc(size);
        uatomic_inc(&nr_allocated);
        if (kmalloc_verbose)
                printf("Allocating %p from malloc\n", ret);
+       /* malloc() can fail; never pass a NULL pointer to memset() */
+       if (ret && (gfp & __GFP_ZERO))
+               memset(ret, 0, size);
        return ret;
 }
 
diff --git a/tools/testing/radix-tree/linux/compiler_types.h b/tools/testing/radix-tree/linux/compiler_types.h
new file mode 100644 (file)
index 0000000..e69de29
index e9fff59dfd8a86cb6022d0dc6e5321a3d9a4f636..e3201ccf54c3c46dc7ffaf9bc6185edd406a6b95 100644 (file)
@@ -11,6 +11,7 @@
 #define __GFP_IO               0x40u
 #define __GFP_FS               0x80u
 #define __GFP_NOWARN           0x200u
+#define __GFP_ZERO             0x8000u
 #define __GFP_ATOMIC           0x80000u
 #define __GFP_ACCOUNT          0x100000u
 #define __GFP_DIRECT_RECLAIM   0x400000u
index 979baeec7e706aee05ef6bdfbf8dd91494284dd2..a037def0dec637ef9a38b034de5ac1fed4d32d8e 100644 (file)
@@ -3,6 +3,7 @@
 #define SLAB_H
 
 #include <linux/types.h>
+#include <linux/gfp.h>
 
 #define SLAB_HWCACHE_ALIGN 1
 #define SLAB_PANIC 2
 void *kmalloc(size_t size, gfp_t);
 void kfree(void *);
 
+/* Zeroing variant of kmalloc(): forwards to it with __GFP_ZERO added to @gfp. */
+static inline void *kzalloc(size_t size, gfp_t gfp)
+{
+        return kmalloc(size, gfp | __GFP_ZERO);
+}
+
 void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
 void kmem_cache_free(struct kmem_cache *cachep, void *objp);
 
index 1a74922689930ce1829afaa5f91b701f97861b07..f6304d2be90c16e7b818415d867704abda3ecf09 100644 (file)
@@ -11,11 +11,11 @@ all:
                BUILD_TARGET=$(OUTPUT)/$$DIR;   \
                mkdir $$BUILD_TARGET  -p;       \
                make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
-               #SUBDIR test prog name should be in the form: SUBDIR_test.sh
+               #SUBDIR test prog name should be in the form: SUBDIR_test.sh \
                TEST=$$DIR"_test.sh"; \
-               if [ -e $$DIR/$$TEST ]; then
-                       rsync -a $$DIR/$$TEST $$BUILD_TARGET/;
-               fi
+               if [ -e $$DIR/$$TEST ]; then \
+                       rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \
+               fi \
        done
 
 override define RUN_TESTS
index 5c43c187f27c14748957c048255a834ce89217ee..f35fb02bdf568a91432dc3f45fccc9cadc2fa1b7 100644 (file)
@@ -13,6 +13,14 @@ endif
 CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
 LDLIBS += -lcap -lelf -lrt -lpthread
 
+TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
+all: $(TEST_CUSTOM_PROGS)
+
+$(TEST_CUSTOM_PROGS): urandom_read
+
+urandom_read: urandom_read.c
+       $(CC) -o $(TEST_CUSTOM_PROGS) -static $<
+
 # Order correspond to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
        test_align test_verifier_log test_dev_cgroup test_tcpbpf_user
@@ -21,7 +29,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
        test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
        sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
        test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
-       sample_map_ret0.o test_tcpbpf_kern.o
+       sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
+       sockmap_tcp_msg_prog.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -35,12 +44,14 @@ TEST_GEN_PROGS_EXTENDED = test_libbpf_open
 
 include ../lib.mk
 
-BPFOBJ := $(OUTPUT)/libbpf.a cgroup_helpers.c
+BPFOBJ := $(OUTPUT)/libbpf.a
 
 $(TEST_GEN_PROGS): $(BPFOBJ)
 
 $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
 
+$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
+
 .PHONY: force
 
 # force a rebuild of BPFOBJ when its dependencies are updated
@@ -72,3 +83,5 @@ $(OUTPUT)/%.o: %.c
        $(CLANG) $(CLANG_FLAGS) \
                 -O2 -target bpf -emit-llvm -c $< -o - |      \
        $(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@
+
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)
index dde2c11d7771078071ccca4c1e9f2a85014e0443..7cae376d8d0c76c556dee7e6ce3c2a2de448a16e 100644 (file)
@@ -86,6 +86,14 @@ static int (*bpf_perf_prog_read_value)(void *ctx, void *buf,
        (void *) BPF_FUNC_perf_prog_read_value;
 static int (*bpf_override_return)(void *ctx, unsigned long rc) =
        (void *) BPF_FUNC_override_return;
+static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) =
+       (void *) BPF_FUNC_msg_redirect_map;
+static int (*bpf_msg_apply_bytes)(void *ctx, int len) =
+       (void *) BPF_FUNC_msg_apply_bytes;
+static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
+       (void *) BPF_FUNC_msg_cork_bytes;
+static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
+       (void *) BPF_FUNC_msg_pull_data;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -123,6 +131,8 @@ static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
        (void *) BPF_FUNC_skb_under_cgroup;
 static int (*bpf_skb_change_head)(void *, int len, int flags) =
        (void *) BPF_FUNC_skb_change_head;
+static int (*bpf_skb_pull_data)(void *, int len) =
+       (void *) BPF_FUNC_skb_pull_data;
 
 /* Scan the ARCH passed in from ARCH env variable (see Makefile) */
 #if defined(__TARGET_ARCH_x86)
diff --git a/tools/testing/selftests/bpf/bpf_rlimit.h b/tools/testing/selftests/bpf/bpf_rlimit.h
new file mode 100644 (file)
index 0000000..9dac9b3
--- /dev/null
@@ -0,0 +1,28 @@
+#include <sys/resource.h>
+#include <stdio.h>
+
+/*
+ * Constructor that tries to lift RLIMIT_MEMLOCK for the current process.
+ * It first asks for RLIM_INFINITY; if that is refused it reports the error
+ * and, provided the previous limits could be read, retries with the old
+ * soft/hard limits raised by 1 MiB each.  The fallback is best effort: its
+ * result is not checked.
+ */
+static  __attribute__((constructor)) void bpf_rlimit_ctor(void)
+{
+       struct rlimit rlim_old, rlim_new = {
+               .rlim_cur       = RLIM_INFINITY,
+               .rlim_max       = RLIM_INFINITY,
+       };
+       int have_old;
+
+       /* rlim_old is only meaningful if getrlimit() succeeded */
+       have_old = getrlimit(RLIMIT_MEMLOCK, &rlim_old) == 0;
+       /* For the sake of running the test cases, we temporarily
+        * set rlimit to infinity in order for kernel to focus on
+        * errors from actual test cases and not getting noise
+        * from hitting memlock limits. The limit is on per-process
+        * basis and not a global one, hence destructor not really
+        * needed here.
+        */
+       if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) {
+               perror("Unable to lift memlock rlimit");
+               /* Without valid old limits there is nothing to derive a
+                * lower limit from.
+                */
+               if (!have_old)
+                       return;
+               /* Trying out lower limit, but expect potential test
+                * case failures from this!
+                */
+               rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+               rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+               setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+       }
+}
index a1dec2b6d9c5b28618996fe6e8312df814d2b4cf..0f92858f6226a37feb921eeea494902a2520d8bc 100644 (file)
@@ -20,14 +20,25 @@ int bpf_prog1(struct __sk_buff *skb)
        __u32 lport = skb->local_port;
        __u32 rport = skb->remote_port;
        __u8 *d = data;
+       __u32 len = (__u32) data_end - (__u32) data;
+       int err;
 
-       if (data + 10 > data_end)
-               return skb->len;
+       if (data + 10 > data_end) {
+               err = bpf_skb_pull_data(skb, 10);
+               if (err)
+                       return SK_DROP;
+
+               data_end = (void *)(long)skb->data_end;
+               data = (void *)(long)skb->data;
+               if (data + 10 > data_end)
+                       return SK_DROP;
+       }
 
        /* This write/read is a bit pointless but tests the verifier and
         * strparser handler for read/write pkt data and access into sk
         * fields.
         */
+       d = data;
        d[7] = 1;
        return skb->len;
 }
diff --git a/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c
new file mode 100644 (file)
index 0000000..12a7b5c
--- /dev/null
@@ -0,0 +1,33 @@
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+/*
+ * printf-like debug helper: copies @fmt into a local char array and passes
+ * it, together with its size and any further arguments, to
+ * bpf_trace_printk().
+ */
+#define bpf_printk(fmt, ...)                                   \
+({                                                             \
+              char ____fmt[] = fmt;                            \
+              bpf_trace_printk(____fmt, sizeof(____fmt),       \
+                               ##__VA_ARGS__);                 \
+})
+
+/*
+ * sk_msg program: returns SK_DROP when fewer than 8 bytes lie between
+ * msg->data and msg->data_end; otherwise logs the data length and the first
+ * two data bytes through bpf_printk() and returns SK_PASS.
+ */
+SEC("sk_msg1")
+int bpf_prog1(struct sk_msg_md *msg)
+{
+       void *data_end = (void *)(long) msg->data_end;
+       void *data = (void *)(long) msg->data;
+
+       char *d;
+
+       /* Bounds check ahead of the d[0]/d[1] reads below. */
+       if (data + 8 > data_end)
+               return SK_DROP;
+
+       bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data);
+       d = (char *)data;
+       bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]);
+
+       return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
index d7bea972cb21bf09c84b38938df9951a0eedf5fc..2ce7634a4012a19c6b35c3bdf35d4518c5ae5c46 100644 (file)
@@ -26,6 +26,13 @@ struct bpf_map_def SEC("maps") sock_map_tx = {
        .max_entries = 20,
 };
 
+/* BPF_MAP_TYPE_SOCKMAP map with int-sized keys and values and 20 entries. */
+struct bpf_map_def SEC("maps") sock_map_msg = {
+       .type = BPF_MAP_TYPE_SOCKMAP,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 20,
+};
+
 struct bpf_map_def SEC("maps") sock_map_break = {
        .type = BPF_MAP_TYPE_ARRAY,
        .key_size = sizeof(int),
index ff8bd7e3e50c19231f35233714e499d8b2504d24..6b1b302310feb6881629ecaab0911c79d6f32efd 100644 (file)
@@ -9,8 +9,6 @@
 #include <stddef.h>
 #include <stdbool.h>
 
-#include <sys/resource.h>
-
 #include <linux/unistd.h>
 #include <linux/filter.h>
 #include <linux/bpf_perf_event.h>
@@ -19,6 +17,7 @@
 #include <bpf/bpf.h>
 
 #include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
 
 #ifndef ARRAY_SIZE
 # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
@@ -702,9 +701,6 @@ static int do_test(unsigned int from, unsigned int to)
 int main(int argc, char **argv)
 {
        unsigned int from = 0, to = ARRAY_SIZE(tests);
-       struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
-       setrlimit(RLIMIT_MEMLOCK, &rinf);
 
        if (argc == 3) {
                unsigned int l = atoi(argv[argc - 2]);
index 3489cc28343351aa0e160a3d67f513eca65f843b..9c8b50bac7e01fd60dc436a5f986e384e2343b43 100644 (file)
 #include <errno.h>
 #include <assert.h>
 #include <sys/time.h>
-#include <sys/resource.h>
 
 #include <linux/bpf.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
 #include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
 
 #define DEV_CGROUP_PROG "./dev_cgroup.o"
 
 
 int main(int argc, char **argv)
 {
-       struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
        struct bpf_object *obj;
        int error = EXIT_FAILURE;
        int prog_fd, cgroup_fd;
        __u32 prog_cnt;
 
-       if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
-               perror("Unable to lift memlock rlimit");
-
        if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
                          &obj, &prog_fd)) {
                printf("Failed to load DEV_CGROUP program\n");
index 2be87e9ee28d2c24594ff1d7540084e9dcf459d0..147e34cfceb79bef10672643c0ba265ccf68293d 100644 (file)
 #include <unistd.h>
 #include <arpa/inet.h>
 #include <sys/time.h>
-#include <sys/resource.h>
 
 #include <bpf/bpf.h>
+
 #include "bpf_util.h"
+#include "bpf_rlimit.h"
 
 struct tlpm_node {
        struct tlpm_node *next;
@@ -736,17 +737,11 @@ static void test_lpm_multi_thread(void)
 
 int main(void)
 {
-       struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
-       int i, ret;
+       int i;
 
        /* we want predictable, pseudo random tests */
        srand(0xf00ba1);
 
-       /* allow unlimited locked memory */
-       ret = setrlimit(RLIMIT_MEMLOCK, &limit);
-       if (ret < 0)
-               perror("Unable to lift memlock rlimit");
-
        test_lpm_basic();
        test_lpm_order();
 
@@ -755,11 +750,8 @@ int main(void)
                test_lpm_map(i);
 
        test_lpm_ipaddr();
-
        test_lpm_delete();
-
        test_lpm_get_next_key();
-
        test_lpm_multi_thread();
 
        printf("test_lpm: OK\n");
index 8c10c9180c1a6535f18caafcdf7195f69d04ecd2..781c7de343be005a92bffd7a2f6da0f0eaae22bb 100644 (file)
 #include <time.h>
 
 #include <sys/wait.h>
-#include <sys/resource.h>
 
 #include <bpf/bpf.h>
+
 #include "bpf_util.h"
+#include "bpf_rlimit.h"
 
 #define LOCAL_FREE_TARGET      (128)
 #define PERCPU_FREE_TARGET     (4)
@@ -613,7 +614,6 @@ static void test_lru_sanity6(int map_type, int map_flags, int tgt_free)
 
 int main(int argc, char **argv)
 {
-       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
        int map_types[] = {BPF_MAP_TYPE_LRU_HASH,
                             BPF_MAP_TYPE_LRU_PERCPU_HASH};
        int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
@@ -621,8 +621,6 @@ int main(int argc, char **argv)
 
        setbuf(stdout, NULL);
 
-       assert(!setrlimit(RLIMIT_MEMLOCK, &r));
-
        nr_cpus = bpf_num_possible_cpus();
        assert(nr_cpus != -1);
        printf("nr_cpus:%d\n\n", nr_cpus);
index 9e03a4c356a496e3f57d18058254df464c206278..6c253343a6f96e3d5eb9fbd67d0ad8a4eb7f2bf6 100644 (file)
 #include <stdlib.h>
 
 #include <sys/wait.h>
-#include <sys/resource.h>
 
 #include <linux/bpf.h>
 
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
+
 #include "bpf_util.h"
+#include "bpf_rlimit.h"
 
 static int map_flags;
 
@@ -463,15 +464,17 @@ static void test_devmap(int task, void *data)
 #include <linux/err.h>
 #define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o"
 #define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
+#define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.o"
 static void test_sockmap(int tasks, void *data)
 {
-       int one = 1, map_fd_rx = 0, map_fd_tx = 0, map_fd_break, s, sc, rc;
-       struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
+       struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_msg, *bpf_map_break;
+       int map_fd_msg = 0, map_fd_rx = 0, map_fd_tx = 0, map_fd_break;
        int ports[] = {50200, 50201, 50202, 50204};
        int err, i, fd, udp, sfd[6] = {0xdeadbeef};
        u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
-       int parse_prog, verdict_prog;
+       int parse_prog, verdict_prog, msg_prog;
        struct sockaddr_in addr;
+       int one = 1, s, sc, rc;
        struct bpf_object *obj;
        struct timeval to;
        __u32 key, value;
@@ -583,6 +586,12 @@ static void test_sockmap(int tasks, void *data)
                goto out_sockmap;
        }
 
+       err = bpf_prog_attach(-1, fd, BPF_SK_MSG_VERDICT, 0);
+       if (!err) {
+               printf("Failed invalid msg verdict prog attach\n");
+               goto out_sockmap;
+       }
+
        err = bpf_prog_attach(-1, fd, __MAX_BPF_ATTACH_TYPE, 0);
        if (!err) {
                printf("Failed unknown prog attach\n");
@@ -601,6 +610,12 @@ static void test_sockmap(int tasks, void *data)
                goto out_sockmap;
        }
 
+       err = bpf_prog_detach(fd, BPF_SK_MSG_VERDICT);
+       if (err) {
+               printf("Failed empty msg verdict prog detach\n");
+               goto out_sockmap;
+       }
+
        err = bpf_prog_detach(fd, __MAX_BPF_ATTACH_TYPE);
        if (!err) {
                printf("Detach invalid prog successful\n");
@@ -615,6 +630,13 @@ static void test_sockmap(int tasks, void *data)
                goto out_sockmap;
        }
 
+       err = bpf_prog_load(SOCKMAP_TCP_MSG_PROG,
+                           BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog);
+       if (err) {
+               printf("Failed to load SK_SKB msg prog\n");
+               goto out_sockmap;
+       }
+
        err = bpf_prog_load(SOCKMAP_VERDICT_PROG,
                            BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog);
        if (err) {
@@ -630,7 +652,7 @@ static void test_sockmap(int tasks, void *data)
 
        map_fd_rx = bpf_map__fd(bpf_map_rx);
        if (map_fd_rx < 0) {
-               printf("Failed to get map fd\n");
+               printf("Failed to get map rx fd\n");
                goto out_sockmap;
        }
 
@@ -646,6 +668,18 @@ static void test_sockmap(int tasks, void *data)
                goto out_sockmap;
        }
 
+       bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg");
+       if (IS_ERR(bpf_map_msg)) {
+               printf("Failed to load map msg from msg_verdict prog\n");
+               goto out_sockmap;
+       }
+
+       map_fd_msg = bpf_map__fd(bpf_map_msg);
+       if (map_fd_msg < 0) {
+               printf("Failed to get map msg fd\n");
+               goto out_sockmap;
+       }
+
        bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
        if (IS_ERR(bpf_map_break)) {
                printf("Failed to load map tx from verdict prog\n");
@@ -679,6 +713,12 @@ static void test_sockmap(int tasks, void *data)
                goto out_sockmap;
        }
 
+       err = bpf_prog_attach(msg_prog, map_fd_msg, BPF_SK_MSG_VERDICT, 0);
+       if (err) {
+               printf("Failed msg verdict bpf prog attach\n");
+               goto out_sockmap;
+       }
+
        err = bpf_prog_attach(verdict_prog, map_fd_rx,
                              __MAX_BPF_ATTACH_TYPE, 0);
        if (!err) {
@@ -718,6 +758,14 @@ static void test_sockmap(int tasks, void *data)
                }
        }
 
+       /* Put sfd[2] (sending fd below) into msg map to test sendmsg bpf */
+       i = 0;
+       err = bpf_map_update_elem(map_fd_msg, &i, &sfd[2], BPF_ANY);
+       if (err) {
+               printf("Failed map_fd_msg update sockmap %i\n", err);
+               goto out_sockmap;
+       }
+
        /* Test map send/recv */
        for (i = 0; i < 2; i++) {
                buf[0] = i;
@@ -1126,10 +1174,6 @@ static void run_all_tests(void)
 
 int main(void)
 {
-       struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
-       setrlimit(RLIMIT_MEMLOCK, &rinf);
-
        map_flags = 0;
        run_all_tests();
 
index b549308abd19860953bb063ad382f414ce8a3c45..e9df48b306df6a2cdde861a6571a4759bd9158f9 100644 (file)
@@ -26,7 +26,6 @@ typedef __u16 __sum16;
 
 #include <sys/ioctl.h>
 #include <sys/wait.h>
-#include <sys/resource.h>
 #include <sys/types.h>
 #include <fcntl.h>
 
@@ -34,9 +33,11 @@ typedef __u16 __sum16;
 #include <linux/err.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
+
 #include "test_iptunnel_common.h"
 #include "bpf_util.h"
 #include "bpf_endian.h"
+#include "bpf_rlimit.h"
 
 static int error_cnt, pass_cnt;
 
@@ -840,7 +841,8 @@ static void test_tp_attach_query(void)
 static int compare_map_keys(int map1_fd, int map2_fd)
 {
        __u32 key, next_key;
-       char val_buf[PERF_MAX_STACK_DEPTH * sizeof(__u64)];
+       char val_buf[PERF_MAX_STACK_DEPTH *
+                    sizeof(struct bpf_stack_build_id)];
        int err;
 
        err = bpf_map_get_next_key(map1_fd, NULL, &key);
@@ -963,12 +965,168 @@ static void test_stacktrace_map()
        return;
 }
 
-int main(void)
+/*
+ * Run "readelf -n ./urandom_read | grep 'Build ID'" and copy the first line
+ * of its output into build_id as a NUL-terminated string, truncating the
+ * line if it does not fit.
+ *
+ * @build_id: destination buffer
+ * @size:     capacity of build_id in bytes, including the terminating NUL
+ *
+ * Returns 0 on success, -1 if size is 0, the pipe cannot be opened, or no
+ * line can be read.
+ */
+static int extract_build_id(char *build_id, size_t size)
+{
+       FILE *fp;
+       char *line = NULL;
+       size_t cap = 0;
+       ssize_t nread;
+       size_t len;
+
+       if (size == 0)
+               return -1;
+
+       fp = popen("readelf -n ./urandom_read | grep 'Build ID'", "r");
+       if (fp == NULL)
+               return -1;
+
+       nread = getline(&line, &cap, fp);
+       /* a stream opened with popen() must be closed with pclose() */
+       pclose(fp);
+       if (nread == -1) {
+               /* getline() may have allocated the buffer even on failure */
+               free(line);
+               return -1;
+       }
+
+       /* Copy the line itself (nread bytes), not the allocation size that
+        * getline() stores through its second argument, and keep one byte
+        * for the terminating NUL.
+        */
+       len = nread;
+       if (len > size - 1)
+               len = size - 1;
+       memcpy(build_id, line, len);
+       build_id[len] = '\0';
+       free(line);
+       return 0;
+}
+
+/*
+ * Check that a BPF_F_STACK_BUILD_ID stack map records the build ID of
+ * ./urandom_read.
+ *
+ * The test loads ./test_stacktrace_build_id.o, attaches it to the
+ * random/urandom_read tracepoint through a perf event, triggers the
+ * tracepoint by running dd and ./urandom_read, and then:
+ *  - verifies that stackid_hmap and stackmap hold the same set of keys;
+ *  - walks every stackmap entry and looks for a frame whose build ID
+ *    (printed as hex) occurs in the "Build ID" line reported by readelf.
+ *
+ * Results are reported through CHECK(); nothing is returned.
+ */
+static void test_stacktrace_build_id(void)
 {
-       struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
+       int control_map_fd, stackid_hmap_fd, stackmap_fd;
+       const char *file = "./test_stacktrace_build_id.o";
+       int bytes, efd, err, pmu_fd, prog_fd;
+       struct perf_event_attr attr = {};
+       __u32 key, previous_key, val, duration = 0;
+       struct bpf_object *obj;
+       char buf[256];
+       int i, j;
+       struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
+       int build_id_matches = 0;
 
-       setrlimit(RLIMIT_MEMLOCK, &rinf);
+       err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+       if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+               goto out;
+
+       /* Get the ID for the random/urandom_read tracepoint */
+       snprintf(buf, sizeof(buf),
+                "/sys/kernel/debug/tracing/events/random/urandom_read/id");
+       efd = open(buf, O_RDONLY, 0);
+       if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+               goto close_prog;
 
+       bytes = read(efd, buf, sizeof(buf));
+       close(efd);
+       if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
+                 "read", "bytes %d errno %d\n", bytes, errno))
+               goto close_prog;
+       /* read() does not terminate the data; strtol() below needs a string */
+       buf[bytes] = '\0';
+
+       /* Open the perf event and attach bpf program */
+       attr.config = strtol(buf, NULL, 0);
+       attr.type = PERF_TYPE_TRACEPOINT;
+       attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+       attr.sample_period = 1;
+       attr.wakeup_events = 1;
+       pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+                        0 /* cpu 0 */, -1 /* group id */,
+                        0 /* flags */);
+       if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+                 pmu_fd, errno))
+               goto close_prog;
+
+       err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+       if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+                 err, errno))
+               goto close_pmu;
+
+       err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+       if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+                 err, errno))
+               goto disable_pmu;
+
+       /* find map fds; on failure report the fd that was returned rather
+        * than the unrelated err left over from the ioctl above
+        */
+       control_map_fd = bpf_find_map(__func__, obj, "control_map");
+       if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+                 "err %d errno %d\n", control_map_fd, errno))
+               goto disable_pmu;
+
+       stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+       if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+                 "err %d errno %d\n", stackid_hmap_fd, errno))
+               goto disable_pmu;
+
+       stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+       if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+                 stackmap_fd, errno))
+               goto disable_pmu;
+
+       /* trigger the tracepoint from two different binaries */
+       assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
+              == 0);
+       assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0);
+       /* disable stack trace collection */
+       key = 0;
+       val = 1;
+       bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+       /* for every element in stackid_hmap, we can find a corresponding one
+        * in stackmap, and vice versa.
+        */
+       err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+       if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+                 "err %d errno %d\n", err, errno))
+               goto disable_pmu;
+
+       err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+       if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+                 "err %d errno %d\n", err, errno))
+               goto disable_pmu;
+
+       /* buf now receives the "Build ID" line printed by readelf */
+       err = extract_build_id(buf, sizeof(buf));
+
+       if (CHECK(err, "get build_id with readelf",
+                 "err %d errno %d\n", err, errno))
+               goto disable_pmu;
+
+       err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+       if (CHECK(err, "get_next_key from stackmap",
+                 "err %d, errno %d\n", err, errno))
+               goto disable_pmu;
+
+       do {
+               /* 20 build ID bytes as 40 hex digits plus NUL */
+               char build_id[64];
+
+               err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+               if (CHECK(err, "lookup_elem from stackmap",
+                         "err %d, errno %d\n", err, errno))
+                       goto disable_pmu;
+               for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
+                       if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
+                           id_offs[i].offset != 0) {
+                               for (j = 0; j < 20; ++j)
+                                       sprintf(build_id + 2 * j, "%02x",
+                                               id_offs[i].build_id[j] & 0xff);
+                               if (strstr(buf, build_id) != NULL)
+                                       build_id_matches = 1;
+                       }
+               previous_key = key;
+       } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+
+       CHECK(build_id_matches < 1, "build id match",
+             "Didn't find expected build ID from the map\n");
+
+disable_pmu:
+       ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+
+close_pmu:
+       close(pmu_fd);
+
+close_prog:
+       bpf_object__close(obj);
+
+out:
+       return;
+}
+
+int main(void)
+{
        test_pkt_access();
        test_xdp();
        test_l4lb_all();
@@ -979,6 +1137,7 @@ int main(void)
        test_obj_name();
        test_tp_attach_query();
        test_stacktrace_map();
+       test_stacktrace_build_id();
 
        printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
        return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
new file mode 100644 (file)
index 0000000..b755bd7
--- /dev/null
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH         127
+#endif
+
+struct bpf_map_def SEC("maps") control_map = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(__u32),
+       .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") stackid_hmap = {
+       .type = BPF_MAP_TYPE_HASH,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(__u32),
+       .max_entries = 10000,
+};
+
+struct bpf_map_def SEC("maps") stackmap = {
+       .type = BPF_MAP_TYPE_STACK_TRACE,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(struct bpf_stack_build_id)
+               * PERF_MAX_STACK_DEPTH,
+       .max_entries = 128,
+       .map_flags = BPF_F_STACK_BUILD_ID,
+};
+
+/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
+struct random_urandom_args {
+       unsigned long long pad;
+       int got_bits;
+       int pool_left;
+       int input_left;
+};
+
+/*
+ * Program for the random/urandom_read tracepoint.
+ *
+ * Unless element 0 of control_map is non-zero, capture the user-space stack
+ * of the current context into stackmap and record the returned stack id as
+ * a key (with value 0) in stackid_hmap.  Always returns 0.
+ */
+SEC("tracepoint/random/urandom_read")
+int oncpu(struct random_urandom_args *args)
+{
+       __u32 key = 0, val = 0, *value_p;
+
+       value_p = bpf_map_lookup_elem(&control_map, &key);
+       if (value_p && *value_p)
+               return 0; /* skip if non-zero *value_p */
+
+       /* Every id stored in stackmap is also inserted into stackid_hmap,
+        * so both maps should end up holding the same set of keys.
+        */
+       key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK);
+       /* key is unsigned; the cast exposes a negative return value */
+       if ((int)key >= 0)
+               bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
index 8b201895c569f9cf9cf5dc1af34cc3234bd7b4a2..6272c784ca2a4803dd4610db655aa7a36a172121 100644 (file)
@@ -12,7 +12,6 @@
 #include <assert.h>
 
 #include <sys/socket.h>
-#include <sys/resource.h>
 
 #include <linux/filter.h>
 #include <linux/bpf.h>
@@ -21,6 +20,7 @@
 #include <bpf/bpf.h>
 
 #include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
 
 static struct bpf_insn prog[BPF_MAXINSNS];
 
@@ -184,11 +184,9 @@ static void do_test(uint32_t *tests, int start_insns, int fd_map,
 
 int main(void)
 {
-       struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
        uint32_t tests = 0;
        int i, fd_map;
 
-       setrlimit(RLIMIT_MEMLOCK, &rinf);
        fd_map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int),
                                sizeof(int), 1, BPF_F_NO_PREALLOC);
        assert(fd_map > 0);
index 95a370f3d3784574eec0787c45d2a849a98f5666..84ab5163c8281211f606167cfc23ed152efa0fcb 100644 (file)
 #include <linux/ptrace.h>
 #include <linux/bpf.h>
 #include <sys/ioctl.h>
+#include <sys/time.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 #include "bpf_util.h"
+#include "bpf_rlimit.h"
 #include <linux/perf_event.h>
 #include "test_tcpbpf.h"
 
index c73592fa3d4174815de60cbc8ecb411c95cffd40..3e7718b1a9ae49c176a407d9f14ad31704255701 100644 (file)
@@ -24,7 +24,6 @@
 #include <limits.h>
 
 #include <sys/capability.h>
-#include <sys/resource.h>
 
 #include <linux/unistd.h>
 #include <linux/filter.h>
@@ -41,7 +40,7 @@
 #  define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
 # endif
 #endif
-
+#include "bpf_rlimit.h"
 #include "../../../include/linux/filter.h"
 
 #ifndef ARRAY_SIZE
@@ -57,6 +56,9 @@
 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS     (1 << 0)
 #define F_LOAD_WITH_STRICT_ALIGNMENT           (1 << 1)
 
+#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
+static bool unpriv_disabled = false;
+
 struct bpf_test {
        const char *descr;
        struct bpf_insn insns[MAX_INSNS];
@@ -1594,6 +1596,60 @@ static struct bpf_test tests[] = {
                .result = ACCEPT,
                .prog_type = BPF_PROG_TYPE_SK_SKB,
        },
+       {
+               "direct packet read for SK_MSG",
+               .insns = {
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct sk_msg_md, data)),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct sk_msg_md, data_end)),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_SK_MSG,
+       },
+       {
+               "direct packet write for SK_MSG",
+               .insns = {
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct sk_msg_md, data)),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct sk_msg_md, data_end)),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+                       BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_SK_MSG,
+       },
+       {
+               "overlapping checks for direct packet access SK_MSG",
+               .insns = {
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct sk_msg_md, data)),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct sk_msg_md, data_end)),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+                       BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_SK_MSG,
+       },
        {
                "check skb->mark is not writeable by sockets",
                .insns = {
@@ -2586,6 +2642,62 @@ static struct bpf_test tests[] = {
                .result_unpriv = REJECT,
                .result = ACCEPT,
        },
+       {
+               "runtime/jit: tail_call within bounds, prog once",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_3, 0),
+                       BPF_LD_MAP_FD(BPF_REG_2, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_tail_call),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_prog = { 1 },
+               .result = ACCEPT,
+               .retval = 42,
+       },
+       {
+               "runtime/jit: tail_call within bounds, prog loop",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_3, 1),
+                       BPF_LD_MAP_FD(BPF_REG_2, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_tail_call),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_prog = { 1 },
+               .result = ACCEPT,
+               .retval = 41,
+       },
+       {
+               "runtime/jit: tail_call within bounds, no prog",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_3, 2),
+                       BPF_LD_MAP_FD(BPF_REG_2, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_tail_call),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_prog = { 1 },
+               .result = ACCEPT,
+               .retval = 1,
+       },
+       {
+               "runtime/jit: tail_call out of bounds",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_3, 256),
+                       BPF_LD_MAP_FD(BPF_REG_2, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_tail_call),
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_prog = { 1 },
+               .result = ACCEPT,
+               .retval = 2,
+       },
        {
                "runtime/jit: pass negative index to tail_call",
                .insns = {
@@ -2593,11 +2705,12 @@ static struct bpf_test tests[] = {
                        BPF_LD_MAP_FD(BPF_REG_2, 0),
                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
                                     BPF_FUNC_tail_call),
-                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
                        BPF_EXIT_INSN(),
                },
                .fixup_prog = { 1 },
                .result = ACCEPT,
+               .retval = 2,
        },
        {
                "runtime/jit: pass > 32bit index to tail_call",
@@ -2606,11 +2719,12 @@ static struct bpf_test tests[] = {
                        BPF_LD_MAP_FD(BPF_REG_2, 0),
                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
                                     BPF_FUNC_tail_call),
-                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
                        BPF_EXIT_INSN(),
                },
                .fixup_prog = { 2 },
                .result = ACCEPT,
+               .retval = 42,
        },
        {
                "stack pointer arithmetic",
@@ -11163,6 +11277,152 @@ static struct bpf_test tests[] = {
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_TRACEPOINT,
        },
+       {
+               "jit: lsh, rsh, arsh by 1",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_MOV64_IMM(BPF_REG_1, 0xff),
+                       BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 1),
+                       BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 1),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 1),
+                       BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 1),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0xff, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 1),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x7f, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .retval = 2,
+       },
+       {
+               "jit: mov32 for ldimm64, 1",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
+                       BPF_LD_IMM64(BPF_REG_1, 0xfeffffffffffffffULL),
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32),
+                       BPF_LD_IMM64(BPF_REG_2, 0xfeffffffULL),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .retval = 2,
+       },
+       {
+               "jit: mov32 for ldimm64, 2",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_LD_IMM64(BPF_REG_1, 0x1ffffffffULL),
+                       BPF_LD_IMM64(BPF_REG_2, 0xffffffffULL),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .retval = 2,
+       },
+       {
+               "jit: various mul tests",
+               .insns = {
+                       BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+                       BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+                       BPF_LD_IMM64(BPF_REG_1, 0xefefefULL),
+                       BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+                       BPF_ALU64_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_REG(BPF_REG_2, BPF_REG_2),
+                       BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+                       BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+                       BPF_ALU32_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL),
+                       BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL),
+                       BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+                       BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1),
+                       BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV64_IMM(BPF_REG_0, 2),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .retval = 2,
+       },
+       {
+               "xadd/w check unaligned stack",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+                       BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -7),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+                       BPF_EXIT_INSN(),
+               },
+               .result = REJECT,
+               .errstr = "misaligned stack access off",
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
+       {
+               "xadd/w check unaligned map",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV64_IMM(BPF_REG_1, 1),
+                       BPF_STX_XADD(BPF_W, BPF_REG_0, BPF_REG_1, 3),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = REJECT,
+               .errstr = "misaligned value access off",
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
+       {
+               "xadd/w check unaligned pkt",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct xdp_md, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct xdp_md, data_end)),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+                       BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 99),
+                       BPF_JMP_IMM(BPF_JA, 0, 0, 6),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+                       BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0),
+                       BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 1),
+                       BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 2),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = REJECT,
+               .errstr = "BPF_XADD stores into R2 packet",
+               .prog_type = BPF_PROG_TYPE_XDP,
+       },
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
@@ -11187,16 +11447,61 @@ static int create_map(uint32_t size_value, uint32_t max_elem)
        return fd;
 }
 
+/*
+ * Load a two-instruction socket filter program that sets R0 to 42 and
+ * exits.  Returns whatever bpf_load_program() returns.
+ */
+static int create_prog_dummy1(void)
+{
+       struct bpf_insn prog[] = {
+               BPF_MOV64_IMM(BPF_REG_0, 42),
+               BPF_EXIT_INSN(),
+       };
+
+       return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+                               ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
+
+/*
+ * Load a socket filter program that issues a tail call into slot idx of the
+ * prog array mfd; the instructions after the call set R0 to 41 and exit.
+ * Returns whatever bpf_load_program() returns.
+ *
+ * @mfd: fd of the prog array map used as the tail-call target table
+ * @idx: index in that map to tail-call into
+ */
+static int create_prog_dummy2(int mfd, int idx)
+{
+       struct bpf_insn prog[] = {
+               BPF_MOV64_IMM(BPF_REG_3, idx),
+               BPF_LD_MAP_FD(BPF_REG_2, mfd),
+               BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                            BPF_FUNC_tail_call),
+               BPF_MOV64_IMM(BPF_REG_0, 41),
+               BPF_EXIT_INSN(),
+       };
+
+       return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+                               ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
+
+/*
+ * Create a 4-entry BPF_MAP_TYPE_PROG_ARRAY and populate two slots:
+ *   index 0: the program from create_prog_dummy1()
+ *   index 1: the program from create_prog_dummy2(), which tail-calls
+ *            index 1 of this same map, i.e. itself
+ * The remaining slots are left empty.
+ *
+ * Returns the map fd, or -1 on any failure.
+ */
 static int create_prog_array(void)
 {
-       int fd;
+       int p1key = 0, p2key = 1;
+       int mfd, p1fd, p2fd;
 
-       fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
-                           sizeof(int), 4, 0);
-       if (fd < 0)
+       mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
+                            sizeof(int), 4, 0);
+       if (mfd < 0) {
                printf("Failed to create prog array '%s'!\n", strerror(errno));
+               return -1;
+       }
 
-       return fd;
+       p1fd = create_prog_dummy1();
+       p2fd = create_prog_dummy2(mfd, p2key);
+       if (p1fd < 0 || p2fd < 0)
+               goto out;
+       if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0)
+               goto out;
+       if (bpf_map_update_elem(mfd, &p2key, &p2fd, BPF_ANY) < 0)
+               goto out;
+       /* the program fds are closed on success as well as on failure */
+       close(p2fd);
+       close(p1fd);
+
+       return mfd;
+out:
+       /* either program fd may be negative here if its load failed */
+       close(p2fd);
+       close(p1fd);
+       close(mfd);
+       return -1;
 }
 
 static int create_map_in_map(void)
@@ -11317,7 +11622,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
                        goto fail_log;
                }
                if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) {
-                       printf("FAIL\nUnexpected error message!\n");
+                       printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n",
+                             expected_err, bpf_vlog);
                        goto fail_log;
                }
        }
@@ -11401,9 +11707,20 @@ static int set_admin(bool admin)
        return ret;
 }
 
+/*
+ * Set unpriv_disabled when the UNPRIV_SYSCTL sysctl reads as a non-zero
+ * value.  If the sysctl file cannot be opened, report the error and treat
+ * unprivileged BPF as disabled instead of passing a NULL stream to
+ * fgets()/fclose().
+ */
+static void get_unpriv_disabled(void)
+{
+       char buf[2];
+       FILE *fd;
+
+       fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
+       if (!fd) {
+               perror("fopen /proc/sys/"UNPRIV_SYSCTL);
+               unpriv_disabled = true;
+               return;
+       }
+       /* only the first character of the sysctl value is examined */
+       if (fgets(buf, 2, fd) == buf && atoi(buf))
+               unpriv_disabled = true;
+       fclose(fd);
+}
+
 static int do_test(bool unpriv, unsigned int from, unsigned int to)
 {
-       int i, passes = 0, errors = 0;
+       int i, passes = 0, errors = 0, skips = 0;
 
        for (i = from; i < to; i++) {
                struct bpf_test *test = &tests[i];
@@ -11411,7 +11728,10 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
                /* Program types that are not supported by non-root we
                 * skip right away.
                 */
-               if (!test->prog_type) {
+               if (!test->prog_type && unpriv_disabled) {
+                       printf("#%d/u %s SKIP\n", i, test->descr);
+                       skips++;
+               } else if (!test->prog_type) {
                        if (!unpriv)
                                set_admin(false);
                        printf("#%d/u %s ", i, test->descr);
@@ -11420,20 +11740,22 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
                                set_admin(true);
                }
 
-               if (!unpriv) {
+               if (unpriv) {
+                       printf("#%d/p %s SKIP\n", i, test->descr);
+                       skips++;
+               } else {
                        printf("#%d/p %s ", i, test->descr);
                        do_test_single(test, false, &passes, &errors);
                }
        }
 
-       printf("Summary: %d PASSED, %d FAILED\n", passes, errors);
+       printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes,
+              skips, errors);
        return errors ? EXIT_FAILURE : EXIT_SUCCESS;
 }
 
 int main(int argc, char **argv)
 {
-       struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-       struct rlimit rlim = { 1 << 20, 1 << 20 };
        unsigned int from = 0, to = ARRAY_SIZE(tests);
        bool unpriv = !is_admin();
 
@@ -11454,6 +11776,12 @@ int main(int argc, char **argv)
                }
        }
 
-       setrlimit(RLIMIT_MEMLOCK, unpriv ? &rlim : &rinf);
+       get_unpriv_disabled();
+       if (unpriv && unpriv_disabled) {
+               printf("Cannot run as unprivileged user with sysctl %s.\n",
+                      UNPRIV_SYSCTL);
+               return EXIT_FAILURE;
+       }
+
        return do_test(unpriv, from, to);
 }
index e9626cf5607ad060b070680d25986a270c5cd59c..8d6918c3b4a2c60f6cf610aaaccaa7db1e66515f 100644 (file)
@@ -4,7 +4,6 @@
 #include <string.h>
 #include <unistd.h>
 #include <sys/time.h>
-#include <sys/resource.h>
 
 #include <linux/bpf.h>
 #include <linux/filter.h>
@@ -12,6 +11,8 @@
 
 #include <bpf/bpf.h>
 
+#include "bpf_rlimit.h"
+
 #define LOG_SIZE (1 << 20)
 
 #define err(str...)    printf("ERROR: " str)
@@ -133,16 +134,11 @@ static void test_log_bad(char *log, size_t log_len, int log_level)
 
 int main(int argc, char **argv)
 {
-       struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
        char full_log[LOG_SIZE];
        char log[LOG_SIZE];
        size_t want_len;
        int i;
 
-       /* allow unlimited locked memory to have more consistent error code */
-       if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
-               perror("Unable to lift memlock rlimit");
-
        memset(log, 1, LOG_SIZE);
 
        /* Test incorrect attr */
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
new file mode 100644 (file)
index 0000000..4acfdeb
--- /dev/null
@@ -0,0 +1,22 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+
+#define BUF_SIZE 256
+/*
+ * Read BUF_SIZE bytes from /dev/urandom four times and discard the data.
+ * Returns 1 if /dev/urandom cannot be opened, 0 otherwise.
+ */
+int main(void)
+{
+       int fd = open("/dev/urandom", O_RDONLY);
+       int i;
+       char buf[BUF_SIZE];
+
+       if (fd < 0)
+               return 1;
+       /* the data and the read() result are deliberately ignored */
+       for (i = 0; i < 4; ++i)
+               read(fd, buf, BUF_SIZE);
+
+       close(fd);
+       return 0;
+}
index cea4adcd42b8877f7e5a05d57a837bcc61c1d97d..a63e8453984d2793eb38b706236f31dc49527e9b 100644 (file)
@@ -12,9 +12,9 @@ all:
                BUILD_TARGET=$(OUTPUT)/$$DIR;   \
                mkdir $$BUILD_TARGET  -p;       \
                make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
-               if [ -e $$DIR/$(TEST_PROGS) ]; then
-                       rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/;
-               fi
+               if [ -e $$DIR/$(TEST_PROGS) ]; then \
+                       rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \
+               fi \
        done
 
 override define RUN_TESTS
diff --git a/tools/testing/selftests/memfd/config b/tools/testing/selftests/memfd/config
new file mode 100644 (file)
index 0000000..835c7f4
--- /dev/null
@@ -0,0 +1 @@
+CONFIG_FUSE_FS=m
index 86636d207adf72c12403355cae88d3bfe819fd80..686da510f9895bf850a96a17a6eeeb1d17607cad 100644 (file)
@@ -4,8 +4,9 @@ all:
 include ../lib.mk
 
 TEST_PROGS := mem-on-off-test.sh
-override RUN_TESTS := ./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]"
-override EMIT_TESTS := echo "$(RUN_TESTS)"
+override RUN_TESTS := @./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]"
+
+override EMIT_TESTS := echo "$(subst @,,$(RUN_TESTS))"
 
 run_full_test:
        @/bin/bash ./mem-on-off-test.sh && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]"
index d7c30d36693536ce30a7dbee9d9e33877e2fdb4e..785fc18a16b4701f3ef875b60648726750b0cd26 100644 (file)
@@ -5,7 +5,7 @@ CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
index 7177bea1fdfa62a1aa4e424d4dab665d8a9b7aaf..6a75a3ea44adb5bde3cc1c00000a96307f88471e 100644 (file)
@@ -2,3 +2,8 @@ CONFIG_USER_NS=y
 CONFIG_BPF_SYSCALL=y
 CONFIG_TEST_BPF=m
 CONFIG_NUMA=y
+CONFIG_NET_VRF=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_VETH=y
index 06b1d7cc12cce329581acb9b6bb132142b2418c2..3991ad1a368d54abc19ca1d1f0c8adbb2124ae0b 100755 (executable)
@@ -56,7 +56,8 @@ TEST_NET6[2]=2001:db8:102
 
 # connected gateway
 CONGW[1]=169.254.1.254
-CONGW[2]=169.254.5.254
+CONGW[2]=169.254.3.254
+CONGW[3]=169.254.5.254
 
 # recursive gateway
 RECGW4[1]=169.254.11.254
@@ -232,6 +233,23 @@ run_ip()
        log_test $? ${exp_rc} "${desc}"
 }
 
+run_ip_mpath()
+{
+       local table="$1"
+       local prefix="$2"
+       local nh1="$3"
+       local nh2="$4"
+       local exp_rc="$5"
+       local desc="$6"
+
+       # nh1/nh2 are deliberately expanded unquoted below: each holds a whole
+       # nexthop spec (gateway, "dev <if>", optional "onlink") in one string
+
+       run_cmd ip ro add table "${table}" "${prefix}"/32 \
+               nexthop via ${nh1} nexthop via ${nh2}
+       log_test $? ${exp_rc} "${desc}"
+}
+
 valid_onlink_ipv4()
 {
        # - unicast connected, unicast recursive
@@ -243,13 +261,37 @@ valid_onlink_ipv4()
 
        log_subsection "VRF ${VRF}"
 
-       run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[2]} ${NETIFS[p5]} 0 "unicast connected"
+       run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
        run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
 
        log_subsection "VRF device, PBR table"
 
-       run_ip ${PBR_TABLE} ${TEST_NET4[2]}.3 ${CONGW[2]} ${NETIFS[p5]} 0 "unicast connected"
+       run_ip ${PBR_TABLE} ${TEST_NET4[2]}.3 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
        run_ip ${PBR_TABLE} ${TEST_NET4[2]}.4 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+
+       # multipath version
+       #
+       log_subsection "default VRF - main table - multipath"
+
+       run_ip_mpath 254 ${TEST_NET4[1]}.5 \
+               "${CONGW[1]} dev ${NETIFS[p1]} onlink" \
+               "${CONGW[2]} dev ${NETIFS[p3]} onlink" \
+               0 "unicast connected - multipath"
+
+       run_ip_mpath 254 ${TEST_NET4[1]}.6 \
+               "${RECGW4[1]} dev ${NETIFS[p1]} onlink" \
+               "${RECGW4[2]} dev ${NETIFS[p3]} onlink" \
+               0 "unicast recursive - multipath"
+
+       run_ip_mpath 254 ${TEST_NET4[1]}.7 \
+               "${CONGW[1]} dev ${NETIFS[p1]}"        \
+               "${CONGW[2]} dev ${NETIFS[p3]} onlink" \
+               0 "unicast connected - multipath onlink first only"
+
+       run_ip_mpath 254 ${TEST_NET4[1]}.8 \
+               "${CONGW[1]} dev ${NETIFS[p1]} onlink" \
+               "${CONGW[2]} dev ${NETIFS[p3]}"        \
+               0 "unicast connected - multipath onlink second only"
 }
 
 invalid_onlink_ipv4()
@@ -289,6 +331,21 @@ run_ip6()
        log_test $? ${exp_rc} "${desc}"
 }
 
+run_ip6_mpath()
+{
+       local table="$1"
+       local prefix="$2"
+       local opts="$3"
+       local nh1="$4"
+       local nh2="$5"
+       local exp_rc="$6"
+       local desc="$7"
+
+       run_cmd ip -6 ro add table "${table}" "${prefix}"/128 "${opts}" \
+               nexthop via ${nh1} nexthop via ${nh2}
+       log_test $? ${exp_rc} "${desc}"
+}
+
 valid_onlink_ipv6()
 {
        # - unicast connected, unicast recursive, v4-mapped
@@ -310,6 +367,40 @@ valid_onlink_ipv6()
        run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::4 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
        run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::5 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
        run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::6 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+
+       # multipath version
+       #
+       log_subsection "default VRF - main table - multipath"
+
+       run_ip6_mpath 254 ${TEST_NET6[1]}::4 "onlink" \
+               "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]}" \
+               "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]}" \
+               0 "unicast connected - multipath onlink"
+
+       run_ip6_mpath 254 ${TEST_NET6[1]}::5 "onlink" \
+               "${RECGW6[1]} dev ${NETIFS[p1]}" \
+               "${RECGW6[2]} dev ${NETIFS[p3]}" \
+               0 "unicast recursive - multipath onlink"
+
+       run_ip6_mpath 254 ${TEST_NET6[1]}::6 "onlink" \
+               "::ffff:${TEST_NET4IN6[1]} dev ${NETIFS[p1]}" \
+               "::ffff:${TEST_NET4IN6[2]} dev ${NETIFS[p3]}" \
+               0 "v4-mapped - multipath onlink"
+
+       run_ip6_mpath 254 ${TEST_NET6[1]}::7 "" \
+               "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]} onlink" \
+               "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]} onlink" \
+               0 "unicast connected - multipath onlink both nexthops"
+
+       run_ip6_mpath 254 ${TEST_NET6[1]}::8 "" \
+               "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]} onlink" \
+               "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]}" \
+               0 "unicast connected - multipath onlink first only"
+
+       run_ip6_mpath 254 ${TEST_NET6[1]}::9 "" \
+               "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]}"        \
+               "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]} onlink" \
+               0 "unicast connected - multipath onlink second only"
 }
 
 invalid_onlink_ipv6()
@@ -355,6 +446,7 @@ run_onlink_tests()
        log_section "IPv6 onlink"
        log_subsection "Valid onlink commands"
        valid_onlink_ipv6
+       log_subsection "Invalid onlink commands"
        invalid_onlink_ipv6
 }
 
index b617985ecdc10323136c54272b4398be95305948..9164e60d4b66c88100bc8c43e2c59102265d96a1 100755 (executable)
@@ -6,7 +6,9 @@
 
 ret=0
 
+VERBOSE=${VERBOSE:=0}
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+IP="ip -netns testns"
 
 log_test()
 {
@@ -15,10 +17,10 @@ log_test()
        local msg="$3"
 
        if [ ${rc} -eq ${expected} ]; then
-               printf "        %-60s  [ OK ]\n" "${msg}"
+               printf "    TEST: %-60s  [ OK ]\n" "${msg}"
        else
                ret=1
-               printf "        %-60s  [FAIL]\n" "${msg}"
+               printf "    TEST: %-60s  [FAIL]\n" "${msg}"
                if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
                echo
                        echo "hit enter to continue, 'q' to quit"
@@ -32,22 +34,44 @@ setup()
 {
        set -e
        ip netns add testns
-       ip -netns testns link set dev lo up
+       $IP link set dev lo up
 
-       ip -netns testns link add dummy0 type dummy
-       ip -netns testns link set dev dummy0 up
-       ip -netns testns address add 198.51.100.1/24 dev dummy0
-       ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
+       $IP link add dummy0 type dummy
+       $IP link set dev dummy0 up
+       $IP address add 198.51.100.1/24 dev dummy0
+       $IP -6 address add 2001:db8:1::1/64 dev dummy0
        set +e
 
 }
 
 cleanup()
 {
-       ip -netns testns link del dev dummy0 &> /dev/null
+       $IP link del dev dummy0 &> /dev/null
        ip netns del testns
 }
 
+get_linklocal()
+{
+       local dev=$1
+       local addr
+
+       addr=$($IP -6 -br addr show dev ${dev} | \
+       awk '{
+               for (i = 3; i <= NF; ++i) {
+                       if ($i ~ /^fe80/)
+                               print $i
+               }
+       }'
+       )
+       addr=${addr/\/*}
+
+       [ -z "$addr" ] && return 1
+
+       echo $addr
+
+       return 0
+}
+
 fib_unreg_unicast_test()
 {
        echo
@@ -56,19 +80,19 @@ fib_unreg_unicast_test()
        setup
 
        echo "    Start point"
-       ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+       $IP route get fibmatch 198.51.100.2 &> /dev/null
        log_test $? 0 "IPv4 fibmatch"
-       ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+       $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
        log_test $? 0 "IPv6 fibmatch"
 
        set -e
-       ip -netns testns link del dev dummy0
+       $IP link del dev dummy0
        set +e
 
        echo "    Nexthop device deleted"
-       ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+       $IP route get fibmatch 198.51.100.2 &> /dev/null
        log_test $? 2 "IPv4 fibmatch - no route"
-       ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+       $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
        log_test $? 2 "IPv6 fibmatch - no route"
 
        cleanup
@@ -83,43 +107,43 @@ fib_unreg_multipath_test()
        setup
 
        set -e
-       ip -netns testns link add dummy1 type dummy
-       ip -netns testns link set dev dummy1 up
-       ip -netns testns address add 192.0.2.1/24 dev dummy1
-       ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy1
+       $IP link add dummy1 type dummy
+       $IP link set dev dummy1 up
+       $IP address add 192.0.2.1/24 dev dummy1
+       $IP -6 address add 2001:db8:2::1/64 dev dummy1
 
-       ip -netns testns route add 203.0.113.0/24 \
+       $IP route add 203.0.113.0/24 \
                nexthop via 198.51.100.2 dev dummy0 \
                nexthop via 192.0.2.2 dev dummy1
-       ip -netns testns -6 route add 2001:db8:3::/64 \
+       $IP -6 route add 2001:db8:3::/64 \
                nexthop via 2001:db8:1::2 dev dummy0 \
                nexthop via 2001:db8:2::2 dev dummy1
        set +e
 
        echo "    Start point"
-       ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
+       $IP route get fibmatch 203.0.113.1 &> /dev/null
        log_test $? 0 "IPv4 fibmatch"
-       ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+       $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
        log_test $? 0 "IPv6 fibmatch"
 
        set -e
-       ip -netns testns link del dev dummy0
+       $IP link del dev dummy0
        set +e
 
        echo "    One nexthop device deleted"
-       ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
+       $IP route get fibmatch 203.0.113.1 &> /dev/null
        log_test $? 2 "IPv4 - multipath route removed on delete"
 
-       ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+       $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
        # In IPv6 we do not flush the entire multipath route.
        log_test $? 0 "IPv6 - multipath down to single path"
 
        set -e
-       ip -netns testns link del dev dummy1
+       $IP link del dev dummy1
        set +e
 
        echo "    Second nexthop device deleted"
-       ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+       $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
        log_test $? 2 "IPv6 - no route"
 
        cleanup
@@ -139,19 +163,19 @@ fib_down_unicast_test()
        setup
 
        echo "    Start point"
-       ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+       $IP route get fibmatch 198.51.100.2 &> /dev/null
        log_test $? 0 "IPv4 fibmatch"
-       ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+       $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
        log_test $? 0 "IPv6 fibmatch"
 
        set -e
-       ip -netns testns link set dev dummy0 down
+       $IP link set dev dummy0 down
        set +e
 
        echo "    Route deleted on down"
-       ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+       $IP route get fibmatch 198.51.100.2 &> /dev/null
        log_test $? 2 "IPv4 fibmatch"
-       ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+       $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
        log_test $? 2 "IPv6 fibmatch"
 
        cleanup
@@ -162,31 +186,31 @@ fib_down_multipath_test_do()
        local down_dev=$1
        local up_dev=$2
 
-       ip -netns testns route get fibmatch 203.0.113.1 \
+       $IP route get fibmatch 203.0.113.1 \
                oif $down_dev &> /dev/null
        log_test $? 2 "IPv4 fibmatch on down device"
-       ip -netns testns -6 route get fibmatch 2001:db8:3::1 \
+       $IP -6 route get fibmatch 2001:db8:3::1 \
                oif $down_dev &> /dev/null
        log_test $? 2 "IPv6 fibmatch on down device"
 
-       ip -netns testns route get fibmatch 203.0.113.1 \
+       $IP route get fibmatch 203.0.113.1 \
                oif $up_dev &> /dev/null
        log_test $? 0 "IPv4 fibmatch on up device"
-       ip -netns testns -6 route get fibmatch 2001:db8:3::1 \
+       $IP -6 route get fibmatch 2001:db8:3::1 \
                oif $up_dev &> /dev/null
        log_test $? 0 "IPv6 fibmatch on up device"
 
-       ip -netns testns route get fibmatch 203.0.113.1 | \
+       $IP route get fibmatch 203.0.113.1 | \
                grep $down_dev | grep -q "dead linkdown"
        log_test $? 0 "IPv4 flags on down device"
-       ip -netns testns -6 route get fibmatch 2001:db8:3::1 | \
+       $IP -6 route get fibmatch 2001:db8:3::1 | \
                grep $down_dev | grep -q "dead linkdown"
        log_test $? 0 "IPv6 flags on down device"
 
-       ip -netns testns route get fibmatch 203.0.113.1 | \
+       $IP route get fibmatch 203.0.113.1 | \
                grep $up_dev | grep -q "dead linkdown"
        log_test $? 1 "IPv4 flags on up device"
-       ip -netns testns -6 route get fibmatch 2001:db8:3::1 | \
+       $IP -6 route get fibmatch 2001:db8:3::1 | \
                grep $up_dev | grep -q "dead linkdown"
        log_test $? 1 "IPv6 flags on up device"
 }
@@ -199,53 +223,53 @@ fib_down_multipath_test()
        setup
 
        set -e
-       ip -netns testns link add dummy1 type dummy
-       ip -netns testns link set dev dummy1 up
+       $IP link add dummy1 type dummy
+       $IP link set dev dummy1 up
 
-       ip -netns testns address add 192.0.2.1/24 dev dummy1
-       ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy1
+       $IP address add 192.0.2.1/24 dev dummy1
+       $IP -6 address add 2001:db8:2::1/64 dev dummy1
 
-       ip -netns testns route add 203.0.113.0/24 \
+       $IP route add 203.0.113.0/24 \
                nexthop via 198.51.100.2 dev dummy0 \
                nexthop via 192.0.2.2 dev dummy1
-       ip -netns testns -6 route add 2001:db8:3::/64 \
+       $IP -6 route add 2001:db8:3::/64 \
                nexthop via 2001:db8:1::2 dev dummy0 \
                nexthop via 2001:db8:2::2 dev dummy1
        set +e
 
        echo "    Verify start point"
-       ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
+       $IP route get fibmatch 203.0.113.1 &> /dev/null
        log_test $? 0 "IPv4 fibmatch"
 
-       ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+       $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
        log_test $? 0 "IPv6 fibmatch"
 
        set -e
-       ip -netns testns link set dev dummy0 down
+       $IP link set dev dummy0 down
        set +e
 
        echo "    One device down, one up"
        fib_down_multipath_test_do "dummy0" "dummy1"
 
        set -e
-       ip -netns testns link set dev dummy0 up
-       ip -netns testns link set dev dummy1 down
+       $IP link set dev dummy0 up
+       $IP link set dev dummy1 down
        set +e
 
        echo "    Other device down and up"
        fib_down_multipath_test_do "dummy1" "dummy0"
 
        set -e
-       ip -netns testns link set dev dummy0 down
+       $IP link set dev dummy0 down
        set +e
 
        echo "    Both devices down"
-       ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
+       $IP route get fibmatch 203.0.113.1 &> /dev/null
        log_test $? 2 "IPv4 fibmatch"
-       ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+       $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
        log_test $? 2 "IPv6 fibmatch"
 
-       ip -netns testns link del dev dummy1
+       $IP link del dev dummy1
        cleanup
 }
 
@@ -264,55 +288,55 @@ fib_carrier_local_test()
        setup
 
        set -e
-       ip -netns testns link set dev dummy0 carrier on
+       $IP link set dev dummy0 carrier on
        set +e
 
        echo "    Start point"
-       ip -netns testns route get fibmatch 198.51.100.1 &> /dev/null
+       $IP route get fibmatch 198.51.100.1 &> /dev/null
        log_test $? 0 "IPv4 fibmatch"
-       ip -netns testns -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+       $IP -6 route get fibmatch 2001:db8:1::1 &> /dev/null
        log_test $? 0 "IPv6 fibmatch"
 
-       ip -netns testns route get fibmatch 198.51.100.1 | \
+       $IP route get fibmatch 198.51.100.1 | \
                grep -q "linkdown"
        log_test $? 1 "IPv4 - no linkdown flag"
-       ip -netns testns -6 route get fibmatch 2001:db8:1::1 | \
+       $IP -6 route get fibmatch 2001:db8:1::1 | \
                grep -q "linkdown"
        log_test $? 1 "IPv6 - no linkdown flag"
 
        set -e
-       ip -netns testns link set dev dummy0 carrier off
+       $IP link set dev dummy0 carrier off
        sleep 1
        set +e
 
        echo "    Carrier off on nexthop"
-       ip -netns testns route get fibmatch 198.51.100.1 &> /dev/null
+       $IP route get fibmatch 198.51.100.1 &> /dev/null
        log_test $? 0 "IPv4 fibmatch"
-       ip -netns testns -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+       $IP -6 route get fibmatch 2001:db8:1::1 &> /dev/null
        log_test $? 0 "IPv6 fibmatch"
 
-       ip -netns testns route get fibmatch 198.51.100.1 | \
+       $IP route get fibmatch 198.51.100.1 | \
                grep -q "linkdown"
        log_test $? 1 "IPv4 - linkdown flag set"
-       ip -netns testns -6 route get fibmatch 2001:db8:1::1 | \
+       $IP -6 route get fibmatch 2001:db8:1::1 | \
                grep -q "linkdown"
        log_test $? 1 "IPv6 - linkdown flag set"
 
        set -e
-       ip -netns testns address add 192.0.2.1/24 dev dummy0
-       ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy0
+       $IP address add 192.0.2.1/24 dev dummy0
+       $IP -6 address add 2001:db8:2::1/64 dev dummy0
        set +e
 
        echo "    Route to local address with carrier down"
-       ip -netns testns route get fibmatch 192.0.2.1 &> /dev/null
+       $IP route get fibmatch 192.0.2.1 &> /dev/null
        log_test $? 0 "IPv4 fibmatch"
-       ip -netns testns -6 route get fibmatch 2001:db8:2::1 &> /dev/null
+       $IP -6 route get fibmatch 2001:db8:2::1 &> /dev/null
        log_test $? 0 "IPv6 fibmatch"
 
-       ip -netns testns route get fibmatch 192.0.2.1 | \
+       $IP route get fibmatch 192.0.2.1 | \
                grep -q "linkdown"
        log_test $? 1 "IPv4 linkdown flag set"
-       ip -netns testns -6 route get fibmatch 2001:db8:2::1 | \
+       $IP -6 route get fibmatch 2001:db8:2::1 | \
                grep -q "linkdown"
        log_test $? 1 "IPv6 linkdown flag set"
 
@@ -329,54 +353,54 @@ fib_carrier_unicast_test()
        setup
 
        set -e
-       ip -netns testns link set dev dummy0 carrier on
+       $IP link set dev dummy0 carrier on
        set +e
 
        echo "    Start point"
-       ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+       $IP route get fibmatch 198.51.100.2 &> /dev/null
        log_test $? 0 "IPv4 fibmatch"
-       ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+       $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
        log_test $? 0 "IPv6 fibmatch"
 
-       ip -netns testns route get fibmatch 198.51.100.2 | \
+       $IP route get fibmatch 198.51.100.2 | \
                grep -q "linkdown"
        log_test $? 1 "IPv4 no linkdown flag"
-       ip -netns testns -6 route get fibmatch 2001:db8:1::2 | \
+       $IP -6 route get fibmatch 2001:db8:1::2 | \
                grep -q "linkdown"
        log_test $? 1 "IPv6 no linkdown flag"
 
        set -e
-       ip -netns testns link set dev dummy0 carrier off
+       $IP link set dev dummy0 carrier off
        set +e
 
        echo "    Carrier down"
-       ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+       $IP route get fibmatch 198.51.100.2 &> /dev/null
        log_test $? 0 "IPv4 fibmatch"
-       ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+       $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
        log_test $? 0 "IPv6 fibmatch"
 
-       ip -netns testns route get fibmatch 198.51.100.2 | \
+       $IP route get fibmatch 198.51.100.2 | \
                grep -q "linkdown"
        log_test $? 0 "IPv4 linkdown flag set"
-       ip -netns testns -6 route get fibmatch 2001:db8:1::2 | \
+       $IP -6 route get fibmatch 2001:db8:1::2 | \
                grep -q "linkdown"
        log_test $? 0 "IPv6 linkdown flag set"
 
        set -e
-       ip -netns testns address add 192.0.2.1/24 dev dummy0
-       ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy0
+       $IP address add 192.0.2.1/24 dev dummy0
+       $IP -6 address add 2001:db8:2::1/64 dev dummy0
        set +e
 
        echo "    Second address added with carrier down"
-       ip -netns testns route get fibmatch 192.0.2.2 &> /dev/null
+       $IP route get fibmatch 192.0.2.2 &> /dev/null
        log_test $? 0 "IPv4 fibmatch"
-       ip -netns testns -6 route get fibmatch 2001:db8:2::2 &> /dev/null
+       $IP -6 route get fibmatch 2001:db8:2::2 &> /dev/null
        log_test $? 0 "IPv6 fibmatch"
 
-       ip -netns testns route get fibmatch 192.0.2.2 | \
+       $IP route get fibmatch 192.0.2.2 | \
                grep -q "linkdown"
        log_test $? 0 "IPv4 linkdown flag set"
-       ip -netns testns -6 route get fibmatch 2001:db8:2::2 | \
+       $IP -6 route get fibmatch 2001:db8:2::2 | \
                grep -q "linkdown"
        log_test $? 0 "IPv6 linkdown flag set"
 
@@ -389,11 +413,168 @@ fib_carrier_test()
        fib_carrier_unicast_test
 }
 
+################################################################################
+# Tests on nexthop spec
+
+# run 'ip route add' with given spec
+add_rt()
+{
+       local desc="$1"         # test description handed to log_test
+       local erc=$2            # expected exit code of the route add
+       local vrf=$3            # VRF name; "-" selects "default"
+       local pfx=$4
+       local gw=$5             # optional gateway, turned into "via <gw>"
+       local dev=$6            # optional device, turned into "dev <dev>"
+       local cmd out rc
+
+       [ "$vrf" = "-" ] && vrf="default"
+       [ -n "$gw" ] && gw="via $gw"
+       [ -n "$dev" ] && dev="dev $dev"
+
+       cmd="$IP route add vrf $vrf $pfx $gw $dev"
+       if [ "$VERBOSE" = "1" ]; then
+               printf "\n    COMMAND: %s\n" "$cmd"
+       fi
+
+       out=$(eval $cmd 2>&1)
+       rc=$?
+       if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
+               echo "    $out"
+       fi
+       log_test $rc $erc "$desc"
+}
+
+fib4_nexthop()
+{
+       echo
+       echo "IPv4 nexthop tests"
+
+       echo "<<< write me >>>"
+}
+
+fib6_nexthop()
+{
+       local lldummy=$(get_linklocal dummy0)
+       local llv1=$(get_linklocal veth1)       # used as a gateway via veth0
+
+       if [ -z "$lldummy" ]; then
+               echo "Failed to get linklocal address for dummy0"
+               return 1
+       fi
+       if [ -z "$llv1" ]; then
+               echo "Failed to get linklocal address for veth1"
+               return 1
+       fi
+
+       echo
+       echo "IPv6 nexthop tests"
+
+       add_rt "Directly connected nexthop, unicast address" 0 \
+               - 2001:db8:101::/64 2001:db8:1::2
+       add_rt "Directly connected nexthop, unicast address with device" 0 \
+               - 2001:db8:102::/64 2001:db8:1::2 "dummy0"
+       add_rt "Gateway is linklocal address" 0 \
+               - 2001:db8:103::1/64 $llv1 "veth0"
+
+       # fails because LL address requires a device
+       add_rt "Gateway is linklocal address, no device" 2 \
+               - 2001:db8:104::1/64 $llv1
+
+       # local address can not be a gateway
+       add_rt "Gateway can not be local unicast address" 2 \
+               - 2001:db8:105::/64 2001:db8:1::1
+       add_rt "Gateway can not be local unicast address, with device" 2 \
+               - 2001:db8:106::/64 2001:db8:1::1 "dummy0"
+       add_rt "Gateway can not be a local linklocal address" 2 \
+               - 2001:db8:107::1/64 $lldummy "dummy0"
+
+       # VRF tests
+       add_rt "Gateway can be local address in a VRF" 0 \
+               - 2001:db8:108::/64 2001:db8:51::2
+       add_rt "Gateway can be local address in a VRF, with device" 0 \
+               - 2001:db8:109::/64 2001:db8:51::2 "veth0"
+       add_rt "Gateway can be local linklocal address in a VRF" 0 \
+               - 2001:db8:110::1/64 $llv1 "veth0"
+
+       add_rt "Redirect to VRF lookup" 0 \
+               - 2001:db8:111::/64 "" "red"
+
+       add_rt "VRF route, gateway can be local address in default VRF" 0 \
+               red 2001:db8:112::/64 2001:db8:51::1
+
+       # local address in same VRF fails
+       add_rt "VRF route, gateway can not be a local address" 2 \
+               red 2001:db8:113::1/64 2001:db8:2::1
+       add_rt "VRF route, gateway can not be a local addr with device" 2 \
+               red 2001:db8:114::1/64 2001:db8:2::1 "dummy1"
+}
+
+# Default VRF:
+#   dummy0 - 198.51.100.1/24 2001:db8:1::1/64
+#   veth0  - 192.0.2.1/24    2001:db8:51::1/64
+#
+# VRF red:
+#   dummy1 - 192.168.2.1/24 2001:db8:2::1/64
+#   veth1  - 192.0.2.2/24   2001:db8:51::2/64
+#
+#  [ dummy0   veth0 ]--[ veth1   dummy1 ]
+
+fib_nexthop_test()
+{
+       setup
+
+       set -e
+
+       $IP -4 rule add pref 32765 table local
+       $IP -4 rule del pref 0
+       $IP -6 rule add pref 32765 table local
+       $IP -6 rule del pref 0
+
+       $IP link add red type vrf table 1
+       $IP link set red up
+       $IP -4 route add vrf red unreachable default metric 4278198272
+       $IP -6 route add vrf red unreachable default metric 4278198272
+
+       $IP link add veth0 type veth peer name veth1
+       $IP link set dev veth0 up
+       $IP address add 192.0.2.1/24 dev veth0
+       $IP -6 address add 2001:db8:51::1/64 dev veth0
+
+       $IP link set dev veth1 vrf red up
+       $IP address add 192.0.2.2/24 dev veth1
+       $IP -6 address add 2001:db8:51::2/64 dev veth1
+
+       $IP link add dummy1 type dummy
+       $IP link set dev dummy1 vrf red up
+       $IP address add 192.168.2.1/24 dev dummy1
+       $IP -6 address add 2001:db8:2::1/64 dev dummy1
+       set +e
+
+       sleep 1
+       fib4_nexthop
+       fib6_nexthop
+
+       (
+       $IP link del dev dummy1
+       $IP link del veth0
+       $IP link del red
+       ) 2>/dev/null
+       cleanup
+}
+
+################################################################################
+#
+
 fib_test()
 {
-       fib_unreg_test
-       fib_down_test
-       fib_carrier_test
+       if [ -n "$TEST" ]; then
+               eval $TEST
+       else
+               fib_unreg_test
+               fib_down_test
+               fib_carrier_test
+               fib_nexthop_test
+       fi
 }
 
 if [ "$(id -u)" -ne 0 ];then
diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore
new file mode 100644 (file)
index 0000000..a793eef
--- /dev/null
@@ -0,0 +1 @@
+forwarding.config
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
new file mode 100644 (file)
index 0000000..4a0964c
--- /dev/null
@@ -0,0 +1,56 @@
+Motivation
+==========
+
+One of the nice things about network namespaces is that they allow one
+to easily create and test complex environments.
+
+Unfortunately, these namespaces can not be used with actual switching
+ASICs, as their ports can not be migrated to other network namespaces
+(NETIF_F_NETNS_LOCAL) and most of them probably do not support the
+L1-separation provided by namespaces.
+
+However, a similar kind of flexibility can be achieved by using VRFs and
+by looping the switch ports together. For example:
+
+                             br0
+                              +
+               vrf-h1         |           vrf-h2
+                 +        +---+----+        +
+                 |        |        |        |
+    192.0.2.1/24 +        +        +        + 192.0.2.2/24
+               swp1     swp2     swp3     swp4
+                 +        +        +        +
+                 |        |        |        |
+                 +--------+        +--------+
+
+The VRFs act as lightweight namespaces representing hosts connected to
+the switch.
+
+This approach for testing switch ASICs has several advantages over the
+traditional method that requires multiple physical machines, to name a
+few:
+
+1. Only the device under test (DUT) is being tested without noise from
+other systems.
+
+2. Ability to easily provision complex topologies. Testing bridging
+between 4-port LAGs or 8-way ECMP requires many physical links that are
+not always available. With the VRF-based approach one merely needs to
+loopback more ports.
+
+These tests are written with switch ASICs in mind, but they can be run
+on any Linux box using veth pairs to emulate physical loopbacks.
+
+Guidelines for Writing Tests
+============================
+
+o Where possible, reuse an existing topology for different tests instead
+  of recreating the same topology.
+o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and
+  RFC 5737, respectively.
+o Where possible, tests shall be written so that they can be reused by
+  multiple topologies and added to lib.sh.
+o Checks shall be added to lib.sh for any external dependencies.
+o Code shall be checked using ShellCheck [1] prior to submission.
+
+1. https://www.shellcheck.net/
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
new file mode 100755 (executable)
index 0000000..75d9224
--- /dev/null
@@ -0,0 +1,88 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+CHECK_TC="yes"
+source lib.sh
+
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+       simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+       # 10 Seconds ageing time.
+       ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+               mcast_snooping 0
+
+       ip link set dev $swp1 master br0
+       ip link set dev $swp2 master br0
+
+       ip link set dev br0 up
+       ip link set dev $swp1 up
+       ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+       ip link set dev $swp2 down
+       ip link set dev $swp1 down
+
+       ip link del dev br0
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+
+       switch_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       switch_destroy
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 192.0.2.2
+ping6_test $h1 2001:db8:1::2
+learning_test "br0" $swp1 $h1 $h2
+flood_test $swp2 $h1 $h2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
new file mode 100755 (executable)
index 0000000..1cddf06
--- /dev/null
@@ -0,0 +1,86 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+       simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+       # 10 Seconds ageing time.
+       ip link add dev br0 type bridge ageing_time 1000 mcast_snooping 0
+
+       ip link set dev $swp1 master br0
+       ip link set dev $swp2 master br0
+
+       ip link set dev br0 up
+       ip link set dev $swp1 up
+       ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+       ip link set dev $swp2 down
+       ip link set dev $swp1 down
+
+       ip link del dev br0
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+
+       switch_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       switch_destroy
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 192.0.2.2
+ping6_test $h1 2001:db8:1::2
+learning_test "br0" $swp1 $h1 $h2
+flood_test $swp2 $h1 $h2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
new file mode 100644 (file)
index 0000000..5cd2aed
--- /dev/null
@@ -0,0 +1,12 @@
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_VRF=m
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
new file mode 100644 (file)
index 0000000..e819d04
--- /dev/null
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Topology description. p1 looped back to p2, p3 to p4 and so on.
+declare -A NETIFS
+
+NETIFS[p1]=veth0
+NETIFS[p2]=veth1
+NETIFS[p3]=veth2
+NETIFS[p4]=veth3
+NETIFS[p5]=veth4
+NETIFS[p6]=veth5
+NETIFS[p7]=veth6
+NETIFS[p8]=veth7
+
+##############################################################################
+# Defines
+
+# IPv4 ping utility name
+PING=ping
+# IPv6 ping utility name. Some distributions use 'ping' for IPv6.
+PING6=ping6
+# Packet generator. Some distributions use 'mz'.
+MZ=mausezahn
+# Time to wait after interfaces participating in the test are all UP
+WAIT_TIME=5
+# Whether to pause on failure or not.
+PAUSE_ON_FAIL=no
+# Whether to pause on cleanup or not.
+PAUSE_ON_CLEANUP=no
+# Type of network interface to create
+NETIF_TYPE=veth
+# Whether to create virtual interfaces (veth) or not
+NETIF_CREATE=yes
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
new file mode 100644 (file)
index 0000000..1ac6c62
--- /dev/null
@@ -0,0 +1,577 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+# Can be overridden by the configuration file.
+PING=${PING:=ping}
+PING6=${PING6:=ping6}
+MZ=${MZ:=mausezahn}
+WAIT_TIME=${WAIT_TIME:=5}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
+NETIF_TYPE=${NETIF_TYPE:=veth}
+NETIF_CREATE=${NETIF_CREATE:=yes}
+
+if [[ -f forwarding.config ]]; then
+       source forwarding.config
+fi
+
+##############################################################################
+# Sanity checks
+
+check_tc_version()
+{
+       tc -j &> /dev/null
+       if [[ $? -ne 0 ]]; then
+               echo "SKIP: iproute2 too old; tc is missing JSON support"
+               exit 1
+       fi
+
+       tc filter help 2>&1 | grep block &> /dev/null
+       if [[ $? -ne 0 ]]; then
+               echo "SKIP: iproute2 too old; tc is missing shared block support"
+               exit 1
+       fi
+}
+
+if [[ "$(id -u)" -ne 0 ]]; then
+       echo "SKIP: need root privileges"
+       exit 0
+fi
+
+if [[ "$CHECK_TC" = "yes" ]]; then
+       check_tc_version
+fi
+
+if [[ ! -x "$(command -v jq)" ]]; then
+       echo "SKIP: jq not installed"
+       exit 1
+fi
+
+if [[ ! -x "$(command -v $MZ)" ]]; then
+       echo "SKIP: $MZ not installed"
+       exit 1
+fi
+
+if [[ ! -v NUM_NETIFS ]]; then
+       echo "SKIP: importer does not define \"NUM_NETIFS\""
+       exit 1
+fi
+
+##############################################################################
+# Command line options handling
+
+count=0
+
+while [[ $# -gt 0 ]]; do
+       if [[ "$count" -eq "0" ]]; then
+               unset NETIFS
+               declare -A NETIFS
+       fi
+       count=$((count + 1))
+       NETIFS[p$count]="$1"
+       shift
+done
+
+##############################################################################
+# Network interfaces configuration
+
+# Create the veth pairs described by NETIFS: p1 <-> p2, p3 <-> p4 and so on.
+# A pair is only created when its first interface does not exist yet.
+# Exits with status 1 when a pair can not be created.
+create_netif_veth()
+{
+       local i
+
+       # Visit only the first end of each pair. Assigning to the loop
+       # variable inside a 'for i in <list>' loop does not affect the next
+       # iteration, so a C-style loop with a step of two is used instead.
+       for ((i = 1; i <= NUM_NETIFS; i += 2)); do
+               local j=$((i + 1))
+
+               if ! ip link show dev "${NETIFS[p$i]}" &> /dev/null; then
+                       if ! ip link add "${NETIFS[p$i]}" type veth \
+                               peer name "${NETIFS[p$j]}"; then
+                               echo "Failed to create netif"
+                               exit 1
+                       fi
+               fi
+       done
+}
+
+# Create the test interfaces according to NETIF_TYPE. Only "veth" is
+# handled; any other type aborts the run with status 1.
+create_netif()
+{
+       case "$NETIF_TYPE" in
+       veth) create_netif_veth
+             ;;
+       # Single quotes need no escaping inside double quotes; a backslash
+       # placed before them would be printed literally.
+       *) echo "Can not create interfaces of type '$NETIF_TYPE'"
+          exit 1
+          ;;
+       esac
+}
+
+if [[ "$NETIF_CREATE" = "yes" ]]; then
+       create_netif
+fi
+
+for i in $(eval echo {1..$NUM_NETIFS}); do
+       ip link show dev ${NETIFS[p$i]} &> /dev/null
+       if [[ $? -ne 0 ]]; then
+               echo "SKIP: could not find all required interfaces"
+               exit 1
+       fi
+done
+
+##############################################################################
+# Helpers
+
+# Exit status to return at the end. Set in case one of the tests fails.
+EXIT_STATUS=0
+# Per-test return value. Clear at the beginning of each test.
+RET=0
+
+check_err()
+{
+       local err=$1
+       local msg=$2
+
+       if [[ $RET -eq 0 && $err -ne 0 ]]; then
+               RET=$err
+               retmsg=$msg
+       fi
+}
+
+check_fail()
+{
+       local err=$1
+       local msg=$2
+
+       if [[ $RET -eq 0 && $err -eq 0 ]]; then
+               RET=1
+               retmsg=$msg
+       fi
+}
+
+# Print the verdict of the current test based on RET.
+# $1: test name; $2: optional string, printed in parentheses after the name.
+# On failure EXIT_STATUS is set to 1, retmsg is printed when non-empty and,
+# if PAUSE_ON_FAIL is "yes", the run pauses until the user answers.
+# Returns 0 when the test passed and 1 when it failed.
+log_test()
+{
+       local test_name=$1
+       local opt_str=$2
+
+       [[ $# -eq 2 ]] && opt_str="($opt_str)"
+
+       if [[ $RET -eq 0 ]]; then
+               printf "TEST: %-60s  [PASS]\n" "$test_name $opt_str"
+               return 0
+       fi
+
+       EXIT_STATUS=1
+       printf "TEST: %-60s  [FAIL]\n" "$test_name $opt_str"
+       if [[ -n "$retmsg" ]]; then
+               printf "\t%s\n" "$retmsg"
+       fi
+
+       if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+               echo "Hit enter to continue, 'q' to quit"
+               read a
+               [ "$a" = "q" ] && exit 1
+       fi
+
+       return 1
+}
+
+log_info()
+{
+       local msg=$1
+
+       echo "INFO: $msg"
+}
+
+# Block until every interface under test reports 'state UP', then sleep
+# for WAIT_TIME more seconds. There is no timeout: an interface that never
+# comes up makes this function wait forever.
+setup_wait()
+{
+       # Keep the loop index local so the caller's variables are untouched.
+       local i
+
+       for ((i = 1; i <= NUM_NETIFS; i++)); do
+               until ip link show dev "${NETIFS[p$i]}" up \
+                               | grep -q 'state UP'; do
+                       sleep 1
+               done
+       done
+
+       # Make sure links are ready.
+       sleep $WAIT_TIME
+}
+
+pre_cleanup()
+{
+       if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
+               echo "Pausing before cleanup, hit any key to continue"
+               read
+       fi
+}
+
+# Move the "table local" lookup rule from preference 0 to preference 32765,
+# for both IPv4 and IPv6.
+# NOTE(review): presumably required so that VRF (l3mdev) rules are
+# consulted before the local table - confirm against the kernel VRF docs.
+vrf_prepare()
+{
+       ip -4 rule add pref 32765 table local
+       ip -4 rule del pref 0
+       ip -6 rule add pref 32765 table local
+       ip -6 rule del pref 0
+}
+
+# Undo vrf_prepare: put the "table local" lookup rule back at preference 0
+# and drop the one at preference 32765, IPv6 first and then IPv4.
+vrf_cleanup()
+{
+       ip -6 rule add pref 0 table local
+       ip -6 rule del pref 32765
+       ip -4 rule add pref 0 table local
+       ip -4 rule del pref 32765
+}
+
+__last_tb_id=0
+declare -A __TB_IDS
+
+# Allocate the next routing table ID for VRF $1, remember it in __TB_IDS
+# and hand it back through the function's return status.
+# NOTE: a shell return status is only 8 bits wide, so table IDs above 255
+# can not be reported this way.
+__vrf_td_id_assign()
+{
+       local vrf_name=$1
+
+       __last_tb_id=$((__last_tb_id + 1))
+       __TB_IDS[$vrf_name]=$__last_tb_id
+       return $__last_tb_id
+}
+
+# Hand back, through the return status, the table ID recorded in __TB_IDS
+# for VRF $1.
+# NOTE: for a name that was never assigned the expansion is empty and
+# 'return' then yields the status of the previous command instead.
+__vrf_td_id_lookup()
+{
+       local vrf_name=$1
+
+       return ${__TB_IDS[$vrf_name]}
+}
+
+vrf_create()
+{
+       local vrf_name=$1
+       local tb_id
+
+       __vrf_td_id_assign $vrf_name
+       tb_id=$?
+
+       ip link add dev $vrf_name type vrf table $tb_id
+       ip -4 route add table $tb_id unreachable default metric 4278198272
+       ip -6 route add table $tb_id unreachable default metric 4278198272
+}
+
+vrf_destroy()
+{
+       local vrf_name=$1
+       local tb_id
+
+       __vrf_td_id_lookup $vrf_name
+       tb_id=$?
+
+       ip -6 route del table $tb_id unreachable default metric 4278198272
+       ip -4 route del table $tb_id unreachable default metric 4278198272
+       ip link del dev $vrf_name
+}
+
+__addr_add_del()
+{
+       local if_name=$1
+       local add_del=$2
+       local array
+
+       shift
+       shift
+       array=("${@}")
+
+       for addrstr in "${array[@]}"; do
+               ip address $add_del $addrstr dev $if_name
+       done
+}
+
+simple_if_init()
+{
+       local if_name=$1
+       local vrf_name
+       local array
+
+       shift
+       vrf_name=v$if_name
+       array=("${@}")
+
+       vrf_create $vrf_name
+       ip link set dev $if_name master $vrf_name
+       ip link set dev $vrf_name up
+       ip link set dev $if_name up
+
+       __addr_add_del $if_name add "${array[@]}"
+}
+
+simple_if_fini()
+{
+       local if_name=$1
+       local vrf_name
+       local array
+
+       shift
+       vrf_name=v$if_name
+       array=("${@}")
+
+       __addr_add_del $if_name del "${array[@]}"
+
+       ip link set dev $if_name down
+       vrf_destroy $vrf_name
+}
+
+master_name_get()
+{
+       local if_name=$1
+
+       ip -j link show dev $if_name | jq -r '.[]["master"]'
+}
+
+link_stats_tx_packets_get()
+{
+       local if_name=$1
+
+       ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
+}
+
+mac_get()
+{
+       local if_name=$1
+
+       ip -j link show dev $if_name | jq -r '.[]["address"]'
+}
+
+bridge_ageing_time_get()
+{
+       local bridge=$1
+       local ageing_time
+
+       # Need to divide by 100 to convert to seconds.
+       ageing_time=$(ip -j -d link show dev $bridge \
+                     | jq '.[]["linkinfo"]["info_data"]["ageing_time"]')
+       echo $((ageing_time / 100))
+}
+
+forwarding_enable()
+{
+       ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding)
+       ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding)
+
+       sysctl -q -w net.ipv4.conf.all.forwarding=1
+       sysctl -q -w net.ipv6.conf.all.forwarding=1
+}
+
+forwarding_restore()
+{
+       sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd
+       sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd
+}
+
+# Return 0 when 'ethtool -k' reports "hw-tc-offload: on" for every
+# interface under test, and 1 as soon as one interface does not.
+tc_offload_check()
+{
+       # Keep the loop index local so the caller's variables are untouched.
+       local i
+
+       for ((i = 1; i <= NUM_NETIFS; i++)); do
+               if ! ethtool -k "${NETIFS[p$i]}" \
+                               | grep -q "hw-tc-offload: on"; then
+                       return 1
+               fi
+       done
+
+       return 0
+}
+
+##############################################################################
+# Tests
+
+ping_test()
+{
+       local if_name=$1
+       local dip=$2
+       local vrf_name
+
+       RET=0
+
+       vrf_name=$(master_name_get $if_name)
+       ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null
+       check_err $?
+       log_test "ping"
+}
+
+ping6_test()
+{
+       local if_name=$1
+       local dip=$2
+       local vrf_name
+
+       RET=0
+
+       vrf_name=$(master_name_get $if_name)
+       ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null
+       check_err $?
+       log_test "ping6"
+}
+
+learning_test()
+{
+       local bridge=$1
+       local br_port1=$2       # Connected to `host1_if`.
+       local host1_if=$3
+       local host2_if=$4
+       local mac=de:ad:be:ef:13:37
+       local ageing_time
+
+       RET=0
+
+       bridge -j fdb show br $bridge brport $br_port1 \
+               | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+       check_fail $? "Found FDB record when should not"
+
+       # Disable unknown unicast flooding on `br_port1` to make sure
+       # packets are only forwarded through the port after a matching
+       # FDB entry was installed.
+       bridge link set dev $br_port1 flood off
+
+       tc qdisc add dev $host1_if ingress
+       tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
+               flower dst_mac $mac action drop
+
+       $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+       sleep 1
+
+       tc -j -s filter show dev $host1_if ingress \
+               | jq -e ".[] | select(.options.handle == 101) \
+               | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+       check_fail $? "Packet reached second host when should not"
+
+       $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+       sleep 1
+
+       bridge -j fdb show br $bridge brport $br_port1 \
+               | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+       check_err $? "Did not find FDB record when should"
+
+       $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+       sleep 1
+
+       tc -j -s filter show dev $host1_if ingress \
+               | jq -e ".[] | select(.options.handle == 101) \
+               | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+       check_err $? "Packet did not reach second host when should"
+
+       # Wait for 10 seconds after the ageing time to make sure FDB
+       # record was aged-out.
+       ageing_time=$(bridge_ageing_time_get $bridge)
+       sleep $((ageing_time + 10))
+
+       bridge -j fdb show br $bridge brport $br_port1 \
+               | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+       check_fail $? "Found FDB record when should not"
+
+       bridge link set dev $br_port1 learning off
+
+       $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+       sleep 1
+
+       bridge -j fdb show br $bridge brport $br_port1 \
+               | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+       check_fail $? "Found FDB record when should not"
+
+       bridge link set dev $br_port1 learning on
+
+       tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
+       tc qdisc del dev $host1_if ingress
+
+       bridge link set dev $br_port1 flood on
+
+       log_test "FDB learning"
+}
+
+# Send one packet from `host1_if` and check whether it arrived at
+# `host2_if`.
+# $1: "true" if the packet is expected to arrive, "false" otherwise
+# $2: destination MAC of the packet; $3: destination IP of the packet
+# $4: sending interface; $5: interface on which arrival is checked
+# Returns 0 when the observation matches the expectation and 1 otherwise.
+flood_test_do()
+{
+       local should_flood=$1
+       local mac=$2
+       local ip=$3
+       local host1_if=$4
+       local host2_if=$5
+       local err=0
+
+       # Add an ACL on `host2_if` which will tell us whether the packet
+       # was flooded to it or not.
+       tc qdisc add dev $host2_if ingress
+       tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+               flower dst_mac $mac action drop
+
+       $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
+       sleep 1
+
+       # jq -e succeeds only if filter 101 counted exactly one packet.
+       tc -j -s filter show dev $host2_if ingress \
+               | jq -e ".[] | select(.options.handle == 101) \
+               | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+       # Both uses of $? below refer to the pipeline above; no command may
+       # be inserted between the pipeline and this test.
+       if [[ $? -ne 0 && $should_flood == "true" || \
+             $? -eq 0 && $should_flood == "false" ]]; then
+               err=1
+       fi
+
+       tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+       tc qdisc del dev $host2_if ingress
+
+       return $err
+}
+
+flood_unicast_test()
+{
+       local br_port=$1
+       local host1_if=$2
+       local host2_if=$3
+       local mac=de:ad:be:ef:13:37
+       local ip=192.0.2.100
+
+       RET=0
+
+       bridge link set dev $br_port flood off
+
+       flood_test_do false $mac $ip $host1_if $host2_if
+       check_err $? "Packet flooded when should not"
+
+       bridge link set dev $br_port flood on
+
+       flood_test_do true $mac $ip $host1_if $host2_if
+       check_err $? "Packet was not flooded when should"
+
+       log_test "Unknown unicast flood"
+}
+
+flood_multicast_test()
+{
+       local br_port=$1
+       local host1_if=$2
+       local host2_if=$3
+       local mac=01:00:5e:00:00:01
+       local ip=239.0.0.1
+
+       RET=0
+
+       bridge link set dev $br_port mcast_flood off
+
+       flood_test_do false $mac $ip $host1_if $host2_if
+       check_err $? "Packet flooded when should not"
+
+       bridge link set dev $br_port mcast_flood on
+
+       flood_test_do true $mac $ip $host1_if $host2_if
+       check_err $? "Packet was not flooded when should"
+
+       log_test "Unregistered multicast flood"
+}
+
+flood_test()
+{
+       # `br_port` is connected to `host2_if`
+       local br_port=$1
+       local host1_if=$2
+       local host2_if=$3
+
+       flood_unicast_test $br_port $host1_if $host2_if
+       flood_multicast_test $br_port $host1_if $host2_if
+}
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
new file mode 100755 (executable)
index 0000000..cc6a14a
--- /dev/null
@@ -0,0 +1,125 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+       vrf_create "vrf-h1"
+       ip link set dev $h1 master vrf-h1
+
+       ip link set dev vrf-h1 up
+       ip link set dev $h1 up
+
+       ip address add 192.0.2.2/24 dev $h1
+       ip address add 2001:db8:1::2/64 dev $h1
+
+       ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+       ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+       ip route del 2001:db8:2::/64 vrf vrf-h1
+       ip route del 198.51.100.0/24 vrf vrf-h1
+
+       ip address del 2001:db8:1::2/64 dev $h1
+       ip address del 192.0.2.2/24 dev $h1
+
+       ip link set dev $h1 down
+       vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+       vrf_create "vrf-h2"
+       ip link set dev $h2 master vrf-h2
+
+       ip link set dev vrf-h2 up
+       ip link set dev $h2 up
+
+       ip address add 198.51.100.2/24 dev $h2
+       ip address add 2001:db8:2::2/64 dev $h2
+
+       ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+       ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+       ip route del 2001:db8:1::/64 vrf vrf-h2
+       ip route del 192.0.2.0/24 vrf vrf-h2
+
+       ip address del 2001:db8:2::2/64 dev $h2
+       ip address del 198.51.100.2/24 dev $h2
+
+       ip link set dev $h2 down
+       vrf_destroy "vrf-h2"
+}
+
+router_create()
+{
+       ip link set dev $rp1 up
+       ip link set dev $rp2 up
+
+       ip address add 192.0.2.1/24 dev $rp1
+       ip address add 2001:db8:1::1/64 dev $rp1
+
+       ip address add 198.51.100.1/24 dev $rp2
+       ip address add 2001:db8:2::1/64 dev $rp2
+}
+
+router_destroy()
+{
+       ip address del 2001:db8:2::1/64 dev $rp2
+       ip address del 198.51.100.1/24 dev $rp2
+
+       ip address del 2001:db8:1::1/64 dev $rp1
+       ip address del 192.0.2.1/24 dev $rp1
+
+       ip link set dev $rp2 down
+       ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       rp1=${NETIFS[p2]}
+
+       rp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+
+       router_create
+
+       forwarding_enable
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       forwarding_restore
+
+       router_destroy
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
new file mode 100755 (executable)
index 0000000..3bc3510
--- /dev/null
@@ -0,0 +1,376 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+       vrf_create "vrf-h1"
+       ip link set dev $h1 master vrf-h1
+
+       ip link set dev vrf-h1 up
+       ip link set dev $h1 up
+
+       ip address add 192.0.2.2/24 dev $h1
+       ip address add 2001:db8:1::2/64 dev $h1
+
+       ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+       ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+       ip route del 2001:db8:2::/64 vrf vrf-h1
+       ip route del 198.51.100.0/24 vrf vrf-h1
+
+       ip address del 2001:db8:1::2/64 dev $h1
+       ip address del 192.0.2.2/24 dev $h1
+
+       ip link set dev $h1 down
+       vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+       vrf_create "vrf-h2"
+       ip link set dev $h2 master vrf-h2
+
+       ip link set dev vrf-h2 up
+       ip link set dev $h2 up
+
+       ip address add 198.51.100.2/24 dev $h2
+       ip address add 2001:db8:2::2/64 dev $h2
+
+       ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+       ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+       ip route del 2001:db8:1::/64 vrf vrf-h2
+       ip route del 192.0.2.0/24 vrf vrf-h2
+
+       ip address del 2001:db8:2::2/64 dev $h2
+       ip address del 198.51.100.2/24 dev $h2
+
+       ip link set dev $h2 down
+       vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+       vrf_create "vrf-r1"
+       ip link set dev $rp11 master vrf-r1
+       ip link set dev $rp12 master vrf-r1
+       ip link set dev $rp13 master vrf-r1
+
+       ip link set dev vrf-r1 up
+       ip link set dev $rp11 up
+       ip link set dev $rp12 up
+       ip link set dev $rp13 up
+
+       ip address add 192.0.2.1/24 dev $rp11
+       ip address add 2001:db8:1::1/64 dev $rp11
+
+       ip address add 169.254.2.12/24 dev $rp12
+       ip address add fe80:2::12/64 dev $rp12
+
+       ip address add 169.254.3.13/24 dev $rp13
+       ip address add fe80:3::13/64 dev $rp13
+
+       ip route add 198.51.100.0/24 vrf vrf-r1 \
+               nexthop via 169.254.2.22 dev $rp12 \
+               nexthop via 169.254.3.23 dev $rp13
+       ip route add 2001:db8:2::/64 vrf vrf-r1 \
+               nexthop via fe80:2::22 dev $rp12 \
+               nexthop via fe80:3::23 dev $rp13
+}
+
+router1_destroy()
+{
+       ip route del 2001:db8:2::/64 vrf vrf-r1
+       ip route del 198.51.100.0/24 vrf vrf-r1
+
+       ip address del fe80:3::13/64 dev $rp13
+       ip address del 169.254.3.13/24 dev $rp13
+
+       ip address del fe80:2::12/64 dev $rp12
+       ip address del 169.254.2.12/24 dev $rp12
+
+       ip address del 2001:db8:1::1/64 dev $rp11
+       ip address del 192.0.2.1/24 dev $rp11
+
+       ip link set dev $rp13 down
+       ip link set dev $rp12 down
+       ip link set dev $rp11 down
+
+       vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+       vrf_create "vrf-r2"
+       ip link set dev $rp21 master vrf-r2
+       ip link set dev $rp22 master vrf-r2
+       ip link set dev $rp23 master vrf-r2
+
+       ip link set dev vrf-r2 up
+       ip link set dev $rp21 up
+       ip link set dev $rp22 up
+       ip link set dev $rp23 up
+
+       ip address add 198.51.100.1/24 dev $rp21
+       ip address add 2001:db8:2::1/64 dev $rp21
+
+       ip address add 169.254.2.22/24 dev $rp22
+       ip address add fe80:2::22/64 dev $rp22
+
+       ip address add 169.254.3.23/24 dev $rp23
+       ip address add fe80:3::23/64 dev $rp23
+
+       ip route add 192.0.2.0/24 vrf vrf-r2 \
+               nexthop via 169.254.2.12 dev $rp22 \
+               nexthop via 169.254.3.13 dev $rp23
+       ip route add 2001:db8:1::/64 vrf vrf-r2 \
+               nexthop via fe80:2::12 dev $rp22 \
+               nexthop via fe80:3::13 dev $rp23
+}
+
+router2_destroy()
+{
+       ip route del 2001:db8:1::/64 vrf vrf-r2
+       ip route del 192.0.2.0/24 vrf vrf-r2
+
+       ip address del fe80:3::23/64 dev $rp23
+       ip address del 169.254.3.23/24 dev $rp23
+
+       ip address del fe80:2::22/64 dev $rp22
+       ip address del 169.254.2.22/24 dev $rp22
+
+       ip address del 2001:db8:2::1/64 dev $rp21
+       ip address del 198.51.100.1/24 dev $rp21
+
+       ip link set dev $rp23 down
+       ip link set dev $rp22 down
+       ip link set dev $rp21 down
+
+       vrf_destroy "vrf-r2"
+}
+
+# Compare the measured split of packets between the two nexthops with the
+# configured weights and log the verdict.
+# $1: test description
+# $2, $3: weights configured for rp12 and rp13
+# $4, $5: number of packets transmitted through rp12 and rp13
+# The test fails when one of the links carried no packets, or when the
+# measured ratio deviates from the expected ratio by more than 10%.
+multipath_eval()
+{
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
+       local packets_rp12=$4
+       local packets_rp13=$5
+       local weights_ratio packets_ratio diff
+
+       RET=0
+
+       # The expected ratio depends on the weights only. Compute it first
+       # so that it can be reported even when no packets were counted.
+       if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+               weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
+                      | bc -l)
+       else
+               weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" \
+                      | bc -l)
+       fi
+
+       # Bail out before dividing by a zero packet count.
+       if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
+               check_err 1 "Packet difference is 0"
+               log_test "$desc"
+               log_info "Expected ratio $weights_ratio"
+               return
+       fi
+
+       # Divide the packet counts in the same order as the weights.
+       if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+               packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
+                      | bc -l)
+       else
+               packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" \
+                      | bc -l)
+       fi
+
+       # Absolute difference: strip a leading minus sign, if any.
+       diff=$(echo $weights_ratio - $packets_ratio | bc -l)
+       diff=${diff#-}
+
+       # bc prints 1 when the relative error exceeds 10% and 0 otherwise.
+       test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0
+       check_err $? "Too large discrepancy between expected and measured ratios"
+       log_test "$desc"
+       log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
+}
+
+multipath4_test()
+{
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
+       local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+       local packets_rp12 packets_rp13
+       local hash_policy
+
+       # Transmit multiple flows from h1 to h2 and make sure they are
+       # distributed between both multipath links (rp12 and rp13)
+       # according to the configured weights.
+       hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
+       sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+       ip route replace 198.51.100.0/24 vrf vrf-r1 \
+               nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
+               nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
+
+       t0_rp12=$(link_stats_tx_packets_get $rp12)
+       t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+       ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+              -d 1msec -t udp "sp=1024,dp=0-32768"
+
+       t1_rp12=$(link_stats_tx_packets_get $rp12)
+       t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+       let "packets_rp12 = $t1_rp12 - $t0_rp12"
+       let "packets_rp13 = $t1_rp13 - $t0_rp13"
+       multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+       # Restore settings.
+       ip route replace 198.51.100.0/24 vrf vrf-r1 \
+               nexthop via 169.254.2.22 dev $rp12 \
+               nexthop via 169.254.3.23 dev $rp13
+       sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
+}
+
+multipath6_l4_test()
+{
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
+       local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+       local packets_rp12 packets_rp13
+       local hash_policy
+
+       # Transmit multiple flows from h1 to h2 and make sure they are
+       # distributed between both multipath links (rp12 and rp13)
+       # according to the configured weights.
+       hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy)
+       sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+              nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+              nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+       t0_rp12=$(link_stats_tx_packets_get $rp12)
+       t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+       $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+              -d 1msec -t udp "sp=1024,dp=0-32768"
+
+       t1_rp12=$(link_stats_tx_packets_get $rp12)
+       t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+       let "packets_rp12 = $t1_rp12 - $t0_rp12"
+       let "packets_rp13 = $t1_rp13 - $t0_rp13"
+       multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+              nexthop via fe80:2::22 dev $rp12 \
+              nexthop via fe80:3::23 dev $rp13
+
+       sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy
+}
+
+multipath6_test()
+{
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
+       local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+       local packets_rp12 packets_rp13
+
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+              nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+              nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+       t0_rp12=$(link_stats_tx_packets_get $rp12)
+       t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+       # Generate 16384 echo requests, each with a random flow label.
+       for _ in $(seq 1 16384); do
+              ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+       done
+
+       t1_rp12=$(link_stats_tx_packets_get $rp12)
+       t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+       let "packets_rp12 = $t1_rp12 - $t0_rp12"
+       let "packets_rp13 = $t1_rp13 - $t0_rp13"
+       multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+              nexthop via fe80:2::22 dev $rp12 \
+              nexthop via fe80:3::23 dev $rp13
+}
+
+multipath_test()
+{
+       log_info "Running IPv4 multipath tests"
+       multipath4_test "ECMP" 1 1
+       multipath4_test "Weighted MP 2:1" 2 1
+       multipath4_test "Weighted MP 11:45" 11 45
+
+       log_info "Running IPv6 multipath tests"
+       multipath6_test "ECMP" 1 1
+       multipath6_test "Weighted MP 2:1" 2 1
+       multipath6_test "Weighted MP 11:45" 11 45
+
+       log_info "Running IPv6 L4 hash multipath tests"
+       multipath6_l4_test "ECMP" 1 1
+       multipath6_l4_test "Weighted MP 2:1" 2 1
+       multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       rp11=${NETIFS[p2]}
+
+       rp12=${NETIFS[p3]}
+       rp22=${NETIFS[p4]}
+
+       rp13=${NETIFS[p5]}
+       rp23=${NETIFS[p6]}
+
+       rp21=${NETIFS[p7]}
+       h2=${NETIFS[p8]}
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+
+       router1_create
+       router2_create
+
+       forwarding_enable
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       forwarding_restore
+
+       router2_destroy
+       router1_destroy
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+multipath_test
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
new file mode 100755 (executable)
index 0000000..3a6385e
--- /dev/null
@@ -0,0 +1,202 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.2/24
+       tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+       tc qdisc del dev $h2 clsact
+       simple_if_fini $h2 192.0.2.2/24
+}
+
+switch_create()
+{
+       simple_if_init $swp1 192.0.2.2/24
+       tc qdisc add dev $swp1 clsact
+
+       simple_if_init $swp2 192.0.2.1/24
+}
+
+switch_destroy()
+{
+       simple_if_fini $swp2 192.0.2.1/24
+
+       tc qdisc del dev $swp1 clsact
+       simple_if_fini $swp1 192.0.2.2/24
+}
+
+mirred_egress_test()
+{
+       local action=$1
+
+       RET=0
+
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_ip 192.0.2.2 action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched without redirect rule inserted"
+
+       tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_ip 192.0.2.2 action mirred egress $action \
+               dev $swp2
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_err $? "Did not match incoming $action packet"
+
+       tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+       log_test "mirred egress $action ($tcflags)"
+}
+
+gact_drop_and_ok_test()
+{
+       RET=0
+
+       tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags dst_ip 192.0.2.2 action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $swp1 ingress" 102 1
+       check_err $? "Packet was not dropped"
+
+       tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_ip 192.0.2.2 action ok
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $swp1 ingress" 101 1
+       check_err $? "Did not see passed packet"
+
+       tc_check_packets "dev $swp1 ingress" 102 2
+       check_fail $? "Packet was dropped and it should not reach here"
+
+       tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+       tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+       log_test "gact drop and ok ($tcflags)"
+}
+
+gact_trap_test()
+{
+       RET=0
+
+       tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+               skip_hw dst_ip 192.0.2.2 action drop
+       tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
+               $tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+               dev $swp2
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $swp1 ingress" 101 1
+       check_fail $? "Saw packet without trap rule inserted"
+
+       tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags dst_ip 192.0.2.2 action trap
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $swp1 ingress" 102 1
+       check_err $? "Packet was not trapped"
+
+       tc_check_packets "dev $swp1 ingress" 101 1
+       check_err $? "Did not see trapped packet"
+
+       tc filter del dev $swp1 ingress protocol ip pref 3 handle 103 flower
+       tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+       tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+       log_test "trap ($tcflags)"
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       h1mac=$(mac_get $h1)
+       h2mac=$(mac_get $h2)
+
+       swp1origmac=$(mac_get $swp1)
+       swp2origmac=$(mac_get $swp2)
+       ip link set $swp1 address $h2mac
+       ip link set $swp2 address $h1mac
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+       switch_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       switch_destroy
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+
+       ip link set $swp2 address $swp2origmac
+       ip link set $swp1 address $swp1origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+gact_drop_and_ok_test
+mirred_egress_test "redirect"
+mirred_egress_test "mirror"
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+       log_info "Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       gact_drop_and_ok_test
+       mirred_egress_test "redirect"
+       mirred_egress_test "mirror"
+       gact_trap_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
new file mode 100755 (executable)
index 0000000..2fd1522
--- /dev/null
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.2/24
+       tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+       tc qdisc del dev $h2 clsact
+       simple_if_fini $h2 192.0.2.2/24
+}
+
+unreachable_chain_test()
+{
+       RET=0
+
+       tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+               flower $tcflags dst_mac $h2mac action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 1101 1
+       check_fail $? "matched on filter in unreachable chain"
+
+       tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+               flower
+
+       log_test "unreachable chain ($tcflags)"
+}
+
+gact_goto_chain_test()
+{
+       RET=0
+
+       tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+               flower $tcflags dst_mac $h2mac action drop
+       tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags dst_mac $h2mac action drop
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_mac $h2mac action goto chain 1
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 102 1
+       check_fail $? "Matched on a wrong filter"
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_err $? "Did not match on correct filter with goto chain action"
+
+       tc_check_packets "dev $h2 ingress" 1101 1
+       check_err $? "Did not match on correct filter in chain 1"
+
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+       tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+               flower
+
+       log_test "gact goto chain ($tcflags)"
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       h2=${NETIFS[p2]}
+       h1mac=$(mac_get $h1)
+       h2mac=$(mac_get $h2)
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+unreachable_chain_test
+gact_goto_chain_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+       log_info "Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       unreachable_chain_test
+       gact_goto_chain_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
new file mode 100644 (file)
index 0000000..9d3b64a
--- /dev/null
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+CHECK_TC="yes"
+
+tc_check_packets()
+{
+       local id=$1
+       local handle=$2
+       local count=$3
+       local output ret
+       # Succeed iff filter $handle under $id shows exactly $count packets on action 0
+       output="$(tc -j -s filter show $id)"
+       ret=$?
+       # work around the jq bug which causes jq to return 0 in case input is ""
+       if [[ $ret -ne 0 ]]; then
+               return $ret
+       fi
+       echo "$output" | \
+               jq -e ".[] \
+               | select(.options.handle == $handle) \
+               | select(.options.actions[0].stats.packets == $count)" \
+               &> /dev/null
+       return $?
+}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
new file mode 100755 (executable)
index 0000000..032b882
--- /dev/null
@@ -0,0 +1,196 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+       tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+       tc qdisc del dev $h2 clsact
+       simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+match_dst_mac_test()
+{
+       local dummy_mac=de:ad:be:ef:aa:aa
+
+       RET=0
+
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_mac $dummy_mac action drop
+       tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags dst_mac $h2mac action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched on a wrong filter"
+
+       tc_check_packets "dev $h2 ingress" 102 1
+       check_err $? "Did not match on correct filter"
+
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+       log_test "dst_mac match ($tcflags)"
+}
+
+match_src_mac_test()
+{
+       local dummy_mac=de:ad:be:ef:aa:aa
+
+       RET=0
+
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags src_mac $dummy_mac action drop
+       tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags src_mac $h1mac action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched on a wrong filter"
+
+       tc_check_packets "dev $h2 ingress" 102 1
+       check_err $? "Did not match on correct filter"
+
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+       log_test "src_mac match ($tcflags)"
+}
+
+match_dst_ip_test()
+{
+       RET=0
+
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags dst_ip 198.51.100.2 action drop
+       tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags dst_ip 192.0.2.2 action drop
+       tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+               $tcflags dst_ip 192.0.2.0/24 action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched on a wrong filter"
+
+       tc_check_packets "dev $h2 ingress" 102 1
+       check_err $? "Did not match on correct filter"
+
+       tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 103 1
+       check_err $? "Did not match on correct filter with mask"
+
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+       log_test "dst_ip match ($tcflags)"
+}
+
+match_src_ip_test()
+{
+       RET=0
+
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+               $tcflags src_ip 198.51.100.1 action drop
+       tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+               $tcflags src_ip 192.0.2.1 action drop
+       tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+               $tcflags src_ip 192.0.2.0/24 action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched on a wrong filter"
+
+       tc_check_packets "dev $h2 ingress" 102 1
+       check_err $? "Did not match on correct filter"
+
+       tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "dev $h2 ingress" 103 1
+       check_err $? "Did not match on correct filter with mask"
+
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+       tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+       log_test "src_ip match ($tcflags)"
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       h2=${NETIFS[p2]}
+       h1mac=$(mac_get $h1)
+       h2mac=$(mac_get $h2)
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+match_dst_mac_test
+match_src_mac_test
+match_dst_ip_test
+match_src_ip_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+       log_info "Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       match_dst_mac_test
+       match_src_mac_test
+       match_dst_ip_test
+       match_src_ip_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
new file mode 100755 (executable)
index 0000000..077b980
--- /dev/null
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.1/24
+}
+
+h2_destroy()
+{
+       simple_if_fini $h2 192.0.2.1/24
+}
+
+switch_create()
+{
+       simple_if_init $swp1 192.0.2.2/24
+       tc qdisc add dev $swp1 ingress_block 22 egress_block 23 clsact
+
+       simple_if_init $swp2 192.0.2.2/24
+       tc qdisc add dev $swp2 ingress_block 22 egress_block 23 clsact
+}
+
+switch_destroy()
+{
+       tc qdisc del dev $swp2 clsact
+       simple_if_fini $swp2 192.0.2.2/24
+
+       tc qdisc del dev $swp1 clsact
+       simple_if_fini $swp1 192.0.2.2/24
+}
+
+shared_block_test()
+{
+       RET=0
+
+       tc filter add block 22 protocol ip pref 1 handle 101 flower \
+               $tcflags dst_ip 192.0.2.2 action drop
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "block 22" 101 1
+       check_err $? "Did not match first incoming packet on a block"
+
+       $MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+
+       tc_check_packets "block 22" 101 2
+       check_err $? "Did not match second incoming packet on a block"
+
+       tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+       log_test "shared block ($tcflags)"
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       h1mac=$(mac_get $h1)
+       h2mac=$(mac_get $h2)
+
+       swmac=$(mac_get $swp1)
+       swp2origmac=$(mac_get $swp2)
+       ip link set $swp2 address $swmac
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+       switch_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       switch_destroy
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+
+       ip link set $swp2 address $swp2origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+shared_block_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+       log_info "Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       shared_block_test
+fi
+
+exit $EXIT_STATUS
index f57a2eaad069303f5888d21fb333c43e03af5cb3..88795b510b32c02229a52e5441a048482fba7e93 100755 (executable)
@@ -19,5 +19,5 @@ cleanup() {
 trap cleanup EXIT
 setup
 
-"$@"
+ip netns exec "${NETNS}" "$@"
 exit "$?"
index 5cc2a53bb71cd68e6a2ada5a93c4f07b598b3086..406cc70c571dea0c1bfef19165e8cfa251a2cf39 100644 (file)
@@ -344,27 +344,53 @@ static int do_setup_tx(int domain, int type, int protocol)
        return fd;
 }
 
-static int do_process_zerocopy_cookies(struct sock_extended_err *serr,
-                                      uint32_t *ckbuf, size_t nbytes)
+static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
 {
-       int ncookies, i;
+       int i;
 
-       if (serr->ee_errno != 0)
-               error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
-       ncookies = serr->ee_data;
-       if (ncookies > SO_EE_ORIGIN_MAX_ZCOOKIES)
+       if (ck->num > RDS_MAX_ZCOOKIES)
                error(1, 0, "Returned %d cookies, max expected %d\n",
-                     ncookies, SO_EE_ORIGIN_MAX_ZCOOKIES);
-       if (nbytes != ncookies * sizeof(uint32_t))
-               error(1, 0, "Expected %d cookies, got %ld\n",
-                     ncookies, nbytes/sizeof(uint32_t));
-       for (i = 0; i < ncookies; i++)
+                     ck->num, RDS_MAX_ZCOOKIES);
+       for (i = 0; i < ck->num; i++)
                if (cfg_verbose >= 2)
-                       fprintf(stderr, "%d\n", ckbuf[i]);
-       return ncookies;
+                       fprintf(stderr, "%d\n", ck->cookies[i]);
+       return ck->num;
 }
 
-static bool do_recv_completion(int fd)
+static bool do_recvmsg_completion(int fd)
+{
+       char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
+       struct rds_zcopy_cookies *ck;
+       struct cmsghdr *cmsg;
+       struct msghdr msg;
+       bool ret = false;
+
+       memset(&msg, 0, sizeof(msg));
+       msg.msg_control = cmsgbuf;
+       msg.msg_controllen = sizeof(cmsgbuf);
+       /* no iovec set up; a non-zero return is treated as "no completion" */
+       if (recvmsg(fd, &msg, MSG_DONTWAIT))
+               return ret;
+       /* errnum 0: recvmsg() succeeded, so errno is stale here */
+       if (msg.msg_flags & MSG_CTRUNC)
+               error(1, 0, "recvmsg notification: truncated");
+
+       for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+               if (cmsg->cmsg_level == SOL_RDS &&
+                   cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
+                       /* first completion cmsg: count it and stop */
+                       ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
+                       completions += do_process_zerocopy_cookies(ck);
+                       ret = true;
+                       break;
+               }
+               error(0, 0, "ignoring cmsg at level %d type %d\n",
+                           cmsg->cmsg_level, cmsg->cmsg_type);
+       }
+       return ret;
+}
+
+static bool do_recv_completion(int fd, int domain)
 {
        struct sock_extended_err *serr;
        struct msghdr msg = {};
@@ -372,17 +398,13 @@ static bool do_recv_completion(int fd)
        uint32_t hi, lo, range;
        int ret, zerocopy;
        char control[100];
-       uint32_t ckbuf[SO_EE_ORIGIN_MAX_ZCOOKIES];
-       struct iovec iov;
+
+       if (domain == PF_RDS)
+               return do_recvmsg_completion(fd);
 
        msg.msg_control = control;
        msg.msg_controllen = sizeof(control);
 
-       iov.iov_base = ckbuf;
-       iov.iov_len = (SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(ckbuf[0]));
-       msg.msg_iov = &iov;
-       msg.msg_iovlen = 1;
-
        ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
        if (ret == -1 && errno == EAGAIN)
                return false;
@@ -402,10 +424,6 @@ static bool do_recv_completion(int fd)
 
        serr = (void *) CMSG_DATA(cm);
 
-       if (serr->ee_origin == SO_EE_ORIGIN_ZCOOKIE) {
-               completions += do_process_zerocopy_cookies(serr, ckbuf, ret);
-               return true;
-       }
        if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
                error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
        if (serr->ee_errno != 0)
@@ -440,20 +458,20 @@ static bool do_recv_completion(int fd)
 }
 
 /* Read all outstanding messages on the errqueue */
-static void do_recv_completions(int fd)
+static void do_recv_completions(int fd, int domain)
 {
-       while (do_recv_completion(fd)) {}
+       while (do_recv_completion(fd, domain)) {}
 }
 
 /* Wait for all remaining completions on the errqueue */
-static void do_recv_remaining_completions(int fd)
+static void do_recv_remaining_completions(int fd, int domain)
 {
        int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
 
        while (completions < expected_completions &&
               gettimeofday_ms() < tstop) {
-               if (do_poll(fd, POLLERR))
-                       do_recv_completions(fd);
+               if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
+                       do_recv_completions(fd, domain);
        }
 
        if (completions < expected_completions)
@@ -534,13 +552,13 @@ static void do_tx(int domain, int type, int protocol)
 
                while (!do_poll(fd, POLLOUT)) {
                        if (cfg_zerocopy)
-                               do_recv_completions(fd);
+                               do_recv_completions(fd, domain);
                }
 
        } while (gettimeofday_ms() < tstop);
 
        if (cfg_zerocopy)
-               do_recv_remaining_completions(fd);
+               do_recv_remaining_completions(fd, domain);
 
        if (close(fd))
                error(1, errno, "close");
@@ -631,40 +649,6 @@ static void do_flush_datagram(int fd, int type)
        bytes += cfg_payload_len;
 }
 
-
-static void do_recvmsg(int fd)
-{
-       int ret, off = 0;
-       char *buf;
-       struct iovec iov;
-       struct msghdr msg;
-       struct sockaddr_storage din;
-
-       buf = calloc(cfg_payload_len, sizeof(char));
-       iov.iov_base = buf;
-       iov.iov_len = cfg_payload_len;
-
-       memset(&msg, 0, sizeof(msg));
-       msg.msg_name = &din;
-       msg.msg_namelen = sizeof(din);
-       msg.msg_iov = &iov;
-       msg.msg_iovlen = 1;
-
-       ret = recvmsg(fd, &msg, MSG_TRUNC);
-
-       if (ret == -1)
-               error(1, errno, "recv");
-       if (ret != cfg_payload_len)
-               error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
-
-       if (memcmp(buf + off, payload, ret))
-               error(1, 0, "recv: data mismatch");
-
-       free(buf);
-       packets++;
-       bytes += cfg_payload_len;
-}
-
 static void do_rx(int domain, int type, int protocol)
 {
        uint64_t tstop;
@@ -676,8 +660,6 @@ static void do_rx(int domain, int type, int protocol)
        do {
                if (type == SOCK_STREAM)
                        do_flush_tcp(fd);
-               else if (domain == PF_RDS)
-                       do_recvmsg(fd);
                else
                        do_flush_datagram(fd, type);
 
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
new file mode 100755 (executable)
index 0000000..1e42878
--- /dev/null
@@ -0,0 +1,471 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that route PMTU values match expectations, and that initial device MTU
+# values are assigned correctly
+#
+# Tests currently implemented:
+#
+# - pmtu_vti4_exception
+#      Set up vti tunnel on top of veth, with xfrm states and policies, in two
+#      namespaces with matching endpoints. Check that route exception is not
+#      created if link layer MTU is not exceeded, then exceed it and check that
+#      exception is created with the expected PMTU. The approach described
+#      below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
+#      changes alone won't affect PMTU
+#
+# - pmtu_vti6_exception
+#      Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
+#      namespaces with matching endpoints. Check that route exception is
+#      created by exceeding link layer MTU with ping to other endpoint. Then
+#      decrease and increase MTU of tunnel, checking that route exception PMTU
+#      changes accordingly
+#
+# - pmtu_vti4_default_mtu
+#      Set up vti4 tunnel on top of veth, in two namespaces with matching
+#      endpoints. Check that MTU assigned to vti interface is the MTU of the
+#      lower layer (veth) minus additional lower layer headers (zero, for veth)
+#      minus IPv4 header length
+#
+# - pmtu_vti6_default_mtu
+#      Same as above, for IPv6
+#
+# - pmtu_vti4_link_add_mtu
+#      Set up vti4 interface passing MTU value at link creation, check MTU is
+#      configured, and that link is not created with invalid MTU values
+#
+# - pmtu_vti6_link_add_mtu
+#      Same as above, for IPv6
+#
+# - pmtu_vti6_link_change_mtu
+#      Set up two dummy interfaces with different MTUs, create a vti6 tunnel
+#      and check that configured MTU is used on link creation and changes, and
+#      that MTU is properly calculated instead when MTU is not configured from
+#      userspace
+
+tests="
+       pmtu_vti6_exception             vti6: PMTU exceptions
+       pmtu_vti4_exception             vti4: PMTU exceptions
+       pmtu_vti4_default_mtu           vti4: default MTU assignment
+       pmtu_vti6_default_mtu           vti6: default MTU assignment
+       pmtu_vti4_link_add_mtu          vti4: MTU setting on link creation
+       pmtu_vti6_link_add_mtu          vti6: MTU setting on link creation
+       pmtu_vti6_link_change_mtu       vti6: MTU changes on link changes"
+
+NS_A="ns-$(mktemp -u XXXXXX)"
+NS_B="ns-$(mktemp -u XXXXXX)"
+ns_a="ip netns exec ${NS_A}"
+ns_b="ip netns exec ${NS_B}"
+
+veth4_a_addr="192.168.1.1"
+veth4_b_addr="192.168.1.2"
+veth4_mask="24"
+veth6_a_addr="fd00:1::a"
+veth6_b_addr="fd00:1::b"
+veth6_mask="64"
+
+vti4_a_addr="192.168.2.1"
+vti4_b_addr="192.168.2.2"
+vti4_mask="24"
+vti6_a_addr="fd00:2::a"
+vti6_b_addr="fd00:2::b"
+vti6_mask="64"
+
+dummy6_0_addr="fc00:1000::0"
+dummy6_1_addr="fc00:1001::0"
+dummy6_mask="64"
+
+cleanup_done=1
+err_buf=
+
+err() {
+       err_buf="${err_buf}${1}
+"
+}
+
+err_flush() {
+       echo -n "${err_buf}"
+       err_buf=
+}
+
+setup_namespaces() {
+       ip netns add ${NS_A} || return 1
+       ip netns add ${NS_B}
+}
+
+setup_veth() {
+       ${ns_a} ip link add veth_a type veth peer name veth_b || return 1
+       ${ns_a} ip link set veth_b netns ${NS_B}
+
+       ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
+       ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b
+
+       ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
+       ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b
+
+       ${ns_a} ip link set veth_a up
+       ${ns_b} ip link set veth_b up
+}
+
+setup_vti() {
+       proto=${1}
+       veth_a_addr="${2}"
+       veth_b_addr="${3}"
+       vti_a_addr="${4}"
+       vti_b_addr="${5}"
+       vti_mask=${6}
+
+       [ ${proto} -eq 6 ] && vti_type="vti6" || vti_type="vti"
+
+       ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1
+       ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10
+
+       ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
+       ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b
+
+       ${ns_a} ip link set vti${proto}_a up
+       ${ns_b} ip link set vti${proto}_b up
+
+       sleep 1
+}
+
+setup_vti4() {
+       setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${vti4_a_addr} ${vti4_b_addr} ${vti4_mask}
+}
+
+setup_vti6() {
+       setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${vti6_a_addr} ${vti6_b_addr} ${vti6_mask}
+}
+
+setup_xfrm() {
+       proto=${1}
+       veth_a_addr="${2}"
+       veth_b_addr="${3}"
+
+       ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
+       ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+       ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+       ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+
+       ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+       ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+       ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+       ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+}
+
+setup_xfrm4() {
+       setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}
+}
+
+setup_xfrm6() {
+       setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
+}
+
+setup() {
+       [ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return 1
+
+       cleanup_done=0
+       for arg do
+               eval setup_${arg} || { echo "  ${arg} not supported"; return 1; }
+       done
+}
+
+cleanup() {
+       [ ${cleanup_done} -eq 1 ] && return     # setup() never ran, or already cleaned up
+       ip netns del ${NS_A} 2> /dev/null       # stderr silenced: namespace may not exist
+       ip netns del ${NS_B} 2> /dev/null
+       cleanup_done=1
+}
+
+mtu() {
+       ns_cmd="${1}"
+       dev="${2}"
+       mtu="${3}"
+
+       ${ns_cmd} ip link set dev ${dev} mtu ${mtu}
+}
+
+mtu_parse() {
+       input="${1}"
+
+       next=0
+       for i in ${input}; do
+               [ ${next} -eq 1 ] && echo "${i}" && return
+               [ "${i}" = "mtu" ] && next=1
+       done
+}
+
+link_get() {
+       ns_cmd="${1}"
+       name="${2}"
+
+       ${ns_cmd} ip link show dev "${name}"
+}
+
+link_get_mtu() {
+       ns_cmd="${1}"
+       name="${2}"
+
+       mtu_parse "$(link_get "${ns_cmd}" ${name})"
+}
+
+route_get_dst_exception() {
+       ns_cmd="${1}"
+       dst="${2}"
+
+       ${ns_cmd} ip route get "${dst}"
+}
+
+route_get_dst_pmtu_from_exception() {
+       ns_cmd="${1}"
+       dst="${2}"
+
+       mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
+}
+
+# Check that pinging over the vti4 tunnel creates a route exception only once
+# the packet exceeds the link layer MTU, and that the exception carries the
+# expected PMTU.
+# Returns 2 if setup fails, 1 if a check fails.
+test_pmtu_vti4_exception() {
+       setup namespaces veth vti4 xfrm4 || return 2
+
+       veth_mtu=1500
+       # 20: IPv4 header length (same value checked by the default MTU test)
+       vti_mtu=$((veth_mtu - 20))
+
+       #                                SPI   SN   IV  ICV   pad length   next header
+       esp_payload_rfc4106=$((vti_mtu - 4   - 4  - 8 - 16  - 1          - 1))
+       # 28: presumably IPv4 header (20) plus ICMP header (8) -- TODO confirm
+       ping_payload=$((esp_payload_rfc4106 - 28))
+
+       mtu "${ns_a}" veth_a ${veth_mtu}
+       mtu "${ns_b}" veth_b ${veth_mtu}
+       mtu "${ns_a}" vti4_a ${vti_mtu}
+       mtu "${ns_b}" vti4_b ${vti_mtu}
+
+       # Send DF packet without exceeding link layer MTU, check that no
+       # exception is created
+       ${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null
+       pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
+       if [ "${pmtu}" != "" ]; then
+               err "  unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}"
+               return 1
+       fi
+
+       # Now exceed link layer MTU by one byte, check that exception is created
+       ${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null
+       pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
+       if [ "${pmtu}" = "" ]; then
+               err "  exception not created for IP payload length $((esp_payload_rfc4106 + 1))"
+               return 1
+       fi
+
+       # ...with the right PMTU value
+       if [ ${pmtu} -ne ${esp_payload_rfc4106} ]; then
+               err "  wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}"
+               return 1
+       fi
+}
+
+test_pmtu_vti6_exception() {
+       setup namespaces veth vti6 xfrm6 || return 2
+       fail=0
+
+       # Create route exception by exceeding link layer MTU
+       mtu "${ns_a}" veth_a 4000
+       mtu "${ns_b}" veth_b 4000
+       mtu "${ns_a}" vti6_a 5000
+       mtu "${ns_b}" vti6_b 5000
+       ${ns_a} ping6 -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null
+
+       # Check that exception was created
+       if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then
+               err "  tunnel exceeding link layer MTU didn't create route exception"
+               return 1
+       fi
+
+       # Decrease tunnel MTU, check for PMTU decrease in route exception
+       mtu "${ns_a}" vti6_a 3000
+
+       if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 3000 ]; then
+               err "  decreasing tunnel MTU didn't decrease route exception PMTU"
+               fail=1
+       fi
+
+       # Increase tunnel MTU, check for PMTU increase in route exception
+       mtu "${ns_a}" vti6_a 9000
+       if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 9000 ]; then
+               err "  increasing tunnel MTU didn't increase route exception PMTU"
+               fail=1
+       fi
+
+       return ${fail}
+}
+
+# Check the default MTU of a vti4 device against the MTU of the underlying
+# veth device. Returns 2 if setup fails, 1 if the check fails.
+test_pmtu_vti4_default_mtu() {
+       setup namespaces veth vti4 || return 2
+
+       # Check that MTU of vti device is MTU of veth minus IPv4 header length
+       veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
+       vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+       if [ $((veth_mtu - vti4_mtu)) -ne 20 ]; then
+               err "  vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
+               return 1
+       fi
+}
+
+# Check the default MTU of a vti6 device against the MTU of the underlying
+# veth device. Returns 2 if setup fails, 1 if the check fails.
+test_pmtu_vti6_default_mtu() {
+       setup namespaces veth vti6 || return 2
+
+       # Check that MTU of vti device is MTU of veth minus IPv6 header length
+       veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
+       vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+       if [ $((veth_mtu - vti6_mtu)) -ne 40 ]; then
+               err "  vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
+               return 1
+       fi
+}
+
+# Check MTU handling when a vti4 link is created with an explicit MTU:
+# out-of-range values must be rejected or adjusted into range, in-range
+# values must be applied as given.
+# Returns 2 if setup fails or vti links cannot be created, otherwise the
+# value of fail (1 if any check failed, 0 if none did).
+test_pmtu_vti4_link_add_mtu() {
+       setup namespaces || return 2
+
+       # Probe for vti support by creating and deleting a link
+       ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+       [ $? -ne 0 ] && err "  vti not supported" && return 2
+       ${ns_a} ip link del vti4_a
+
+       fail=0
+
+       min=68
+       max=$((65528 - 20))
+       # Check invalid values first
+       for v in $((min - 1)) $((max + 1)); do
+               ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null
+               # This can fail, or MTU can be adjusted to a proper value
+               [ $? -ne 0 ] && continue
+               mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+               if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
+                       err "  vti tunnel created with invalid MTU ${mtu}"
+                       fail=1
+               fi
+               ${ns_a} ip link del vti4_a
+       done
+
+       # Now check valid values
+       for v in ${min} 1300 ${max}; do
+               ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+               mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+               ${ns_a} ip link del vti4_a
+               if [ "${mtu}" != "${v}" ]; then
+                       err "  vti MTU ${mtu} doesn't match configured value ${v}"
+                       fail=1
+               fi
+       done
+
+       return ${fail}
+}
+
+test_pmtu_vti6_link_add_mtu() {
+       setup namespaces || return 2
+
+       ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+       [ $? -ne 0 ] && err "  vti6 not supported" && return 2
+       ${ns_a} ip link del vti6_a
+
+       fail=0
+
+       min=1280
+       max=$((65535 - 40))
+       # Check invalid values first
+       for v in $((min - 1)) $((max + 1)); do
+               ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 2>/dev/null
+               # This can fail, or MTU can be adjusted to a proper value
+               [ $? -ne 0 ] && continue
+               mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+               if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
+                       err "  vti6 tunnel created with invalid MTU ${v}"
+                       fail=1
+               fi
+               ${ns_a} ip link del vti6_a
+       done
+
+       # Now check valid values
+       for v in 1280 1300 $((65535 - 40)); do
+               ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+               mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+               ${ns_a} ip link del vti6_a
+               if [ "${mtu}" != "${v}" ]; then
+                       err "  vti6 MTU ${mtu} doesn't match configured value ${v}"
+                       fail=1
+               fi
+       done
+
+       return ${fail}
+}
+
+test_pmtu_vti6_link_change_mtu() {
+       setup namespaces || return 2
+
+       ${ns_a} ip link add dummy0 mtu 1500 type dummy
+       [ $? -ne 0 ] && err "  dummy not supported" && return 2
+       ${ns_a} ip link add dummy1 mtu 3000 type dummy
+       ${ns_a} ip link set dummy0 up
+       ${ns_a} ip link set dummy1 up
+
+       ${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
+       ${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1
+
+       fail=0
+
+       # Create vti6 interface bound to device, passing MTU, check it
+       ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+       mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+       if [ ${mtu} -ne 1300 ]; then
+               err "  vti6 MTU ${mtu} doesn't match configured value 1300"
+               fail=1
+       fi
+
+       # Move to another device with different MTU, without passing MTU, check
+       # MTU is adjusted
+       ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
+       mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+       if [ ${mtu} -ne $((3000 - 40)) ]; then
+               err "  vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
+               fail=1
+       fi
+
+       # Move it back, passing MTU, check MTU is not overridden
+       ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+       mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+       if [ ${mtu} -ne 1280 ]; then
+               err "  vti6 MTU ${mtu} doesn't match configured value 1280"
+               fail=1
+       fi
+
+       return ${fail}
+}
+
+trap cleanup EXIT
+
+exitcode=0
+desc=0
+IFS="  
+"
+# Walk ${tests}, whose entries alternate between a test name and its
+# description, and run test_<name> for each pair.
+for t in ${tests}; do
+       # Name entry: remember it and move on to the description. Description
+       # entry: reset desc for the next pair and fall through to run the test.
+       [ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
+
+       # Each test runs in a subshell, with default word splitting restored
+       (
+               unset IFS
+               eval test_${name}
+               ret=$?
+               cleanup
+
+               # 0: passed, 1: failed (subshell exits 1), 2: skipped
+               if [ $ret -eq 0 ]; then
+                       printf "TEST: %-60s  [ OK ]\n" "${t}"
+               elif [ $ret -eq 1 ]; then
+                       printf "TEST: %-60s  [FAIL]\n" "${t}"
+                       err_flush
+                       exit 1
+               elif [ $ret -eq 2 ]; then
+                       printf "TEST: %-60s  [SKIP]\n" "${t}"
+                       err_flush
+               fi
+       )
+       # A non-zero subshell status marks the whole run as failed
+       [ $? -ne 0 ] && exitcode=1
+done
+
+exit ${exitcode}
index a622eeecc3a68d0cc81f3ee0614645e2c3ce27f8..e6f4852354353124df50e889f5c04ad220dc584e 100755 (executable)
@@ -517,6 +517,7 @@ kci_test_gretap()
        ip link help gretap 2>&1 | grep -q "^Usage:"
        if [ $? -ne 0 ];then
                echo "SKIP: gretap: iproute2 too old"
+               ip netns del "$testns"
                return 1
        fi
 
@@ -543,6 +544,7 @@ kci_test_gretap()
 
        if [ $ret -ne 0 ]; then
                echo "FAIL: gretap"
+               ip netns del "$testns"
                return 1
        fi
        echo "PASS: gretap"
@@ -565,6 +567,7 @@ kci_test_ip6gretap()
        ip link help ip6gretap 2>&1 | grep -q "^Usage:"
        if [ $? -ne 0 ];then
                echo "SKIP: ip6gretap: iproute2 too old"
+               ip netns del "$testns"
                return 1
        fi
 
@@ -591,6 +594,7 @@ kci_test_ip6gretap()
 
        if [ $ret -ne 0 ]; then
                echo "FAIL: ip6gretap"
+               ip netns del "$testns"
                return 1
        fi
        echo "PASS: ip6gretap"
@@ -655,6 +659,7 @@ kci_test_erspan()
 
        if [ $ret -ne 0 ]; then
                echo "FAIL: erspan"
+               ip netns del "$testns"
                return 1
        fi
        echo "PASS: erspan"
@@ -720,6 +725,7 @@ kci_test_ip6erspan()
 
        if [ $ret -ne 0 ]; then
                echo "FAIL: ip6erspan"
+               ip netns del "$testns"
                return 1
        fi
        echo "PASS: ip6erspan"
index 5df07047ca86607275443207a5d55e370ca7b244..81a98a240456339b9740753c8e8a62b33b2f1860 100644 (file)
@@ -68,9 +68,11 @@ static int cfg_num_pkts = 4;
 static int do_ipv4 = 1;
 static int do_ipv6 = 1;
 static int cfg_payload_len = 10;
+static int cfg_poll_timeout = 100;
 static bool cfg_show_payload;
 static bool cfg_do_pktinfo;
 static bool cfg_loop_nodata;
+static bool cfg_no_delay;
 static uint16_t dest_port = 9000;
 
 static struct sockaddr_in daddr;
@@ -171,7 +173,7 @@ static void __poll(int fd)
 
        memset(&pollfd, 0, sizeof(pollfd));
        pollfd.fd = fd;
-       ret = poll(&pollfd, 1, 100);
+       ret = poll(&pollfd, 1, cfg_poll_timeout);
        if (ret != 1)
                error(1, errno, "poll");
 }
@@ -371,7 +373,8 @@ static void do_test(int family, unsigned int opt)
                        error(1, errno, "send");
 
                /* wait for all errors to be queued, else ACKs arrive OOO */
-               usleep(50 * 1000);
+               if (!cfg_no_delay)
+                       usleep(50 * 1000);
 
                __poll(fd);
 
@@ -392,6 +395,9 @@ static void __attribute__((noreturn)) usage(const char *filepath)
                        "  -4:   only IPv4\n"
                        "  -6:   only IPv6\n"
                        "  -h:   show this message\n"
+                       "  -c N: number of packets for each test\n"
+                       "  -D:   no delay between packets\n"
+                       "  -F:   poll() waits forever for an event\n"
                        "  -I:   request PKTINFO\n"
                        "  -l N: send N bytes at a time\n"
                        "  -n:   set no-payload option\n"
@@ -409,7 +415,7 @@ static void parse_opt(int argc, char **argv)
        int proto_count = 0;
        char c;
 
-       while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) {
+       while ((c = getopt(argc, argv, "46c:DFhIl:np:rRux")) != -1) {
                switch (c) {
                case '4':
                        do_ipv6 = 0;
@@ -417,6 +423,15 @@ static void parse_opt(int argc, char **argv)
                case '6':
                        do_ipv4 = 0;
                        break;
+               case 'c':
+                       cfg_num_pkts = strtoul(optarg, NULL, 10);
+                       break;
+               case 'D':
+                       cfg_no_delay = true;
+                       break;
+               case 'F':
+                       cfg_poll_timeout = -1;
+                       break;
                case 'I':
                        cfg_do_pktinfo = true;
                        break;
index 35ade7406dcdbbc778dbf7f39d46fafee0b0148f..3ae77ba93208f15f0d720325c269c7a95af2c6fa 100644 (file)
@@ -135,6 +135,16 @@ static int run_test(void *addr, unsigned long size)
        return 0;
 }
 
+/*
+ * Probe for the subpage_prot system call by invoking it with all-zero
+ * arguments.
+ *
+ * Returns 1 if the call succeeds, or fails with an errno other than ENOENT
+ * or ENOSYS; returns 0 otherwise.
+ */
+static int syscall_available(void)
+{
+       errno = 0;
+       if (syscall(__NR_subpage_prot, 0, 0, 0) == 0)
+               return 1;
+
+       return errno != ENOENT && errno != ENOSYS;
+}
+
 int test_anon(void)
 {
        unsigned long align;
@@ -145,6 +155,8 @@ int test_anon(void)
        void *mallocblock;
        unsigned long mallocsize;
 
+       SKIP_IF(!syscall_available());
+
        if (getpagesize() != 0x10000) {
                fprintf(stderr, "Kernel page size must be 64K!\n");
                return 1;
@@ -180,6 +192,8 @@ int test_file(void)
        off_t filesize;
        int fd;
 
+       SKIP_IF(!syscall_available());
+
        fd = open(file_name, O_RDWR);
        if (fd == -1) {
                perror("failed to open file");
index a23453943ad2b95538571015c066eaa2c2deb711..5c72ff978f2784babc71464b2baeb62c44089ce2 100644 (file)
@@ -16,7 +16,7 @@ $(OUTPUT)/tm-syscall: tm-syscall-asm.S
 $(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include
 $(OUTPUT)/tm-tmspr: CFLAGS += -pthread
 $(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64
-$(OUTPUT)/tm-resched-dscr: ../pmu/lib.o
+$(OUTPUT)/tm-resched-dscr: ../pmu/lib.c
 $(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx
 $(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64
 
index 5d92c23ee6cbd2cf1ba157fc41b37168ee8887c7..179d592f0073c36498d91e15c2142a560ec1e88d 100644 (file)
@@ -255,6 +255,8 @@ int tm_trap_test(void)
 
        struct sigaction trap_sa;
 
+       SKIP_IF(!have_htm());
+
        trap_sa.sa_flags = SA_SIGINFO;
        trap_sa.sa_sigaction = trap_signal_handler;
        sigaction(SIGTRAP, &trap_sa, NULL);
index 6a8e5a9bfc1065a3e860cd5b31e7aea2d508bea8..d148f9f89fb64cf325a546a1855c1e1cc5fc8a21 100644 (file)
@@ -2,3 +2,4 @@ CONFIG_MISC_FILESYSTEMS=y
 CONFIG_PSTORE=y
 CONFIG_PSTORE_PMSG=y
 CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=m
index b3c8ba3cb66855ff93d6581f7428982be41fca60..d0121a8a3523a948af699e52b6adbf1cde37b030 100644 (file)
@@ -30,7 +30,7 @@ $(TEST_CUSTOM_PROGS): $(TESTS) $(OBJS)
        $(CC) -o $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) $(CFLAGS) $(LDFLAGS)
 
 $(OBJS): $(OUTPUT)/%.o: %.c
-       $(CC) -c $^ -o $@
+       $(CC) -c $^ -o $@ $(CFLAGS)
 
 $(TESTS): $(OUTPUT)/%.o: %.c
        $(CC) -c $^ -o $@
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
new file mode 100644 (file)
index 0000000..5b012f4
--- /dev/null
@@ -0,0 +1,289 @@
+[
+    {
+        "id": "d959",
+        "name": "Add cBPF action with valid bytecode",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC action flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action bpf index 100",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 100 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    },
+    {
+        "id": "f84a",
+        "name": "Add cBPF action with invalid bytecode",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0' index 100",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action get action bpf index 100",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 100 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action bpf"
+        ]
+    },
+    {
+        "id": "e939",
+        "name": "Add eBPF action with valid object-file",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { return 2; }' | clang -O2 -x c -c - -target bpf -o _b.o",
+            [
+                "$TC action flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action bpf index 667",
+        "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c default-action pipe.*index 667 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action bpf",
+            "rm -f _b.o"
+        ]
+    },
+    {
+        "id": "282d",
+        "name": "Add eBPF action with invalid object-file",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { s->data = 0x0; return 2; }' | clang -O2 -x c -c - -target bpf -o _c.o",
+            [
+                "$TC action flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action get action bpf index 667",
+        "matchPattern": "action order [0-9]*: bpf _c.o:\\[action\\] id [0-9].*index 667 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC action flush action bpf",
+            "rm -f _c.o"
+        ]
+    },
+    {
+        "id": "d819",
+        "name": "Replace cBPF bytecode and action control",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ],
+            [
+                "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 555",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action replace action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 555",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action bpf index 555",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' default-action drop.*index 555 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    },
+    {
+        "id": "6ae3",
+        "name": "Delete cBPF action",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ],
+            [
+                "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 444",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action delete action bpf index 444",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action bpf index 444",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 444 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    },
+    {
+        "id": "3e0d",
+        "name": "List cBPF actions",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC action flush action bpf",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' ok index 101",
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 102",
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 33024,6 0 0 262144,6 0 0 0' continue index 103"
+        ],
+        "cmdUnderTest": "$TC action list action bpf",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action list action bpf",
+        "matchPattern": "action order [0-9]*: bpf bytecode",
+        "matchCount": "3",
+        "teardown": [
+            "$TC actions flush action bpf"
+        ]
+    },
+    {
+        "id": "55ce",
+        "name": "Flush BPF actions",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' ok index 101",
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 102",
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 33024,6 0 0 262144,6 0 0 0' continue index 103"
+        ],
+        "cmdUnderTest": "$TC action flush action bpf",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action list action bpf",
+        "matchPattern": "action order [0-9]*: bpf bytecode",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action bpf"
+        ]
+    },
+    {
+        "id": "ccc3",
+        "name": "Add cBPF action with duplicate index",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 4294967295"
+        ],
+        "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967295",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action get action bpf index 4294967295",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 4294967295",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    },
+    {
+        "id": "89c7",
+        "name": "Add cBPF action with invalid index",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967296 cookie 12345",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action ls action bpf",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' default-action pipe.*cookie 12345",
+        "matchCount": "0",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    },
+    {
+        "id": "7ab9",
+        "name": "Add cBPF action with cookie",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' cookie d0d0d0d0d0d0d0d0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action list action bpf",
+        "matchPattern": "action order [0-9]*: bpf.*cookie d0d0d0d0d0d0d0",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
new file mode 100644 (file)
index 0000000..93cf8fe
--- /dev/null
@@ -0,0 +1,410 @@
+[
+    {
+        "id": "6d84",
+        "name": "Add csum iph action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum iph index 800",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 800",
+        "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 800 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "1862",
+        "name": "Add csum ip4h action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum ip4h index 7",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 7",
+        "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 7 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "15c6",
+        "name": "Add csum ipv4h action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum ipv4h index 1122",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 1122",
+        "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 1122 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "bf47",
+        "name": "Add csum icmp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum icmp index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 1",
+        "matchPattern": "action order [0-9]*: csum \\(icmp\\) action pass.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "cc1d",
+        "name": "Add csum igmp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum igmp index 999",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 999",
+        "matchPattern": "action order [0-9]*: csum \\(igmp\\) action pass.*index 999 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "bccc",
+        "name": "Add csum foobar action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum foobar index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action csum",
+        "matchPattern": "action order [0-9]*: csum \\(foobar\\) action pass.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "3bb4",
+        "name": "Add csum tcp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum tcp index 9999",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 9999",
+        "matchPattern": "action order [0-9]*: csum \\(tcp\\) action pass.*index 9999 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "759c",
+        "name": "Add csum udp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum udp index 334455",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 334455",
+        "matchPattern": "action order [0-9]*: csum \\(udp\\) action pass.*index 334455 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "bdb6",
+        "name": "Add csum udp xor iph action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum udp xor iph index 3",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action csum",
+        "matchPattern": "action order [0-9]*: csum \\(udp xor iph\\) action pass.*index 3 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "c220",
+        "name": "Add csum udplite action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum udplite continue index 3",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 3",
+        "matchPattern": "action order [0-9]*: csum \\(udplite\\) action continue.*index 3 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "8993",
+        "name": "Add csum sctp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum sctp index 777",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 777",
+        "matchPattern": "action order [0-9]*: csum \\(sctp\\) action pass.*index 777 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "b138",
+        "name": "Add csum ip & icmp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum ip and icmp pipe index 123",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 123",
+        "matchPattern": "action order [0-9]*: csum \\(iph, icmp\\) action pipe.*index 123 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "eeda",
+        "name": "Add csum ip & sctp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum ipv4h sctp continue index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 2",
+        "matchPattern": "action order [0-9]*: csum \\(iph, sctp\\) action continue.*index 2 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "0017",
+        "name": "Add csum udp or tcp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum udp or tcp continue index 27",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 27",
+        "matchPattern": "action order [0-9]*: csum \\(tcp, udp\\) action continue.*index 27 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "ce92",
+        "name": "Add csum udp action with cookie",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum udp pipe index 7 cookie 12345678",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 7",
+        "matchPattern": "action order [0-9]*: csum \\(udp\\) action pipe.*index 7.*cookie 12345678",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "912f",
+        "name": "Add csum icmp action with large cookie",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum icmp pipe index 17 cookie aabbccddeeff1122",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 17",
+        "matchPattern": "action order [0-9]*: csum \\(icmp\\) action pipe.*index 17.*cookie aabbccddeeff1122",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "879b",
+        "name": "Add batch of 32 csum tcp actions",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action csum tcp continue index $i \"; args=\"$args$cmd\"; done && $TC actions add $args",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action csum",
+        "matchPattern": "^[ \t]+index [0-9]* ref",
+        "matchCount": "32",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    }
+]
index e2187b6e0b7ac7df8b8bcbb633158ece85887471..ae96d0350d7edc43c477fd7127e88e887177393b 100644 (file)
         "teardown": [
             "$TC actions flush action gact"
         ]
+    },
+    {
+        "id": "1021",
+        "name": "Add batch of 32 gact pass actions",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action pass index $i \"; args=\"$args$cmd\"; done && $TC actions add $args",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "32",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "da7a",
+        "name": "Add batch of 32 gact continue actions with cookie",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action continue index $i cookie aabbccddeeff112233445566778800a1 \"; args=\"$args$cmd\"; done && $TC actions add $args",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "32",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "8aa3",
+        "name": "Delete batch of 32 gact continue actions",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ],
+            "for i in `seq 1 32`; do cmd=\"action continue index $i \"; args=\"$args$cmd\"; done && $TC actions add $args"
+        ],
+        "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action gact index $i \"; args=\"$args$cmd\"; done && $TC actions del $args",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "0",
+        "teardown": []
     }
-]
+]
\ No newline at end of file
index 0fcccf18399b656b6f7401c8bcea8a6f021df083..443c9b3c8664b9bc2e279e601d9585f4edad8e05 100644 (file)
             "$TC actions flush action mirred"
         ]
     },
+    {
+        "id": "8917",
+        "name": "Add mirred mirror action with control pass",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pass index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 1",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pass.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "1054",
+        "name": "Add mirred mirror action with control pipe",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 15",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 15",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 15 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "9887",
+        "name": "Add mirred mirror action with control continue",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo continue index 15",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 15",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) continue.*index 15 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "e4aa",
+        "name": "Add mirred mirror action with control reclassify",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo reclassify index 150",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 150",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) reclassify.*index 150 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "ece9",
+        "name": "Add mirred mirror action with control drop",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo drop index 99",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 99",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) drop.*index 99 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "0031",
+        "name": "Add mirred mirror action with control jump",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo jump 10 index 99",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 99",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) jump 10.*index 99 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "407c",
+        "name": "Add mirred mirror action with cookie",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo reclassify cookie aa11bb22cc33dd44ee55",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action mirred",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) reclassify.*cookie aa11bb22cc33dd44ee55",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "8b69",
+        "name": "Add mirred mirror action with maximum index",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 4294967295",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 4294967295",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
     {
         "id": "a70e",
         "name": "Delete mirred mirror action",
index 0e602a3f93938a0d37f140925acd95971ebf28b7..38d85a1d7492d7aed5324a13cba14e352911bfa3 100644 (file)
             "$TC actions flush action police"
         ]
     },
+    {
+        "id": "ddd6",
+        "name": "Add police action with invalid rate value",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 3tb burst 250k conform-exceed pass/pipe index 5",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x5 rate 3Tb burst 250Kb mtu 2Kb action pass/pipe",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "f61c",
+        "name": "Add police action with invalid burst value",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 3kbit burst 250P conform-exceed pass/pipe index 5",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x5 rate 3Kbit burst 250Pb mtu 2Kb action pass/pipe",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "c26f",
+        "name": "Add police action with invalid peakrate value",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 90kbit burst 10k mtu 2kb peakrate 100T index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 90Kbit burst 10Kb mtu 2Kb peakrate 100Tbit",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "db04",
+        "name": "Add police action with invalid mtu value",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 10kbit burst 10k mtu 2Pbit index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 10Kbit burst 10Kb mtu 2Pb",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "f3c9",
+        "name": "Add police action with cookie",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 1 cookie a1b1c1d1e1f12233bb",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action police index 1",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 10Mbit burst 10Kb mtu 2Kb.*cookie a1b1c1d1e1f12233bb",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "d190",
+        "name": "Add police action with maximum index",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action police index 4294967295",
+        "matchPattern": "action order [0-9]*:  police 0xffffffff rate 10Mbit burst 10Kb mtu 2Kb",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
     {
         "id": "336e",
         "name": "Delete police action",
index 99635ea4722e6af26245b5594ca18e69ee4fda4a..37ecc2716fee5551101177a49db439b792666769 100644 (file)
             "$TC actions flush action skbedit"
         ]
     },
+    {
+        "id": "464a",
+        "name": "Add skbedit action with control pipe",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit ptype host pipe index 11",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 11",
+        "matchPattern": "action order [0-9]*:  skbedit ptype host pipe.*index 11 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "212f",
+        "name": "Add skbedit action with control reclassify",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit mark 56789 reclassify index 90",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 90",
+        "matchPattern": "action order [0-9]*:  skbedit mark 56789 reclassify.*index 90 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "0651",
+        "name": "Add skbedit action with control pass",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit queue_mapping 3 pass index 271",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 271",
+        "matchPattern": "action order [0-9]*:  skbedit queue_mapping 3 pass.*index 271 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "cc53",
+        "name": "Add skbedit action with control drop",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit queue_mapping 3 drop index 271",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 271",
+        "matchPattern": "action order [0-9]*:  skbedit queue_mapping 3 drop.*index 271 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "ec16",
+        "name": "Add skbedit action with control jump",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit priority 8 jump 9 index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 2",
+        "matchPattern": "action order [0-9]*:  skbedit priority :8 jump 9.*index 2 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "db54",
+        "name": "Add skbedit action with control continue",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit priority 16 continue index 32",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 32",
+        "matchPattern": "action order [0-9]*:  skbedit priority :16 continue.*index 32 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "1055",
+        "name": "Add skbedit action with cookie",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit priority 16 continue index 32 cookie deadbeef",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 32",
+        "matchPattern": "action order [0-9]*:  skbedit priority :16 continue.*index 32 ref.*cookie deadbeef",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
     {
         "id": "5172",
         "name": "List skbedit actions",
index e34075059c26cb26143d3c992dfa7920fbb02485..fe3326e939c1b11bc008b46f452c336218906460 100644 (file)
             "$TC actions flush action skbmod"
         ]
     },
+    {
+        "id": "6046",
+        "name": "Add skbmod action with control reclassify and cookie",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbmod set smac 00:01:02:03:04:01 reclassify index 1 cookie ddeeffaabb11cc22",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbmod index 1",
+        "matchPattern": "action order [0-9]*: skbmod reclassify set smac 00:01:02:03:04:01.*index 1 ref.*cookie ddeeffaabb11cc22",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    },
     {
         "id": "58cb",
         "name": "List skbmod actions",
         "cmdUnderTest": "$TC actions ls action skbmod",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action skbmod index 4",
-        "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x0031",
+        "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x31",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action skbmod"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
new file mode 100644 (file)
index 0000000..4510ddf
--- /dev/null
@@ -0,0 +1,410 @@
+[
+    {
+        "id": "6f5a",
+        "name": "Add vlan pop action",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "ee6f",
+        "name": "Add vlan pop action with large index",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*index 4294967295 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "b6b9",
+        "name": "Add vlan pop action with jump opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop jump 10 index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*jump 10.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "87c3",
+        "name": "Add vlan pop action with trap opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop trap index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop trap.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "2b91",
+        "name": "Add vlan invalid action",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan bad_mode",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*bad_mode",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "57fc",
+        "name": "Add vlan action with invalid protocol type",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push protocol ABCD",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "3989",
+        "name": "Add vlan push action with default protocol and priority",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 123 index 18",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 18",
+        "matchPattern": "action order [0-9]+: vlan.*push id 123 protocol 802.1Q priority 0 pipe.*index 18 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "79dc",
+        "name": "Add vlan push action with protocol 802.1Q and priority 3",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 77 protocol 802.1Q priority 3 continue index 734",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 734",
+        "matchPattern": "action order [0-9]+: vlan.*push id 77 protocol 802.1Q priority 3 continue.*index 734 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "4d73",
+        "name": "Add vlan push action with protocol 802.1AD",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 1024 protocol 802.1AD pass index 10000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 10000",
+        "matchPattern": "action order [0-9]+: vlan.*push id 1024 protocol 802.1ad priority 0 pass.*index 10000 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "1f7b",
+        "name": "Add vlan push action with invalid vlan ID",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 5678 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push id 5678.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "5d02",
+        "name": "Add vlan push action with invalid IEEE 802.1p priority",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 5 priority 10 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push id 5.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "6812",
+        "name": "Add vlan modify action for protocol 802.1Q",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 100",
+        "matchPattern": "action order [0-9]+: vlan.*modify id 5 protocol 802.1Q priority 0 pipe.*index 100 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "5a31",
+        "name": "Add vlan modify action for protocol 802.1AD",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1ad id 500 reclassify index 12",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 12",
+        "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad priority 0 reclassify.*index 12 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "83a4",
+        "name": "Delete vlan pop action",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan pop index 44"
+        ],
+        "cmdUnderTest": "$TC actions del action vlan index 44",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*index 44 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "ed1e",
+        "name": "Delete vlan push action for protocol 802.1Q",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan push id 4094 protocol 802.1Q index 999"
+        ],
+        "cmdUnderTest": "$TC actions del action vlan index 999",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push id 4094 protocol 802.1Q priority 0 pipe.*index 999 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "a2a3",
+        "name": "Flush vlan actions",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan push id 4 protocol 802.1ad index 10",
+            "$TC actions add action vlan push id 4 protocol 802.1ad index 11",
+            "$TC actions add action vlan push id 4 protocol 802.1ad index 12",
+            "$TC actions add action vlan push id 4 protocol 802.1ad index 13"
+        ],
+        "cmdUnderTest": "$TC actions flush action vlan",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push id 4 protocol 802.1ad",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "1d78",
+        "name": "Add vlan action with cookie",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 4 cookie a0a0a0a0a0a0a0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push id 4.*cookie a0a0a0a0a0a0a0",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    }
+]
index b3754b9aa302340d022c0e103c8321c53790c19f..44de4a272a118272d7485993bbce6ce4948148a9 100755 (executable)
@@ -15,6 +15,7 @@ import importlib
 import json
 import subprocess
 import time
+import traceback
 from collections import OrderedDict
 from string import Template
 
@@ -23,6 +24,13 @@ from tdc_helper import *
 
 import TdcPlugin
 
+
+class PluginMgrTestFail(Exception):
+    def __init__(self, stage, output, message):
+        self.stage = stage
+        self.output = output
+        self.message = message
+
 class PluginMgr:
     def __init__(self, argparser):
         super().__init__()
@@ -135,7 +143,7 @@ def exec_cmd(args, pm, stage, command):
     return proc, foutput
 
 
-def prepare_env(args, pm, stage, prefix, cmdlist):
+def prepare_env(args, pm, stage, prefix, cmdlist, output = None):
     """
     Execute the setup/teardown commands for a test case.
     Optionally terminate test execution if the command fails.
@@ -164,9 +172,12 @@ def prepare_env(args, pm, stage, prefix, cmdlist):
             print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr)
             print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr)
             print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr)
-            raise Exception('"{}" did not complete successfully'.format(prefix))
+            raise PluginMgrTestFail(
+                stage, output,
+                '"{}" did not complete successfully'.format(prefix))
 
 def run_one_test(pm, args, index, tidx):
+    global NAMES
     result = True
     tresult = ""
     tap = ""
@@ -174,6 +185,9 @@ def run_one_test(pm, args, index, tidx):
         print("\t====================\n=====> ", end="")
     print("Test " + tidx["id"] + ": " + tidx["name"])
 
+    # populate NAMES with TESTID for this test
+    NAMES['TESTID'] = tidx['id']
+
     pm.call_pre_case(index, tidx['id'])
     prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"])
 
@@ -194,8 +208,11 @@ def run_one_test(pm, args, index, tidx):
         match_pattern = re.compile(
             str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE)
         (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"])
-        match_index = re.findall(match_pattern, procout)
-        if len(match_index) != int(tidx["matchCount"]):
+        if procout:
+            match_index = re.findall(match_pattern, procout)
+            if len(match_index) != int(tidx["matchCount"]):
+                result = False
+        elif int(tidx["matchCount"]) != 0:
             result = False
 
     if not result:
@@ -204,13 +221,18 @@ def run_one_test(pm, args, index, tidx):
     tap += tresult
 
     if result == False:
-        tap += procout
+        if procout:
+            tap += procout
+        else:
+            tap += 'No output!\n'
 
-    prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'])
+    prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout)
     pm.call_post_case()
 
     index += 1
 
+    # remove TESTID from NAMES
+    del(NAMES['TESTID'])
     return tap
 
 def test_runner(pm, args, filtered_tests):
@@ -225,37 +247,89 @@ def test_runner(pm, args, filtered_tests):
     testlist = filtered_tests
     tcount = len(testlist)
     index = 1
-    tap = str(index) + ".." + str(tcount) + "\n"
+    tap = ''
     badtest = None
+    stage = None
+    emergency_exit = False
+    emergency_exit_message = ''
 
-    pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
-
+    if args.notap:
+        if args.verbose:
+            tap = 'notap requested:  omitting test plan\n'
+    else:
+        tap = str(index) + ".." + str(tcount) + "\n"
+    try:
+        pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
+    except Exception as ee:
+        ex_type, ex, ex_tb = sys.exc_info()
+        print('Exception {} {} (caught in pre_suite).'.
+              format(ex_type, ex))
+        # The delimiter prints below are left commented out: the
+        # traceback does not appear between them (it shows up much
+        # earlier in the tdc.py output, presumably because print_tb()
+        # writes to stderr while print() writes to stdout).
+        # print('--------------------(')
+        # print('traceback')
+        traceback.print_tb(ex_tb)
+        # print('--------------------)')
+        emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex)
+        emergency_exit = True
+        stage = 'pre-SUITE'
+
+    if emergency_exit:
+        pm.call_post_suite(index)
+        return emergency_exit_message
     if args.verbose > 1:
-        print('Run tests here')
+        print('give test rig 2 seconds to stabilize')
+    time.sleep(2)
     for tidx in testlist:
         if "flower" in tidx["category"] and args.device == None:
+            if args.verbose > 1:
+                print('Not executing test {} {} because DEV2 not defined'.
+                      format(tidx['id'], tidx['name']))
             continue
         try:
             badtest = tidx  # in case it goes bad
             tap += run_one_test(pm, args, index, tidx)
-        except Exception as ee:
-            print('Exception {} (caught in test_runner, running test {} {} {})'.
-                  format(ee, index, tidx['id'], tidx['name']))
+        except PluginMgrTestFail as pmtf:
+            ex_type, ex, ex_tb = sys.exc_info()
+            stage = pmtf.stage
+            message = pmtf.message
+            output = pmtf.output
+            print(message)
+            print('Exception {} {} (caught in test_runner, running test {} {} {} stage {})'.
+                  format(ex_type, ex, index, tidx['id'], tidx['name'], stage))
+            print('---------------')
+            print('traceback')
+            traceback.print_tb(ex_tb)
+            print('---------------')
+            if stage == 'teardown':
+                print('accumulated output for this test:')
+                if pmtf.output:
+                    print(pmtf.output)
+            print('---------------')
             break
         index += 1
 
     # if we failed in setup or teardown,
-    # fill in the remaining tests with not ok
+    # fill in the remaining tests with ok-skipped
     count = index
-    tap += 'about to flush the tap output if tests need to be skipped\n'
-    if tcount + 1 != index:
-        for tidx in testlist[index - 1:]:
-            msg = 'skipped - previous setup or teardown failed'
-            tap += 'ok {} - {} # {} {} {}\n'.format(
-                count, tidx['id'], msg, index, badtest.get('id', '--Unknown--'))
-            count += 1
-
-    tap += 'done flushing skipped test tap output\n'
+    if not args.notap:
+        tap += 'about to flush the tap output if tests need to be skipped\n'
+        if tcount + 1 != index:
+            for tidx in testlist[index - 1:]:
+                msg = 'skipped - previous {} failed'.format(stage)
+                tap += 'ok {} - {} # {} {} {}\n'.format(
+                    count, tidx['id'], msg, index, badtest.get('id', '--Unknown--'))
+                count += 1
+
+        tap += 'done flushing skipped test tap output\n'
+
+    if args.pause:
+        print('Want to pause\nPress enter to continue ...')
+        if input(sys.stdin):
+            print('got something on stdin')
+
     pm.call_post_suite(index)
 
     return tap
@@ -333,8 +407,14 @@ def set_args(parser):
     parser.add_argument(
         '-v', '--verbose', action='count', default=0,
         help='Show the commands that are being run')
+    parser.add_argument(
+        '-N', '--notap', action='store_true',
+        help='Suppress tap results for command under test')
     parser.add_argument('-d', '--device',
                         help='Execute the test case in flower category')
+    parser.add_argument(
+        '-P', '--pause', action='store_true',
+        help='Pause execution just before post-suite stage')
     return parser
 
 
@@ -347,9 +427,9 @@ def check_default_settings(args, remaining, pm):
     global NAMES
 
     if args.path != None:
-         NAMES['TC'] = args.path
+        NAMES['TC'] = args.path
     if args.device != None:
-         NAMES['DEV2'] = args.device
+        NAMES['DEV2'] = args.device
     if not os.path.isfile(NAMES['TC']):
         print("The specified tc path " + NAMES['TC'] + " does not exist.")
         exit(1)
@@ -389,7 +469,7 @@ def generate_case_ids(alltests):
     for c in alltests:
         if (c["id"] == ""):
             while True:
-                newid = str('%04x' % random.randrange(16**4))
+                newid = str('{:04x}'.format(random.randrange(16**4)))
                 if (does_id_exist(alltests, newid)):
                     continue
                 else:
@@ -545,7 +625,10 @@ def set_operation_mode(pm, args):
         catresults = test_runner(pm, args, alltests)
     else:
         catresults = 'No tests found\n'
-    print('All test results: \n\n{}'.format(catresults))
+    if args.notap:
+        print('Tap output suppression requested\n')
+    else:
+        print('All test results: \n\n{}'.format(catresults))
 
 def main():
     """
index 707c6bfef689947e1bd3832db9ab420286dbb59c..52fa539dc662b48fe6127ca571c0785ba014fc81 100755 (executable)
@@ -49,13 +49,13 @@ index = 0
 for i in range(0x100):
     for j in range(0x100):
         for k in range(0x100):
-            mac = ("%02x:%02x:%02x" % (i, j, k))
+            mac = ("{:02x}:{:02x}:{:02x}".format(i, j, k))
             src_mac = "e4:11:00:" + mac
             dst_mac = "e4:12:00:" + mac
-            cmd = ("filter add dev %s %s protocol ip parent ffff: flower %s "
-                   "src_mac %s dst_mac %s action drop %s" %
+            cmd = ("filter add dev {} {} protocol ip parent ffff: flower {} "
+                   "src_mac {} dst_mac {} action drop {}".format
                    (device, prio, skip, src_mac, dst_mac, share_action))
-            file.write("%s\n" % cmd)
+            file.write("{}\n".format(cmd))
             index += 1
             if index >= number:
                 file.close()
index 3d5a62ff7d31ed437fc5457a501ec9b606839f92..f5d7a7851e2177b315111f4e8ae3f0b0a716487d 100644 (file)
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
+include ../lib.mk
+
 ifndef CROSS_COMPILE
 CFLAGS := -std=gnu99
 CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
@@ -6,16 +8,14 @@ ifeq ($(CONFIG_X86_32),y)
 LDLIBS += -lgcc_s
 endif
 
-TEST_PROGS := vdso_test vdso_standalone_test_x86
+TEST_PROGS := $(OUTPUT)/vdso_test $(OUTPUT)/vdso_standalone_test_x86
 
 all: $(TEST_PROGS)
-vdso_test: parse_vdso.c vdso_test.c
-vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
+$(OUTPUT)/vdso_test: parse_vdso.c vdso_test.c
+$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
        $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
                vdso_standalone_test_x86.c parse_vdso.c \
-               -o vdso_standalone_test_x86
+               -o $@
 
-include ../lib.mk
-clean:
-       rm -fr $(TEST_PROGS)
+EXTRA_CLEAN := $(TEST_PROGS)
 endif
index 63c94d776e8970de2703b7416a76bffdeb101361..342c7bc9dc8c5d29441e27da5131d5312ea4bca8 100644 (file)
@@ -11,3 +11,4 @@ mlock-intersect-test
 mlock-random-test
 virtual_address_range
 gup_benchmark
+va_128TBswitch
index d2561895a021a4fc87822938c4485fcf85aff933..22d56467383029b24b52b95e6ee09cc0bb6bf835 100755 (executable)
@@ -2,25 +2,33 @@
 # SPDX-License-Identifier: GPL-2.0
 #please run as root
 
-#we need 256M, below is the size in kB
-needmem=262144
 mnt=./huge
 exitcode=0
 
-#get pagesize and freepages from /proc/meminfo
+#get huge pagesize and freepages from /proc/meminfo
 while read name size unit; do
        if [ "$name" = "HugePages_Free:" ]; then
                freepgs=$size
        fi
        if [ "$name" = "Hugepagesize:" ]; then
-               pgsize=$size
+               hpgsize_KB=$size
        fi
 done < /proc/meminfo
 
+# Simple hugetlbfs tests have a hardcoded minimum requirement of
+# huge pages totaling 256MB (262144KB) in size.  The userfaultfd
+# hugetlb test requires a minimum of 2 * nr_cpus huge pages.  Take
+# both of these requirements into account and attempt to increase the
+# number of huge pages available.
+nr_cpus=$(nproc)
+hpgsize_MB=$((hpgsize_KB / 1024))
+half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128))
+needmem_KB=$((half_ufd_size_MB * 2 * 1024))
+
 #set proper nr_hugepages
-if [ -n "$freepgs" ] && [ -n "$pgsize" ]; then
+if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
        nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
-       needpgs=`expr $needmem / $pgsize`
+       needpgs=$((needmem_KB / hpgsize_KB))
        tries=2
        while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do
                lackpgs=$(( $needpgs - $freepgs ))
@@ -107,8 +115,9 @@ fi
 echo "---------------------------"
 echo "running userfaultfd_hugetlb"
 echo "---------------------------"
-# 256MB total huge pages == 128MB src and 128MB dst
-./userfaultfd hugetlb 128 32 $mnt/ufd_test_file
+# Test requires source and destination huge pages.  Size of source
+# (half_ufd_size_MB) is passed as argument to test.
+./userfaultfd hugetlb $half_ufd_size_MB 32 $mnt/ufd_test_file
 if [ $? -ne 0 ]; then
        echo "[FAIL]"
        exitcode=1
index 361466a2eaef34492e24588d4ef52d8aa68a53ac..ade443a8842106e49aabc1f2d36dc9d4f8b75b65 100644 (file)
@@ -95,6 +95,10 @@ asm (
        "int3\n\t"
        "vmcode_int80:\n\t"
        "int $0x80\n\t"
+       "vmcode_popf_hlt:\n\t"
+       "push %ax\n\t"
+       "popf\n\t"
+       "hlt\n\t"
        "vmcode_umip:\n\t"
        /* addressing via displacements */
        "smsw (2052)\n\t"
@@ -124,8 +128,8 @@ asm (
 
 extern unsigned char vmcode[], end_vmcode[];
 extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[],
-       vmcode_sti[], vmcode_int3[], vmcode_int80[], vmcode_umip[],
-       vmcode_umip_str[], vmcode_umip_sldt[];
+       vmcode_sti[], vmcode_int3[], vmcode_int80[], vmcode_popf_hlt[],
+       vmcode_umip[], vmcode_umip_str[], vmcode_umip_sldt[];
 
 /* Returns false if the test was skipped. */
 static bool do_test(struct vm86plus_struct *v86, unsigned long eip,
@@ -175,7 +179,7 @@ static bool do_test(struct vm86plus_struct *v86, unsigned long eip,
            (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) {
                printf("[OK]\tReturned correctly\n");
        } else {
-               printf("[FAIL]\tIncorrect return reason\n");
+               printf("[FAIL]\tIncorrect return reason (started at eip = 0x%lx, ended at eip = 0x%lx)\n", eip, v86->regs.eip);
                nerrs++;
        }
 
@@ -264,6 +268,9 @@ int main(void)
        v86.regs.ds = load_addr / 16;
        v86.regs.es = load_addr / 16;
 
+       /* Use the end of the page as our stack. */
+       v86.regs.esp = 4096;
+
        assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */
 
        /* #BR -- should deliver SIG??? */
@@ -295,6 +302,23 @@ int main(void)
        v86.regs.eflags &= ~X86_EFLAGS_IF;
        do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set");
 
+       /* POPF with VIP set but IF clear: should not trap */
+       v86.regs.eflags = X86_EFLAGS_VIP;
+       v86.regs.eax = 0;
+       do_test(&v86, vmcode_popf_hlt - vmcode, VM86_UNKNOWN, 0, "POPF with VIP set and IF clear");
+
+       /* POPF with VIP set and IF set: should trap */
+       v86.regs.eflags = X86_EFLAGS_VIP;
+       v86.regs.eax = X86_EFLAGS_IF;
+       do_test(&v86, vmcode_popf_hlt - vmcode, VM86_STI, 0, "POPF with VIP and IF set");
+
+       /* POPF with VIP clear and IF set: should not trap */
+       v86.regs.eflags = 0;
+       v86.regs.eax = X86_EFLAGS_IF;
+       do_test(&v86, vmcode_popf_hlt - vmcode, VM86_UNKNOWN, 0, "POPF with VIP clear and IF set");
+
+       v86.regs.eflags = 0;
+
        /* INT3 -- should cause #BP */
        do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3");
 
@@ -318,7 +342,7 @@ int main(void)
        clearhandler(SIGSEGV);
 
        /* Make sure nothing explodes if we fork. */
-       if (fork() > 0)
+       if (fork() == 0)
                return 0;
 
        return (nerrs == 0 ? 0 : 1);
index be81621446f01cf020c68d690e9772e49e736baf..0b4f1cc2291c660df97a984065b4cdc3384e2c98 100644 (file)
@@ -450,7 +450,7 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
                num_vsyscall_traps++;
 }
 
-static int test_native_vsyscall(void)
+static int test_emulation(void)
 {
        time_t tmp;
        bool is_native;
@@ -458,7 +458,7 @@ static int test_native_vsyscall(void)
        if (!vtime)
                return 0;
 
-       printf("[RUN]\tchecking for native vsyscall\n");
+       printf("[RUN]\tchecking that vsyscalls are emulated\n");
        sethandler(SIGTRAP, sigtrap, 0);
        set_eflags(get_eflags() | X86_EFLAGS_TF);
        vtime(&tmp);
@@ -474,11 +474,12 @@ static int test_native_vsyscall(void)
         */
        is_native = (num_vsyscall_traps > 1);
 
-       printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+       printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+              (is_native ? "FAIL" : "OK"),
               (is_native ? "native" : "emulated"),
               (int)num_vsyscall_traps);
 
-       return 0;
+       return is_native;
 }
 #endif
 
@@ -498,7 +499,7 @@ int main(int argc, char **argv)
        nerrs += test_vsys_r();
 
 #ifdef __x86_64__
-       nerrs += test_native_vsyscall();
+       nerrs += test_emulation();
 #endif
 
        return nerrs ? 1 : 0;
index 70268c0bec799c0ce85c2f27267e0ab514c5f852..282389eb204f40f5857fe34f5f4ddb4e8c6e2200 100644 (file)
@@ -36,6 +36,8 @@ static struct timecounter *timecounter;
 static unsigned int host_vtimer_irq;
 static u32 host_vtimer_irq_flags;
 
+static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
+
 static const struct kvm_irq_level default_ptimer_irq = {
        .irq    = 30,
        .level  = 1,
@@ -56,6 +58,12 @@ u64 kvm_phys_timer_read(void)
        return timecounter->cc->read(timecounter->cc);
 }
 
+static inline bool userspace_irqchip(struct kvm *kvm)
+{
+       return static_branch_unlikely(&userspace_irqchip_in_use) &&
+               unlikely(!irqchip_in_kernel(kvm));
+}
+
 static void soft_timer_start(struct hrtimer *hrt, u64 ns)
 {
        hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
@@ -69,25 +77,6 @@ static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
                cancel_work_sync(work);
 }
 
-static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu)
-{
-       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
-       /*
-        * When using a userspace irqchip with the architected timers, we must
-        * prevent continuously exiting from the guest, and therefore mask the
-        * physical interrupt by disabling it on the host interrupt controller
-        * when the virtual level is high, such that the guest can make
-        * forward progress.  Once we detect the output level being
-        * de-asserted, we unmask the interrupt again so that we exit from the
-        * guest when the timer fires.
-        */
-       if (vtimer->irq.level)
-               disable_percpu_irq(host_vtimer_irq);
-       else
-               enable_percpu_irq(host_vtimer_irq, 0);
-}
-
 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
        struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
@@ -106,9 +95,9 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
        if (kvm_timer_should_fire(vtimer))
                kvm_timer_update_irq(vcpu, true, vtimer);
 
-       if (static_branch_unlikely(&userspace_irqchip_in_use) &&
-           unlikely(!irqchip_in_kernel(vcpu->kvm)))
-               kvm_vtimer_update_mask_user(vcpu);
+       if (userspace_irqchip(vcpu->kvm) &&
+           !static_branch_unlikely(&has_gic_active_state))
+               disable_percpu_irq(host_vtimer_irq);
 
        return IRQ_HANDLED;
 }
@@ -290,8 +279,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
        trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
                                   timer_ctx->irq.level);
 
-       if (!static_branch_unlikely(&userspace_irqchip_in_use) ||
-           likely(irqchip_in_kernel(vcpu->kvm))) {
+       if (!userspace_irqchip(vcpu->kvm)) {
                ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
                                          timer_ctx->irq.irq,
                                          timer_ctx->irq.level,
@@ -350,12 +338,6 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
        phys_timer_emulate(vcpu);
 }
 
-static void __timer_snapshot_state(struct arch_timer_context *timer)
-{
-       timer->cnt_ctl = read_sysreg_el0(cntv_ctl);
-       timer->cnt_cval = read_sysreg_el0(cntv_cval);
-}
-
 static void vtimer_save_state(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -367,8 +349,10 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
        if (!vtimer->loaded)
                goto out;
 
-       if (timer->enabled)
-               __timer_snapshot_state(vtimer);
+       if (timer->enabled) {
+               vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+               vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
+       }
 
        /* Disable the virtual timer */
        write_sysreg_el0(0, cntv_ctl);
@@ -460,23 +444,43 @@ static void set_cntvoff(u64 cntvoff)
        kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
 }
 
-static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
+static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active)
+{
+       int r;
+       r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active);
+       WARN_ON(r);
+}
+
+static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
        bool phys_active;
-       int ret;
-
-       phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
 
-       ret = irq_set_irqchip_state(host_vtimer_irq,
-                                   IRQCHIP_STATE_ACTIVE,
-                                   phys_active);
-       WARN_ON(ret);
+       if (irqchip_in_kernel(vcpu->kvm))
+               phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
+       else
+               phys_active = vtimer->irq.level;
+       set_vtimer_irq_phys_active(vcpu, phys_active);
 }
 
-static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu)
+static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
 {
-       kvm_vtimer_update_mask_user(vcpu);
+       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+       /*
+        * When using a userspace irqchip with the architected timers and a
+        * host interrupt controller that doesn't support an active state, we
+        * must still prevent continuously exiting from the guest, and
+        * therefore mask the physical interrupt by disabling it on the host
+        * interrupt controller when the virtual level is high, such that the
+        * guest can make forward progress.  Once we detect the output level
+        * being de-asserted, we unmask the interrupt again so that we exit
+        * from the guest when the timer fires.
+        */
+       if (vtimer->irq.level)
+               disable_percpu_irq(host_vtimer_irq);
+       else
+               enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
 }
 
 void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
@@ -487,10 +491,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
        if (unlikely(!timer->enabled))
                return;
 
-       if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
-               kvm_timer_vcpu_load_user(vcpu);
+       if (static_branch_likely(&has_gic_active_state))
+               kvm_timer_vcpu_load_gic(vcpu);
        else
-               kvm_timer_vcpu_load_vgic(vcpu);
+               kvm_timer_vcpu_load_nogic(vcpu);
 
        set_cntvoff(vtimer->cntvoff);
 
@@ -555,22 +559,29 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 
-       if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
-               __timer_snapshot_state(vtimer);
-               if (!kvm_timer_should_fire(vtimer)) {
-                       kvm_timer_update_irq(vcpu, false, vtimer);
-                       kvm_vtimer_update_mask_user(vcpu);
-               }
+       if (!kvm_timer_should_fire(vtimer)) {
+               kvm_timer_update_irq(vcpu, false, vtimer);
+               if (static_branch_likely(&has_gic_active_state))
+                       set_vtimer_irq_phys_active(vcpu, false);
+               else
+                       enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
        }
 }
 
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 {
-       unmask_vtimer_irq_user(vcpu);
+       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+       if (unlikely(!timer->enabled))
+               return;
+
+       if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+               unmask_vtimer_irq_user(vcpu);
 }
 
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
 {
+       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
        struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
        struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
@@ -584,6 +595,9 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
        ptimer->cnt_ctl = 0;
        kvm_timer_update_state(vcpu);
 
+       if (timer->enabled && irqchip_in_kernel(vcpu->kvm))
+               kvm_vgic_reset_mapped_irq(vcpu, vtimer->irq.irq);
+
        return 0;
 }
 
@@ -753,9 +767,11 @@ int kvm_timer_hyp_init(bool has_gic)
                        kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
                        goto out_free_irq;
                }
+
+               static_branch_enable(&has_gic_active_state);
        }
 
-       kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
+       kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);
 
        cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
                          "kvm/arm/timer:starting", kvm_timer_starting_cpu,
index 86941f6181bb0ee168918ce2436ed19c12dc4a07..53572304843b2e8e78d6010c6ba7d5003bb97ded 100644 (file)
@@ -384,14 +384,11 @@ static void vcpu_power_off(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
 {
-       vcpu_load(vcpu);
-
        if (vcpu->arch.power_off)
                mp_state->mp_state = KVM_MP_STATE_STOPPED;
        else
                mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
 
-       vcpu_put(vcpu);
        return 0;
 }
 
@@ -400,8 +397,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 {
        int ret = 0;
 
-       vcpu_load(vcpu);
-
        switch (mp_state->mp_state) {
        case KVM_MP_STATE_RUNNABLE:
                vcpu->arch.power_off = false;
@@ -413,7 +408,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
                ret = -EINVAL;
        }
 
-       vcpu_put(vcpu);
        return ret;
 }
 
@@ -1036,8 +1030,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        struct kvm_device_attr attr;
        long r;
 
-       vcpu_load(vcpu);
-
        switch (ioctl) {
        case KVM_ARM_VCPU_INIT: {
                struct kvm_vcpu_init init;
@@ -1114,7 +1106,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = -EINVAL;
        }
 
-       vcpu_put(vcpu);
        return r;
 }
 
index f5c3d6d7019ea63a7d2376c68f6c392cd2036aaf..b89ce5432214358e594c536bedf371a1b986e5ae 100644 (file)
@@ -215,7 +215,8 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
         * are now visible to the system register interface.
         */
        if (!cpu_if->vgic_sre) {
-               dsb(st);
+               dsb(sy);
+               isb();
                cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
        }
 
index ec62d1cccab7c92dddb4eea86f2be22f7d4d0adc..b960acdd0c0518c1b0ccae81f847e6a0f4094f4d 100644 (file)
@@ -1810,9 +1810,9 @@ int kvm_mmu_init(void)
         */
        BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
 
-       kvm_info("IDMAP page: %lx\n", hyp_idmap_start);
-       kvm_info("HYP VA range: %lx:%lx\n",
-                kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL));
+       kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
+       kvm_debug("HYP VA range: %lx:%lx\n",
+                 kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL));
 
        if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
            hyp_idmap_start <  kern_hyp_va(~0UL) &&
index 83d82bd7dc4e714f61669e6aaf9e937b67f5eea1..dbe99d635c80435ffd938999c4246ce6f45307c7 100644 (file)
@@ -113,9 +113,12 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
        /* Loop over all IRQs affected by this read */
        for (i = 0; i < len * 8; i++) {
                struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+               unsigned long flags;
 
+               spin_lock_irqsave(&irq->irq_lock, flags);
                if (irq_is_pending(irq))
                        value |= (1U << i);
+               spin_unlock_irqrestore(&irq->irq_lock, flags);
 
                vgic_put_irq(vcpu->kvm, irq);
        }
index c32d7b93ffd194313f8cc062d2960d6a04a38d16..29556f71b691fb401baf31be47367b7c49c696fd 100644 (file)
@@ -37,6 +37,13 @@ void vgic_v2_init_lrs(void)
                vgic_v2_write_lr(i, 0);
 }
 
+void vgic_v2_set_npie(struct kvm_vcpu *vcpu)
+{
+       struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
+
+       cpuif->vgic_hcr |= GICH_HCR_NPIE;
+}
+
 void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
 {
        struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
@@ -64,7 +71,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
        int lr;
        unsigned long flags;
 
-       cpuif->vgic_hcr &= ~GICH_HCR_UIE;
+       cpuif->vgic_hcr &= ~(GICH_HCR_UIE | GICH_HCR_NPIE);
 
        for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
                u32 val = cpuif->vgic_lr[lr];
@@ -410,7 +417,7 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
        kvm_vgic_global_state.type = VGIC_V2;
        kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS;
 
-       kvm_info("vgic-v2@%llx\n", info->vctrl.start);
+       kvm_debug("vgic-v2@%llx\n", info->vctrl.start);
 
        return 0;
 out:
index 6b329414e57a3c16207ab8e5ad81a0cc002b7f51..0ff2006f37817c0ee9fc8887fecb6ea6dd2f30ad 100644 (file)
@@ -26,6 +26,13 @@ static bool group1_trap;
 static bool common_trap;
 static bool gicv4_enable;
 
+void vgic_v3_set_npie(struct kvm_vcpu *vcpu)
+{
+       struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
+
+       cpuif->vgic_hcr |= ICH_HCR_NPIE;
+}
+
 void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
 {
        struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
@@ -47,7 +54,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
        int lr;
        unsigned long flags;
 
-       cpuif->vgic_hcr &= ~ICH_HCR_UIE;
+       cpuif->vgic_hcr &= ~(ICH_HCR_UIE | ICH_HCR_NPIE);
 
        for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
                u64 val = cpuif->vgic_lr[lr];
index c7c5ef190afa0c3984d5b9051f59ca72c696eb20..8201899126f6bf338247be84dc1b91cd390b608b 100644 (file)
@@ -495,6 +495,32 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
        return ret;
 }
 
+/**
+ * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
+ * @vcpu: The VCPU pointer
+ * @vintid: The INTID of the interrupt
+ *
+ * Reset the active and pending states of a mapped interrupt.  Kernel
+ * subsystems injecting mapped interrupts should reset their interrupt lines
+ * when we are doing a reset of the VM.
+ */
+void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
+{
+       struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
+       unsigned long flags;
+
+       if (!irq->hw)
+               goto out;
+
+       spin_lock_irqsave(&irq->irq_lock, flags);
+       irq->active = false;
+       irq->pending_latch = false;
+       irq->line_level = false;
+       spin_unlock_irqrestore(&irq->irq_lock, flags);
+out:
+       vgic_put_irq(vcpu->kvm, irq);
+}
+
 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
 {
        struct vgic_irq *irq;
@@ -684,22 +710,37 @@ static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
                vgic_v3_set_underflow(vcpu);
 }
 
+static inline void vgic_set_npie(struct kvm_vcpu *vcpu)
+{
+       if (kvm_vgic_global_state.type == VGIC_V2)
+               vgic_v2_set_npie(vcpu);
+       else
+               vgic_v3_set_npie(vcpu);
+}
+
 /* Requires the ap_list_lock to be held. */
-static int compute_ap_list_depth(struct kvm_vcpu *vcpu)
+static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
+                                bool *multi_sgi)
 {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq;
        int count = 0;
 
+       *multi_sgi = false;
+
        DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
 
        list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
                spin_lock(&irq->irq_lock);
                /* GICv2 SGIs can count for more than one... */
-               if (vgic_irq_is_sgi(irq->intid) && irq->source)
-                       count += hweight8(irq->source);
-               else
+               if (vgic_irq_is_sgi(irq->intid) && irq->source) {
+                       int w = hweight8(irq->source);
+
+                       count += w;
+                       *multi_sgi |= (w > 1);
+               } else {
                        count++;
+               }
                spin_unlock(&irq->irq_lock);
        }
        return count;
@@ -710,28 +751,43 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
 {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq;
-       int count = 0;
+       int count;
+       bool npie = false;
+       bool multi_sgi;
+       u8 prio = 0xff;
 
        DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
 
-       if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr)
+       count = compute_ap_list_depth(vcpu, &multi_sgi);
+       if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
                vgic_sort_ap_list(vcpu);
 
+       count = 0;
+
        list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
                spin_lock(&irq->irq_lock);
 
-               if (unlikely(vgic_target_oracle(irq) != vcpu))
-                       goto next;
-
                /*
-                * If we get an SGI with multiple sources, try to get
-                * them in all at once.
+                * If we have multi-SGIs in the pipeline, we need to
+                * guarantee that they are all seen before any IRQ of
+                * lower priority. In that case, we need to filter out
+                * these interrupts by exiting early. This is easy as
+                * the AP list has been sorted already.
                 */
-               do {
+               if (multi_sgi && irq->priority > prio) {
+                       spin_unlock(&irq->irq_lock);
+                       break;
+               }
+
+               if (likely(vgic_target_oracle(irq) == vcpu)) {
                        vgic_populate_lr(vcpu, irq, count++);
-               } while (irq->source && count < kvm_vgic_global_state.nr_lr);
 
-next:
+                       if (irq->source) {
+                               npie = true;
+                               prio = irq->priority;
+                       }
+               }
+
                spin_unlock(&irq->irq_lock);
 
                if (count == kvm_vgic_global_state.nr_lr) {
@@ -742,6 +798,9 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
                }
        }
 
+       if (npie)
+               vgic_set_npie(vcpu);
+
        vcpu->arch.vgic_cpu.used_lrs = count;
 
        /* Nuke remaining LRs */
index 12c37b89f7a38212c5eec4a115b9e2bb20d28fdc..f5b8519e55463d298e05888b3f2c5a71cf1ee7d6 100644 (file)
@@ -96,6 +96,7 @@
 /* we only support 64 kB translation table page size */
 #define KVM_ITS_L1E_ADDR_MASK          GENMASK_ULL(51, 16)
 
+/* Requires the irq_lock to be held by the caller. */
 static inline bool irq_is_pending(struct vgic_irq *irq)
 {
        if (irq->config == VGIC_CONFIG_EDGE)
@@ -159,6 +160,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
 void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
 void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
 void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
+void vgic_v2_set_npie(struct kvm_vcpu *vcpu);
 int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
 int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
                         int offset, u32 *val);
@@ -188,6 +190,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
 void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
 void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
 void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
+void vgic_v3_set_npie(struct kvm_vcpu *vcpu);
 void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 void vgic_v3_enable(struct kvm_vcpu *vcpu);
index 4501e658e8d6fc97f39b54bceff8f8b1f36cb95a..65dea3ffef68ede4ca8d8e5bb7d9c2d3edd3352a 100644 (file)
@@ -969,8 +969,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
                /* Check for overlaps */
                r = -EEXIST;
                kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) {
-                       if ((slot->id >= KVM_USER_MEM_SLOTS) ||
-                           (slot->id == id))
+                       if (slot->id == id)
                                continue;
                        if (!((base_gfn + npages <= slot->base_gfn) ||
                              (base_gfn >= slot->base_gfn + slot->npages)))