Files
android_kernel_xiaomi_sm8450/net/ipv6/tcp_ipv6.c
Greg Kroah-Hartman 477f5e6b9e Merge 5.10.188 into android12-5.10-lts
Changes in 5.10.188
	media: atomisp: fix "variable dereferenced before check 'asd'"
	x86/smp: Use dedicated cache-line for mwait_play_dead()
	can: isotp: isotp_sendmsg(): fix return error fix on TX path
	video: imsttfb: check for ioremap() failures
	fbdev: imsttfb: Fix use after free bug in imsttfb_probe
	HID: wacom: Use ktime_t rather than int when dealing with timestamps
	HID: logitech-hidpp: add HIDPP_QUIRK_DELAYED_INIT for the T651.
	Revert "thermal/drivers/mediatek: Use devm_of_iomap to avoid resource leak in mtk_thermal_probe"
	scripts/tags.sh: Resolve gtags empty index generation
	drm/amdgpu: Validate VM ioctl flags.
	nubus: Partially revert proc_create_single_data() conversion
	fs: pipe: reveal missing function protoypes
	x86/resctrl: Only show tasks' pid in current pid namespace
	blk-iocost: use spin_lock_irqsave in adjust_inuse_and_calc_cost
	md/raid10: check slab-out-of-bounds in md_bitmap_get_counter
	md/raid10: fix overflow of md/safe_mode_delay
	md/raid10: fix wrong setting of max_corr_read_errors
	md/raid10: fix null-ptr-deref of mreplace in raid10_sync_request
	md/raid10: fix io loss while replacement replace rdev
	irqchip/jcore-aic: Kill use of irq_create_strict_mappings()
	irqchip/jcore-aic: Fix missing allocation of IRQ descriptors
	posix-timers: Prevent RT livelock in itimer_delete()
	tracing/timer: Add missing hrtimer modes to decode_hrtimer_mode().
	clocksource/drivers/cadence-ttc: Fix memory leak in ttc_timer_probe
	PM: domains: fix integer overflow issues in genpd_parse_state()
	perf/arm-cmn: Fix DTC reset
	powercap: RAPL: Fix CONFIG_IOSF_MBI dependency
	ARM: 9303/1: kprobes: avoid missing-declaration warnings
	cpufreq: intel_pstate: Fix energy_performance_preference for passive
	thermal/drivers/sun8i: Fix some error handling paths in sun8i_ths_probe()
	rcuscale: Console output claims too few grace periods
	rcuscale: Always log error message
	rcuscale: Move shutdown from wait_event() to wait_event_idle()
	rcu/rcuscale: Move rcu_scale_*() after kfree_scale_cleanup()
	rcu/rcuscale: Stop kfree_scale_thread thread(s) after unloading rcuscale
	perf/ibs: Fix interface via core pmu events
	x86/mm: Fix __swp_entry_to_pte() for Xen PV guests
	evm: Complete description of evm_inode_setattr()
	ima: Fix build warnings
	pstore/ram: Add check for kstrdup
	igc: Enable and fix RX hash usage by netstack
	wifi: ath9k: fix AR9003 mac hardware hang check register offset calculation
	wifi: ath9k: avoid referencing uninit memory in ath9k_wmi_ctrl_rx
	samples/bpf: Fix buffer overflow in tcp_basertt
	spi: spi-geni-qcom: Correct CS_TOGGLE bit in SPI_TRANS_CFG
	wifi: wilc1000: fix for absent RSN capabilities WFA testcase
	wifi: mwifiex: Fix the size of a memory allocation in mwifiex_ret_802_11_scan()
	bpf: Remove extra lock_sock for TCP_ZEROCOPY_RECEIVE
	sctp: add bpf_bypass_getsockopt proto callback
	libbpf: fix offsetof() and container_of() to work with CO-RE
	nfc: constify several pointers to u8, char and sk_buff
	nfc: llcp: fix possible use of uninitialized variable in nfc_llcp_send_connect()
	bpftool: JIT limited misreported as negative value on aarch64
	regulator: core: Fix more error checking for debugfs_create_dir()
	regulator: core: Streamline debugfs operations
	wifi: orinoco: Fix an error handling path in spectrum_cs_probe()
	wifi: orinoco: Fix an error handling path in orinoco_cs_probe()
	wifi: atmel: Fix an error handling path in atmel_probe()
	wl3501_cs: Fix misspelling and provide missing documentation
	net: create netdev->dev_addr assignment helpers
	wl3501_cs: use eth_hw_addr_set()
	wifi: wl3501_cs: Fix an error handling path in wl3501_probe()
	wifi: ray_cs: Utilize strnlen() in parse_addr()
	wifi: ray_cs: Drop useless status variable in parse_addr()
	wifi: ray_cs: Fix an error handling path in ray_probe()
	wifi: ath9k: don't allow to overwrite ENDPOINT0 attributes
	wifi: rsi: Do not configure WoWlan in shutdown hook if not enabled
	wifi: rsi: Do not set MMC_PM_KEEP_POWER in shutdown
	watchdog/perf: define dummy watchdog_update_hrtimer_threshold() on correct config
	watchdog/perf: more properly prevent false positives with turbo modes
	kexec: fix a memory leak in crash_shrink_memory()
	memstick r592: make memstick_debug_get_tpc_name() static
	wifi: ath9k: Fix possible stall on ath9k_txq_list_has_key()
	rtnetlink: extend RTEXT_FILTER_SKIP_STATS to IFLA_VF_INFO
	wifi: iwlwifi: pull from TXQs with softirqs disabled
	wifi: cfg80211: rewrite merging of inherited elements
	wifi: ath9k: convert msecs to jiffies where needed
	igc: Fix race condition in PTP tx code
	net: stmmac: fix double serdes powerdown
	netlink: fix potential deadlock in netlink_set_err()
	netlink: do not hard code device address lenth in fdb dumps
	selftests: rtnetlink: remove netdevsim device after ipsec offload test
	gtp: Fix use-after-free in __gtp_encap_destroy().
	net: axienet: Move reset before 64-bit DMA detection
	sfc: fix crash when reading stats while NIC is resetting
	nfc: llcp: simplify llcp_sock_connect() error paths
	net: nfc: Fix use-after-free caused by nfc_llcp_find_local
	lib/ts_bm: reset initial match offset for every block of text
	netfilter: conntrack: dccp: copy entire header to stack buffer, not just basic one
	netfilter: nf_conntrack_sip: fix the ct_sip_parse_numerical_param() return value.
	ipvlan: Fix return value of ipvlan_queue_xmit()
	netlink: Add __sock_i_ino() for __netlink_diag_dump().
	radeon: avoid double free in ci_dpm_init()
	drm/amd/display: Explicitly specify update type per plane info change
	Input: drv260x - sleep between polling GO bit
	drm/bridge: tc358768: always enable HS video mode
	drm/bridge: tc358768: fix PLL parameters computation
	drm/bridge: tc358768: fix PLL target frequency
	drm/bridge: tc358768: fix TCLK_ZEROCNT computation
	drm/bridge: tc358768: Add atomic_get_input_bus_fmts() implementation
	drm/bridge: tc358768: fix TCLK_TRAILCNT computation
	drm/bridge: tc358768: fix THS_ZEROCNT computation
	drm/bridge: tc358768: fix TXTAGOCNT computation
	drm/bridge: tc358768: fix THS_TRAILCNT computation
	drm/vram-helper: fix function names in vram helper doc
	ARM: dts: BCM5301X: Drop "clock-names" from the SPI node
	ARM: dts: meson8b: correct uart_B and uart_C clock references
	Input: adxl34x - do not hardcode interrupt trigger type
	drm: sun4i_tcon: use devm_clk_get_enabled in `sun4i_tcon_init_clocks`
	drm/panel: sharp-ls043t1le01: adjust mode settings
	ARM: dts: stm32: Move ethernet MAC EEPROM from SoM to carrier boards
	bus: ti-sysc: Fix dispc quirk masking bool variables
	arm64: dts: microchip: sparx5: do not use PSCI on reference boards
	RDMA/bnxt_re: Disable/kill tasklet only if it is enabled
	RDMA/bnxt_re: Fix to remove unnecessary return labels
	RDMA/bnxt_re: Use unique names while registering interrupts
	RDMA/bnxt_re: Remove a redundant check inside bnxt_re_update_gid
	RDMA/bnxt_re: Fix to remove an unnecessary log
	ARM: dts: gta04: Move model property out of pinctrl node
	arm64: dts: qcom: msm8916: correct camss unit address
	arm64: dts: qcom: msm8994: correct SPMI unit address
	arm64: dts: qcom: msm8996: correct camss unit address
	drm/panel: simple: fix active size for Ampire AM-480272H3TMQW-T01H
	ARM: ep93xx: fix missing-prototype warnings
	ARM: omap2: fix missing tick_broadcast() prototype
	arm64: dts: qcom: apq8096: fix fixed regulator name property
	ARM: dts: stm32: Shorten the AV96 HDMI sound card name
	memory: brcmstb_dpfe: fix testing array offset after use
	ASoC: es8316: Increment max value for ALC Capture Target Volume control
	ASoC: es8316: Do not set rate constraints for unsupported MCLKs
	ARM: dts: meson8: correct uart_B and uart_C clock references
	soc/fsl/qe: fix usb.c build errors
	IB/hfi1: Use bitmap_zalloc() when applicable
	IB/hfi1: Fix sdma.h tx->num_descs off-by-one errors
	IB/hfi1: Fix wrong mmu_node used for user SDMA packet after invalidate
	RDMA: Remove uverbs_ex_cmd_mask values that are linked to functions
	RDMA/hns: Fix coding style issues
	RDMA/hns: Use refcount_t APIs for HEM
	RDMA/hns: Clean the hardware related code for HEM
	RDMA/hns: Fix hns_roce_table_get return value
	ARM: dts: iwg20d-q7-common: Fix backlight pwm specifier
	arm64: dts: renesas: ulcb-kf: Remove flow control for SCIF1
	fbdev: omapfb: lcd_mipid: Fix an error handling path in mipid_spi_probe()
	arm64: dts: ti: k3-j7200: Fix physical address of pin
	ARM: dts: stm32: Fix audio routing on STM32MP15xx DHCOM PDK2
	ARM: dts: stm32: fix i2s endpoint format property for stm32mp15xx-dkx
	hwmon: (gsc-hwmon) fix fan pwm temperature scaling
	hwmon: (adm1275) enable adm1272 temperature reporting
	hwmon: (adm1275) Allow setting sample averaging
	hwmon: (pmbus/adm1275) Fix problems with temperature monitoring on ADM1272
	ARM: dts: BCM5301X: fix duplex-full => full-duplex
	drm/amdkfd: Fix potential deallocation of previously deallocated memory.
	drm/radeon: fix possible division-by-zero errors
	amdgpu: validate offset_in_bo of drm_amdgpu_gem_va
	RDMA/bnxt_re: wraparound mbox producer index
	RDMA/bnxt_re: Avoid calling wake_up threads from spin_lock context
	clk: imx: clk-imx8mn: fix memory leak in imx8mn_clocks_probe
	clk: imx: clk-imx8mp: improve error handling in imx8mp_clocks_probe()
	clk: tegra: tegra124-emc: Fix potential memory leak
	ALSA: ac97: Fix possible NULL dereference in snd_ac97_mixer
	drm/msm/dpu: do not enable color-management if DSPPs are not available
	drm/msm/dp: Free resources after unregistering them
	clk: vc5: check memory returned by kasprintf()
	clk: cdce925: check return value of kasprintf()
	clk: si5341: Allow different output VDD_SEL values
	clk: si5341: Add sysfs properties to allow checking/resetting device faults
	clk: si5341: return error if one synth clock registration fails
	clk: si5341: check return value of {devm_}kasprintf()
	clk: si5341: free unused memory on probe failure
	clk: keystone: sci-clk: check return value of kasprintf()
	clk: ti: clkctrl: check return value of kasprintf()
	drivers: meson: secure-pwrc: always enable DMA domain
	ovl: update of dentry revalidate flags after copy up
	ASoC: imx-audmix: check return value of devm_kasprintf()
	PCI: cadence: Fix Gen2 Link Retraining process
	scsi: qedf: Fix NULL dereference in error handling
	pinctrl: bcm2835: Handle gpiochip_add_pin_range() errors
	PCI/ASPM: Disable ASPM on MFD function removal to avoid use-after-free
	scsi: 3w-xxxx: Add error handling for initialization failure in tw_probe()
	PCI: pciehp: Cancel bringup sequence if card is not present
	PCI: ftpci100: Release the clock resources
	PCI: Add pci_clear_master() stub for non-CONFIG_PCI
	perf bench: Use unbuffered output when pipe/tee'ing to a file
	perf bench: Add missing setlocale() call to allow usage of %'d style formatting
	pinctrl: cherryview: Return correct value if pin in push-pull mode
	kcsan: Don't expect 64 bits atomic builtins from 32 bits architectures
	perf script: Fixup 'struct evsel_script' method prefix
	perf script: Fix allocation of evsel->priv related to per-event dump files
	perf dwarf-aux: Fix off-by-one in die_get_varname()
	pinctrl: at91-pio4: check return value of devm_kasprintf()
	powerpc/powernv/sriov: perform null check on iov before dereferencing iov
	mm: rename pud_page_vaddr to pud_pgtable and make it return pmd_t *
	mm: rename p4d_page_vaddr to p4d_pgtable and make it return pud_t *
	powerpc/book3s64/mm: Fix DirectMap stats in /proc/meminfo
	powerpc/mm/dax: Fix the condition when checking if altmap vmemap can cross-boundary
	hwrng: virtio - add an internal buffer
	hwrng: virtio - don't wait on cleanup
	hwrng: virtio - don't waste entropy
	hwrng: virtio - always add a pending request
	hwrng: virtio - Fix race on data_avail and actual data
	crypto: nx - fix build warnings when DEBUG_FS is not enabled
	modpost: fix section mismatch message for R_ARM_ABS32
	modpost: fix section mismatch message for R_ARM_{PC24,CALL,JUMP24}
	crypto: marvell/cesa - Fix type mismatch warning
	modpost: fix off by one in is_executable_section()
	ARC: define ASM_NL and __ALIGN(_STR) outside #ifdef __ASSEMBLY__ guard
	NFSv4.1: freeze the session table upon receiving NFS4ERR_BADSESSION
	dax: Fix dax_mapping_release() use after free
	dax: Introduce alloc_dev_dax_id()
	hwrng: st - keep clock enabled while hwrng is registered
	io_uring: ensure IOPOLL locks around deferred work
	USB: serial: option: add LARA-R6 01B PIDs
	usb: dwc3: gadget: Propagate core init errors to UDC during pullup
	phy: tegra: xusb: Clear the driver reference in usb-phy dev
	block: fix signed int overflow in Amiga partition support
	block: change all __u32 annotations to __be32 in affs_hardblocks.h
	SUNRPC: Fix UAF in svc_tcp_listen_data_ready()
	w1: w1_therm: fix locking behavior in convert_t
	w1: fix loop in w1_fini()
	sh: j2: Use ioremap() to translate device tree address into kernel memory
	serial: 8250: omap: Fix freeing of resources on failed register
	clk: qcom: gcc-ipq6018: Use floor ops for sdcc clocks
	media: usb: Check az6007_read() return value
	media: videodev2.h: Fix struct v4l2_input tuner index comment
	media: usb: siano: Fix warning due to null work_func_t function pointer
	clk: qcom: reset: Allow specifying custom reset delay
	clk: qcom: reset: support resetting multiple bits
	clk: qcom: ipq6018: fix networking resets
	usb: dwc3: qcom: Fix potential memory leak
	usb: gadget: u_serial: Add null pointer check in gserial_suspend
	extcon: Fix kernel doc of property fields to avoid warnings
	extcon: Fix kernel doc of property capability fields to avoid warnings
	usb: phy: phy-tahvo: fix memory leak in tahvo_usb_probe()
	usb: hide unused usbfs_notify_suspend/resume functions
	serial: 8250: lock port for stop_rx() in omap8250_irq()
	serial: 8250: lock port for UART_IER access in omap8250_irq()
	kernfs: fix missing kernfs_idr_lock to remove an ID from the IDR
	coresight: Fix loss of connection info when a module is unloaded
	mfd: rt5033: Drop rt5033-battery sub-device
	media: venus: helpers: Fix ALIGN() of non power of two
	media: atomisp: gmin_platform: fix out_len in gmin_get_config_dsm_var()
	KVM: s390: fix KVM_S390_GET_CMMA_BITS for GFNs in memslot holes
	usb: dwc3: qcom: Release the correct resources in dwc3_qcom_remove()
	usb: dwc3: qcom: Fix an error handling path in dwc3_qcom_probe()
	usb: common: usb-conn-gpio: Set last role to unknown before initial detection
	usb: dwc3-meson-g12a: Fix an error handling path in dwc3_meson_g12a_probe()
	mfd: intel-lpss: Add missing check for platform_get_resource
	Revert "usb: common: usb-conn-gpio: Set last role to unknown before initial detection"
	serial: 8250_omap: Use force_suspend and resume for system suspend
	test_firmware: return ENOMEM instead of ENOSPC on failed memory allocation
	mfd: stmfx: Fix error path in stmfx_chip_init
	mfd: stmfx: Nullify stmfx->vdd in case of error
	KVM: s390: vsie: fix the length of APCB bitmap
	mfd: stmpe: Only disable the regulators if they are enabled
	phy: tegra: xusb: check return value of devm_kzalloc()
	pwm: imx-tpm: force 'real_period' to be zero in suspend
	pwm: sysfs: Do not apply state to already disabled PWMs
	rtc: st-lpc: Release some resources in st_rtc_probe() in case of error
	media: cec: i2c: ch7322: also select REGMAP
	sctp: fix potential deadlock on &net->sctp.addr_wq_lock
	Add MODULE_FIRMWARE() for FIRMWARE_TG357766.
	net: dsa: vsc73xx: fix MTU configuration
	spi: bcm-qspi: return error if neither hif_mspi nor mspi is available
	mailbox: ti-msgmgr: Fill non-message tx data fields with 0x0
	f2fs: fix error path handling in truncate_dnode()
	octeontx2-af: Fix mapping for NIX block from CGX connection
	powerpc: allow PPC_EARLY_DEBUG_CPM only when SERIAL_CPM=y
	net: bridge: keep ports without IFF_UNICAST_FLT in BR_PROMISC mode
	tcp: annotate data races in __tcp_oow_rate_limited()
	xsk: Honor SO_BINDTODEVICE on bind
	net/sched: act_pedit: Add size check for TCA_PEDIT_PARMS_EX
	pptp: Fix fib lookup calls.
	net: dsa: tag_sja1105: fix MAC DA patching from meta frames
	s390/qeth: Fix vipa deletion
	sh: dma: Fix DMA channel offset calculation
	apparmor: fix missing error check for rhashtable_insert_fast
	i2c: xiic: Defer xiic_wakeup() and __xiic_start_xfer() in xiic_process()
	i2c: xiic: Don't try to handle more interrupt events after error
	ALSA: jack: Fix mutex call in snd_jack_report()
	i2c: qup: Add missing unwind goto in qup_i2c_probe()
	NFSD: add encoding of op_recall flag for write delegation
	io_uring: wait interruptibly for request completions on exit
	mmc: core: disable TRIM on Kingston EMMC04G-M627
	mmc: core: disable TRIM on Micron MTFC4GACAJCN-1M
	mmc: mmci: Set PROBE_PREFER_ASYNCHRONOUS
	mmc: sdhci: fix DMA configure compatibility issue when 64bit DMA mode is used.
	bcache: fixup btree_cache_wait list damage
	bcache: Remove unnecessary NULL point check in node allocations
	bcache: Fix __bch_btree_node_alloc to make the failure behavior consistent
	um: Use HOST_DIR for mrproper
	integrity: Fix possible multiple allocation in integrity_inode_get()
	autofs: use flexible array in ioctl structure
	shmem: use ramfs_kill_sb() for kill_sb method of ramfs-based tmpfs
	jffs2: reduce stack usage in jffs2_build_xattr_subsystem()
	fs: avoid empty option when generating legacy mount string
	ext4: Remove ext4 locking of moved directory
	Revert "f2fs: fix potential corruption when moving a directory"
	fs: Establish locking order for unrelated directories
	fs: Lock moved directories
	btrfs: add handling for RAID1C23/DUP to btrfs_reduce_alloc_profile
	btrfs: fix race when deleting quota root from the dirty cow roots list
	ASoC: mediatek: mt8173: Fix irq error path
	ASoC: mediatek: mt8173: Fix snd_soc_component_initialize error path
	ARM: orion5x: fix d2net gpio initialization
	leds: trigger: netdev: Recheck NETDEV_LED_MODE_LINKUP on dev rename
	fs: no need to check source
	fanotify: disallow mount/sb marks on kernel internal pseudo fs
	tpm, tpm_tis: Claim locality in interrupt handler
	selftests/bpf: Add verifier test for PTR_TO_MEM spill
	block: add overflow checks for Amiga partition support
	sh: pgtable-3level: Fix cast to pointer from integer of different size
	netfilter: nf_tables: use net_generic infra for transaction data
	netfilter: nf_tables: add rescheduling points during loop detection walks
	netfilter: nf_tables: incorrect error path handling with NFT_MSG_NEWRULE
	netfilter: nf_tables: fix chain binding transaction logic
	netfilter: nf_tables: add NFT_TRANS_PREPARE_ERROR to deal with bound set/chain
	netfilter: nf_tables: reject unbound anonymous set before commit phase
	netfilter: nf_tables: reject unbound chain set before commit phase
	netfilter: nftables: rename set element data activation/deactivation functions
	netfilter: nf_tables: drop map element references from preparation phase
	netfilter: nf_tables: unbind non-anonymous set if rule construction fails
	netfilter: nf_tables: fix scheduling-while-atomic splat
	netfilter: conntrack: Avoid nf_ct_helper_hash uses after free
	netfilter: nf_tables: do not ignore genmask when looking up chain by id
	netfilter: nf_tables: prevent OOB access in nft_byteorder_eval
	wireguard: queueing: use saner cpu selection wrapping
	wireguard: netlink: send staged packets when setting initial private key
	tty: serial: fsl_lpuart: add earlycon for imx8ulp platform
	rcu-tasks: Mark ->trc_reader_nesting data races
	rcu-tasks: Mark ->trc_reader_special.b.need_qs data races
	rcu-tasks: Simplify trc_read_check_handler() atomic operations
	block/partition: fix signedness issue for Amiga partitions
	io_uring: Use io_schedule* in cqring wait
	io_uring: add reschedule point to handle_tw_list()
	net: lan743x: Don't sleep in atomic context
	workqueue: clean up WORK_* constant types, clarify masking
	drm/panel: simple: Add connector_type for innolux_at043tn24
	drm/panel: simple: Add Powertip PH800480T013 drm_display_mode flags
	igc: Remove delay during TX ring configuration
	net/mlx5e: fix double free in mlx5e_destroy_flow_table
	net/mlx5e: Check for NOT_READY flag state after locking
	igc: set TP bit in 'supported' and 'advertising' fields of ethtool_link_ksettings
	scsi: qla2xxx: Fix error code in qla2x00_start_sp()
	net: mvneta: fix txq_map in case of txq_number==1
	net/sched: cls_fw: Fix improper refcount update leads to use-after-free
	gve: Set default duplex configuration to full
	ionic: remove WARN_ON to prevent panic_on_warn
	net: bgmac: postpone turning IRQs off to avoid SoC hangs
	net: prevent skb corruption on frag list segmentation
	icmp6: Fix null-ptr-deref of ip6_null_entry->rt6i_idev in icmp6_dev().
	udp6: fix udp6_ehashfn() typo
	ntb: idt: Fix error handling in idt_pci_driver_init()
	NTB: amd: Fix error handling in amd_ntb_pci_driver_init()
	ntb: intel: Fix error handling in intel_ntb_pci_driver_init()
	NTB: ntb_transport: fix possible memory leak while device_register() fails
	NTB: ntb_tool: Add check for devm_kcalloc
	ipv6/addrconf: fix a potential refcount underflow for idev
	platform/x86: wmi: remove unnecessary argument
	platform/x86: wmi: use guid_t and guid_equal()
	platform/x86: wmi: move variables
	platform/x86: wmi: Break possible infinite loop when parsing GUID
	igc: Fix launchtime before start of cycle
	igc: Fix inserting of empty frame for launchtime
	riscv: bpf: Move bpf_jit_alloc_exec() and bpf_jit_free_exec() to core
	riscv: bpf: Avoid breaking W^X
	bpf, riscv: Support riscv jit to provide bpf_line_info
	riscv, bpf: Fix inconsistent JIT image generation
	erofs: avoid infinite loop in z_erofs_do_read_page() when reading beyond EOF
	wifi: airo: avoid uninitialized warning in airo_get_rate()
	net/sched: flower: Ensure both minimum and maximum ports are specified
	netdevsim: fix uninitialized data in nsim_dev_trap_fa_cookie_write()
	net/sched: make psched_mtu() RTNL-less safe
	net/sched: sch_qfq: refactor parsing of netlink parameters
	net/sched: sch_qfq: account for stab overhead in qfq_enqueue
	nvme-pci: fix DMA direction of unmapping integrity data
	f2fs: fix to avoid NULL pointer dereference f2fs_write_end_io()
	pinctrl: amd: Fix mistake in handling clearing pins at startup
	pinctrl: amd: Detect internal GPIO0 debounce handling
	pinctrl: amd: Only use special debounce behavior for GPIO 0
	tpm: tpm_vtpm_proxy: fix a race condition in /dev/vtpmx creation
	mtd: rawnand: meson: fix unaligned DMA buffers handling
	net: bcmgenet: Ensure MDIO unregistration has clocks enabled
	powerpc: Fail build if using recordmcount with binutils v2.37
	misc: fastrpc: Create fastrpc scalar with correct buffer count
	erofs: fix compact 4B support for 16k block size
	MIPS: Loongson: Fix cpu_probe_loongson() again
	ext4: Fix reusing stale buffer heads from last failed mounting
	ext4: fix wrong unit use in ext4_mb_clear_bb
	ext4: get block from bh in ext4_free_blocks for fast commit replay
	ext4: fix wrong unit use in ext4_mb_new_blocks
	ext4: only update i_reserved_data_blocks on successful block allocation
	jfs: jfs_dmap: Validate db_l2nbperpage while mounting
	hwrng: imx-rngc - fix the timeout for init and self check
	PCI/PM: Avoid putting EloPOS E2/S2/H2 PCIe Ports in D3cold
	PCI: Add function 1 DMA alias quirk for Marvell 88SE9235
	PCI: qcom: Disable write access to read only registers for IP v2.3.3
	PCI: rockchip: Assert PCI Configuration Enable bit after probe
	PCI: rockchip: Write PCI Device ID to correct register
	PCI: rockchip: Add poll and timeout to wait for PHY PLLs to be locked
	PCI: rockchip: Fix legacy IRQ generation for RK3399 PCIe endpoint core
	PCI: rockchip: Use u32 variable to access 32-bit registers
	PCI: rockchip: Set address alignment for endpoint mode
	misc: pci_endpoint_test: Free IRQs before removing the device
	misc: pci_endpoint_test: Re-init completion for every test
	md/raid0: add discard support for the 'original' layout
	fs: dlm: return positive pid value for F_GETLK
	drm/atomic: Allow vblank-enabled + self-refresh "disable"
	drm/rockchip: vop: Leave vblank enabled in self-refresh
	drm/amd/display: Correct `DMUB_FW_VERSION` macro
	serial: atmel: don't enable IRQs prematurely
	tty: serial: samsung_tty: Fix a memory leak in s3c24xx_serial_getclk() in case of error
	tty: serial: samsung_tty: Fix a memory leak in s3c24xx_serial_getclk() when iterating clk
	firmware: stratix10-svc: Fix a potential resource leak in svc_create_memory_pool()
	ceph: don't let check_caps skip sending responses for revoke msgs
	xhci: Fix resume issue of some ZHAOXIN hosts
	xhci: Fix TRB prefetch issue of ZHAOXIN hosts
	xhci: Show ZHAOXIN xHCI root hub speed correctly
	meson saradc: fix clock divider mask length
	Revert "8250: add support for ASIX devices with a FIFO bug"
	s390/decompressor: fix misaligned symbol build error
	tracing/histograms: Add histograms to hist_vars if they have referenced variables
	samples: ftrace: Save required argument registers in sample trampolines
	net: ena: fix shift-out-of-bounds in exponential backoff
	ring-buffer: Fix deadloop issue on reading trace_pipe
	xtensa: ISS: fix call to split_if_spec
	tracing: Fix null pointer dereference in tracing_err_log_open()
	tracing/probes: Fix not to count error code to total length
	scsi: qla2xxx: Wait for io return on terminate rport
	scsi: qla2xxx: Array index may go out of bound
	scsi: qla2xxx: Fix buffer overrun
	scsi: qla2xxx: Fix potential NULL pointer dereference
	scsi: qla2xxx: Check valid rport returned by fc_bsg_to_rport()
	scsi: qla2xxx: Correct the index of array
	scsi: qla2xxx: Pointer may be dereferenced
	scsi: qla2xxx: Remove unused nvme_ls_waitq wait queue
	net/sched: sch_qfq: reintroduce lmax bound check for MTU
	RDMA/cma: Ensure rdma_addr_cancel() happens before issuing more requests
	drm/atomic: Fix potential use-after-free in nonblocking commits
	ALSA: hda/realtek - remove 3k pull low procedure
	ALSA: hda/realtek: Enable Mute LED on HP Laptop 15s-eq2xxx
	keys: Fix linking a duplicate key to a keyring's assoc_array
	perf probe: Add test for regression introduced by switch to die_get_decl_file()
	btrfs: fix warning when putting transaction with qgroups enabled after abort
	fuse: revalidate: don't invalidate if interrupted
	selftests: tc: set timeout to 15 minutes
	selftests: tc: add 'ct' action kconfig dep
	regmap: Drop initial version of maximum transfer length fixes
	regmap: Account for register length in SMBus I/O limits
	can: bcm: Fix UAF in bcm_proc_show()
	drm/client: Fix memory leak in drm_client_target_cloned
	drm/client: Fix memory leak in drm_client_modeset_probe
	ASoC: fsl_sai: Disable bit clock with transmitter
	ext4: correct inline offset when handling xattrs in inode body
	debugobjects: Recheck debug_objects_enabled before reporting
	nbd: Add the maximum limit of allocated index in nbd_dev_add
	md: fix data corruption for raid456 when reshape restart while grow up
	md/raid10: prevent soft lockup while flush writes
	posix-timers: Ensure timer ID search-loop limit is valid
	btrfs: add xxhash to fast checksum implementations
	ACPI: button: Add lid disable DMI quirk for Nextbook Ares 8A
	ACPI: video: Add backlight=native DMI quirk for Apple iMac11,3
	ACPI: video: Add backlight=native DMI quirk for Lenovo ThinkPad X131e (3371 AMD version)
	arm64: set __exception_irq_entry with __irq_entry as a default
	arm64: mm: fix VA-range sanity check
	sched/fair: Don't balance task to its current running CPU
	wifi: ath11k: fix registration of 6Ghz-only phy without the full channel range
	bpf: Address KCSAN report on bpf_lru_list
	devlink: report devlink_port_type_warn source device
	wifi: wext-core: Fix -Wstringop-overflow warning in ioctl_standard_iw_point()
	wifi: iwlwifi: mvm: avoid baid size integer overflow
	igb: Fix igb_down hung on surprise removal
	spi: bcm63xx: fix max prepend length
	fbdev: imxfb: warn about invalid left/right margin
	pinctrl: amd: Use amd_pinconf_set() for all config options
	net: ethernet: ti: cpsw_ale: Fix cpsw_ale_get_field()/cpsw_ale_set_field()
	bridge: Add extack warning when enabling STP in netns.
	iavf: Fix use-after-free in free_netdev
	iavf: Fix out-of-bounds when setting channels on remove
	security: keys: Modify mismatched function name
	octeontx2-pf: Dont allocate BPIDs for LBK interfaces
	tcp: annotate data-races around tcp_rsk(req)->ts_recent
	net: ipv4: Use kfree_sensitive instead of kfree
	net:ipv6: check return value of pskb_trim()
	Revert "tcp: avoid the lookup process failing to get sk in ehash table"
	fbdev: au1200fb: Fix missing IRQ check in au1200fb_drv_probe
	llc: Don't drop packet from non-root netns.
	netfilter: nf_tables: fix spurious set element insertion failure
	netfilter: nf_tables: can't schedule in nft_chain_validate
	netfilter: nft_set_pipapo: fix improper element removal
	netfilter: nf_tables: skip bound chain in netns release path
	netfilter: nf_tables: skip bound chain on rule flush
	tcp: annotate data-races around tp->tcp_tx_delay
	tcp: annotate data-races around tp->keepalive_time
	tcp: annotate data-races around tp->keepalive_intvl
	tcp: annotate data-races around tp->keepalive_probes
	net: Introduce net.ipv4.tcp_migrate_req.
	tcp: Fix data-races around sysctl_tcp_syn(ack)?_retries.
	tcp: annotate data-races around icsk->icsk_syn_retries
	tcp: annotate data-races around tp->linger2
	tcp: annotate data-races around rskq_defer_accept
	tcp: annotate data-races around tp->notsent_lowat
	tcp: annotate data-races around icsk->icsk_user_timeout
	tcp: annotate data-races around fastopenq.max_qlen
	net: phy: prevent stale pointer dereference in phy_init()
	tracing/histograms: Return an error if we fail to add histogram to hist_vars list
	tracing: Fix memory leak of iter->temp when reading trace_pipe
	ftrace: Store the order of pages allocated in ftrace_page
	ftrace: Fix possible warning on checking all pages used in ftrace_process_locs()
	Linux 5.10.188

Change-Id: Ibcc1adc43df5b8f649b12078eedd5d4f57de4578
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2023-08-03 11:23:27 +00:00

2242 lines
58 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* TCP over IPv6
* Linux INET6 implementation
*
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
* Based on:
* linux/net/ipv4/tcp.c
* linux/net/ipv4/tcp_input.c
* linux/net/ipv4/tcp_output.c
*
* Fixes:
* Hideaki YOSHIFUJI : sin6_scope_id support
* YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
* Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
* a single port at the same time.
* YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
*/
#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>
#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <crypto/hash.h>
#include <linux/scatterlist.h>
#include <trace/events/tcp.h>
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
const struct in6_addr *addr,
int l3index)
{
return NULL;
}
#endif
/* Helper returning the inet6 address from a given tcp socket.
* It can be used in TCP stack instead of inet6_sk(sk).
* This avoids a dereference and allow compiler optimizations.
* It is a specialized version of inet6_sk_generic().
*/
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
if (dst && dst_hold_safe(dst)) {
const struct rt6_info *rt = (const struct rt6_info *)dst;
rcu_assign_pointer(sk->sk_rx_dst, dst);
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
}
}
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
ipv6_hdr(skb)->saddr.s6_addr32,
tcp_hdr(skb)->dest,
tcp_hdr(skb)->source);
}
static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
ipv6_hdr(skb)->saddr.s6_addr32);
}
static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
/* This check is replicated from tcp_v6_connect() and intended to
* prevent BPF program called below from accessing bytes that are out
* of the bound specified by user in addr_len.
*/
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
sock_owned_by_me(sk);
return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
struct inet_sock *inet = inet_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
struct ipv6_txoptions *opt;
struct flowi6 fl6;
struct dst_entry *dst;
int addr_type;
int err;
struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
if (usin->sin6_family != AF_INET6)
return -EAFNOSUPPORT;
memset(&fl6, 0, sizeof(fl6));
if (np->sndflow) {
fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
IP6_ECN_flow_init(fl6.flowlabel);
if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
struct ip6_flowlabel *flowlabel;
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (IS_ERR(flowlabel))
return -EINVAL;
fl6_sock_release(flowlabel);
}
}
/*
* connect() to INADDR_ANY means loopback (BSD'ism).
*/
if (ipv6_addr_any(&usin->sin6_addr)) {
if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
&usin->sin6_addr);
else
usin->sin6_addr = in6addr_loopback;
}
addr_type = ipv6_addr_type(&usin->sin6_addr);
if (addr_type & IPV6_ADDR_MULTICAST)
return -ENETUNREACH;
if (addr_type&IPV6_ADDR_LINKLOCAL) {
if (addr_len >= sizeof(struct sockaddr_in6) &&
usin->sin6_scope_id) {
/* If interface is set while binding, indices
* must coincide.
*/
if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
return -EINVAL;
sk->sk_bound_dev_if = usin->sin6_scope_id;
}
/* Connect to link-local address requires an interface */
if (!sk->sk_bound_dev_if)
return -EINVAL;
}
if (tp->rx_opt.ts_recent_stamp &&
!ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
WRITE_ONCE(tp->write_seq, 0);
}
sk->sk_v6_daddr = usin->sin6_addr;
np->flow_label = fl6.flowlabel;
/*
* TCP over IPv4
*/
if (addr_type & IPV6_ADDR_MAPPED) {
u32 exthdrlen = icsk->icsk_ext_hdr_len;
struct sockaddr_in sin;
if (__ipv6_only_sock(sk))
return -ENETUNREACH;
sin.sin_family = AF_INET;
sin.sin_port = usin->sin6_port;
sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
icsk->icsk_af_ops = &ipv6_mapped;
if (sk_is_mptcp(sk))
mptcpv6_handle_mapped(sk, true);
sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif
err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
if (err) {
icsk->icsk_ext_hdr_len = exthdrlen;
icsk->icsk_af_ops = &ipv6_specific;
if (sk_is_mptcp(sk))
mptcpv6_handle_mapped(sk, false);
sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
tp->af_specific = &tcp_sock_ipv6_specific;
#endif
goto failure;
}
np->saddr = sk->sk_v6_rcv_saddr;
return err;
}
if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
saddr = &sk->sk_v6_rcv_saddr;
fl6.flowi6_proto = IPPROTO_TCP;
fl6.daddr = sk->sk_v6_daddr;
fl6.saddr = saddr ? *saddr : np->saddr;
fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_dport = usin->sin6_port;
fl6.fl6_sport = inet->inet_sport;
fl6.flowi6_uid = sk->sk_uid;
opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
final_p = fl6_update_dst(&fl6, opt, &final);
security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto failure;
}
if (!saddr) {
saddr = &fl6.saddr;
sk->sk_v6_rcv_saddr = *saddr;
}
/* set the source address */
np->saddr = *saddr;
inet->inet_rcv_saddr = LOOPBACK4_IPV6;
sk->sk_gso_type = SKB_GSO_TCPV6;
ip6_dst_store(sk, dst, NULL, NULL);
icsk->icsk_ext_hdr_len = 0;
if (opt)
icsk->icsk_ext_hdr_len = opt->opt_flen +
opt->opt_nflen;
tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
inet->inet_dport = usin->sin6_port;
tcp_set_state(sk, TCP_SYN_SENT);
err = inet6_hash_connect(tcp_death_row, sk);
if (err)
goto late_failure;
sk_set_txhash(sk);
if (likely(!tp->repair)) {
if (!tp->write_seq)
WRITE_ONCE(tp->write_seq,
secure_tcpv6_seq(np->saddr.s6_addr32,
sk->sk_v6_daddr.s6_addr32,
inet->inet_sport,
inet->inet_dport));
tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
np->saddr.s6_addr32,
sk->sk_v6_daddr.s6_addr32);
}
if (tcp_fastopen_defer_connect(sk, &err))
return err;
if (err)
goto late_failure;
err = tcp_connect(sk);
if (err)
goto late_failure;
return 0;
late_failure:
tcp_set_state(sk, TCP_CLOSE);
if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
inet_reset_saddr(sk);
failure:
inet->inet_dport = 0;
sk->sk_route_caps = 0;
return err;
}
static void tcp_v6_mtu_reduced(struct sock *sk)
{
struct dst_entry *dst;
u32 mtu;
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
return;
mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
/* Drop requests trying to increase our current mss.
* Check done in __ip6_rt_update_pmtu() is too late.
*/
if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
return;
dst = inet6_csk_update_pmtu(sk, mtu);
if (!dst)
return;
if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
tcp_sync_mss(sk, dst_mtu(dst));
tcp_simple_retransmit(sk);
}
}
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
struct net *net = dev_net(skb->dev);
struct request_sock *fastopen;
struct ipv6_pinfo *np;
struct tcp_sock *tp;
__u32 seq, snd_una;
struct sock *sk;
bool fatal;
int err;
sk = __inet6_lookup_established(net, &tcp_hashinfo,
&hdr->daddr, th->dest,
&hdr->saddr, ntohs(th->source),
skb->dev->ifindex, inet6_sdif(skb));
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
return -ENOENT;
}
if (sk->sk_state == TCP_TIME_WAIT) {
inet_twsk_put(inet_twsk(sk));
return 0;
}
seq = ntohl(th->seq);
fatal = icmpv6_err_convert(type, code, &err);
if (sk->sk_state == TCP_NEW_SYN_RECV) {
tcp_req_err(sk, seq, fatal);
return 0;
}
bh_lock_sock(sk);
if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == TCP_CLOSE)
goto out;
if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
goto out;
}
tp = tcp_sk(sk);
/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
fastopen = rcu_dereference(tp->fastopen_rsk);
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
if (sk->sk_state != TCP_LISTEN &&
!between(seq, snd_una, tp->snd_nxt)) {
__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
np = tcp_inet6_sk(sk);
if (type == NDISC_REDIRECT) {
if (!sock_owned_by_user(sk)) {
struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
if (dst)
dst->ops->redirect(dst, sk, skb);
}
goto out;
}
if (type == ICMPV6_PKT_TOOBIG) {
u32 mtu = ntohl(info);
/* We are not interested in TCP_LISTEN and open_requests
* (SYN-ACKs send out by Linux are always <576bytes so
* they should go through unfragmented).
*/
if (sk->sk_state == TCP_LISTEN)
goto out;
if (!ip6_sk_accept_pmtu(sk))
goto out;
if (mtu < IPV6_MIN_MTU)
goto out;
WRITE_ONCE(tp->mtu_info, mtu);
if (!sock_owned_by_user(sk))
tcp_v6_mtu_reduced(sk);
else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
&sk->sk_tsq_flags))
sock_hold(sk);
goto out;
}
/* Might be for an request_sock */
switch (sk->sk_state) {
case TCP_SYN_SENT:
case TCP_SYN_RECV:
/* Only in fast or simultaneous open. If a fast open socket is
* already accepted it is treated as a connected one below.
*/
if (fastopen && !fastopen->sk)
break;
ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
if (!sock_owned_by_user(sk)) {
sk->sk_err = err;
sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
tcp_done(sk);
} else
sk->sk_err_soft = err;
goto out;
case TCP_LISTEN:
break;
default:
/* check if this ICMP message allows revert of backoff.
* (see RFC 6069)
*/
if (!fastopen && type == ICMPV6_DEST_UNREACH &&
code == ICMPV6_NOROUTE)
tcp_ld_RTO_revert(sk, seq);
}
if (!sock_owned_by_user(sk) && np->recverr) {
sk->sk_err = err;
sk->sk_error_report(sk);
} else
sk->sk_err_soft = err;
out:
bh_unlock_sock(sk);
sock_put(sk);
return 0;
}
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
enum tcp_synack_type synack_type,
struct sk_buff *syn_skb)
{
struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct ipv6_txoptions *opt;
struct flowi6 *fl6 = &fl->u.ip6;
struct sk_buff *skb;
int err = -ENOMEM;
u8 tclass;
/* First, grab a route. */
if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
IPPROTO_TCP)) == NULL)
goto done;
skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
if (skb) {
__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
&ireq->ir_v6_rmt_addr);
fl6->daddr = ireq->ir_v6_rmt_addr;
if (np->repflow && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
(np->tclass & INET_ECN_MASK) :
np->tclass;
if (!INET_ECN_is_capable(tclass) &&
tcp_bpf_ca_needs_ecn((struct sock *)req))
tclass |= INET_ECN_ECT_0;
rcu_read_lock();
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
tclass, sk->sk_priority);
rcu_read_unlock();
err = net_xmit_eval(err);
}
done:
return err;
}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
kfree(inet_rsk(req)->ipv6_opt);
kfree_skb(inet_rsk(req)->pktopts);
}
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
const struct in6_addr *addr,
int l3index)
{
return tcp_md5_do_lookup(sk, l3index,
(union tcp_md5_addr *)addr, AF_INET6);
}
static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
const struct sock *addr_sk)
{
int l3index;
l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
addr_sk->sk_bound_dev_if);
return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
l3index);
}
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
sockptr_t optval, int optlen)
{
struct tcp_md5sig cmd;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
int l3index = 0;
u8 prefixlen;
if (optlen < sizeof(cmd))
return -EINVAL;
if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
return -EFAULT;
if (sin6->sin6_family != AF_INET6)
return -EINVAL;
if (optname == TCP_MD5SIG_EXT &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
prefixlen = cmd.tcpm_prefixlen;
if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
prefixlen > 32))
return -EINVAL;
} else {
prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
}
if (optname == TCP_MD5SIG_EXT &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
struct net_device *dev;
rcu_read_lock();
dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
if (dev && netif_is_l3_master(dev))
l3index = dev->ifindex;
rcu_read_unlock();
/* ok to reference set/not set outside of rcu;
* right now device MUST be an L3 master
*/
if (!dev || !l3index)
return -EINVAL;
}
if (!cmd.tcpm_keylen) {
if (ipv6_addr_v4mapped(&sin6->sin6_addr))
return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
AF_INET, prefixlen,
l3index);
return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
AF_INET6, prefixlen, l3index);
}
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
return -EINVAL;
if (ipv6_addr_v4mapped(&sin6->sin6_addr))
return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
AF_INET, prefixlen, l3index,
cmd.tcpm_key, cmd.tcpm_keylen,
GFP_KERNEL);
return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
AF_INET6, prefixlen, l3index,
cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
const struct in6_addr *daddr,
const struct in6_addr *saddr,
const struct tcphdr *th, int nbytes)
{
struct tcp6_pseudohdr *bp;
struct scatterlist sg;
struct tcphdr *_th;
bp = hp->scratch;
/* 1. TCP pseudo-header (RFC2460) */
bp->saddr = *saddr;
bp->daddr = *daddr;
bp->protocol = cpu_to_be32(IPPROTO_TCP);
bp->len = cpu_to_be32(nbytes);
_th = (struct tcphdr *)(bp + 1);
memcpy(_th, th, sizeof(*th));
_th->check = 0;
sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
ahash_request_set_crypt(hp->md5_req, &sg, NULL,
sizeof(*bp) + sizeof(*th));
return crypto_ahash_update(hp->md5_req);
}
static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
const struct in6_addr *daddr, struct in6_addr *saddr,
const struct tcphdr *th)
{
struct tcp_md5sig_pool *hp;
struct ahash_request *req;
hp = tcp_get_md5sig_pool();
if (!hp)
goto clear_hash_noput;
req = hp->md5_req;
if (crypto_ahash_init(req))
goto clear_hash;
if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
goto clear_hash;
if (tcp_md5_hash_key(hp, key))
goto clear_hash;
ahash_request_set_crypt(req, NULL, md5_hash, 0);
if (crypto_ahash_final(req))
goto clear_hash;
tcp_put_md5sig_pool();
return 0;
clear_hash:
tcp_put_md5sig_pool();
clear_hash_noput:
memset(md5_hash, 0, 16);
return 1;
}
static int tcp_v6_md5_hash_skb(char *md5_hash,
const struct tcp_md5sig_key *key,
const struct sock *sk,
const struct sk_buff *skb)
{
const struct in6_addr *saddr, *daddr;
struct tcp_md5sig_pool *hp;
struct ahash_request *req;
const struct tcphdr *th = tcp_hdr(skb);
if (sk) { /* valid for establish/request sockets */
saddr = &sk->sk_v6_rcv_saddr;
daddr = &sk->sk_v6_daddr;
} else {
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
saddr = &ip6h->saddr;
daddr = &ip6h->daddr;
}
hp = tcp_get_md5sig_pool();
if (!hp)
goto clear_hash_noput;
req = hp->md5_req;
if (crypto_ahash_init(req))
goto clear_hash;
if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
goto clear_hash;
if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
goto clear_hash;
if (tcp_md5_hash_key(hp, key))
goto clear_hash;
ahash_request_set_crypt(req, NULL, md5_hash, 0);
if (crypto_ahash_final(req))
goto clear_hash;
tcp_put_md5sig_pool();
return 0;
clear_hash:
tcp_put_md5sig_pool();
clear_hash_noput:
memset(md5_hash, 0, 16);
return 1;
}
#endif
static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
const struct sk_buff *skb,
int dif, int sdif)
{
#ifdef CONFIG_TCP_MD5SIG
const __u8 *hash_location = NULL;
struct tcp_md5sig_key *hash_expected;
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
int genhash, l3index;
u8 newhash[16];
/* sdif set, means packet ingressed via a device
* in an L3 domain and dif is set to the l3mdev
*/
l3index = sdif ? dif : 0;
hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
hash_location = tcp_parse_md5sig_option(th);
/* We've parsed the options - do we have a hash? */
if (!hash_expected && !hash_location)
return false;
if (hash_expected && !hash_location) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
return true;
}
if (!hash_expected && hash_location) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
return true;
}
/* check the signature */
genhash = tcp_v6_md5_hash_skb(newhash,
hash_expected,
NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
genhash ? "failed" : "mismatch",
&ip6h->saddr, ntohs(th->source),
&ip6h->daddr, ntohs(th->dest), l3index);
return true;
}
#endif
return false;
}
static void tcp_v6_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
{
bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
struct inet_request_sock *ireq = inet_rsk(req);
const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
/* So that link locals have meaning */
if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = tcp_v6_iif(skb);
if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
(ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo ||
np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
np->rxopt.bits.rxohlim || np->repflow)) {
refcount_inc(&skb->users);
ireq->pktopts = skb;
}
}
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
struct flowi *fl,
const struct request_sock *req)
{
return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}
struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
.family = AF_INET6,
.obj_size = sizeof(struct tcp6_request_sock),
.rtx_syn_ack = tcp_rtx_synack,
.send_ack = tcp_v6_reqsk_send_ack,
.destructor = tcp_v6_reqsk_destructor,
.send_reset = tcp_v6_send_reset,
.syn_ack_timeout = tcp_syn_ack_timeout,
};
const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
.req_md5_lookup = tcp_v6_md5_lookup,
.calc_md5_hash = tcp_v6_md5_hash_skb,
#endif
.init_req = tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
.cookie_init_seq = cookie_v6_init_sequence,
#endif
.route_req = tcp_v6_route_req,
.init_seq = tcp_v6_init_seq,
.init_ts_off = tcp_v6_init_ts_off,
.send_synack = tcp_v6_send_synack,
};
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr,
int oif, struct tcp_md5sig_key *key, int rst,
u8 tclass, __be32 label, u32 priority)
{
const struct tcphdr *th = tcp_hdr(skb);
struct tcphdr *t1;
struct sk_buff *buff;
struct flowi6 fl6;
struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
struct sock *ctl_sk = net->ipv6.tcp_sk;
unsigned int tot_len = sizeof(struct tcphdr);
struct dst_entry *dst;
__be32 *topt;
__u32 mark = 0;
if (tsecr)
tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
if (key)
tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
GFP_ATOMIC);
if (!buff)
return;
skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
t1 = skb_push(buff, tot_len);
skb_reset_transport_header(buff);
/* Swap the send and the receive. */
memset(t1, 0, sizeof(*t1));
t1->dest = th->source;
t1->source = th->dest;
t1->doff = tot_len / 4;
t1->seq = htonl(seq);
t1->ack_seq = htonl(ack);
t1->ack = !rst || !th->ack;
t1->rst = rst;
t1->window = htons(win);
topt = (__be32 *)(t1 + 1);
if (tsecr) {
*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
*topt++ = htonl(tsval);
*topt++ = htonl(tsecr);
}
#ifdef CONFIG_TCP_MD5SIG
if (key) {
*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
tcp_v6_md5_hash_hdr((__u8 *)topt, key,
&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr, t1);
}
#endif
memset(&fl6, 0, sizeof(fl6));
fl6.daddr = ipv6_hdr(skb)->saddr;
fl6.saddr = ipv6_hdr(skb)->daddr;
fl6.flowlabel = label;
buff->ip_summed = CHECKSUM_PARTIAL;
buff->csum = 0;
__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
fl6.flowi6_proto = IPPROTO_TCP;
if (rt6_need_strict(&fl6.daddr) && !oif)
fl6.flowi6_oif = tcp_v6_iif(skb);
else {
if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
oif = skb->skb_iif;
fl6.flowi6_oif = oif;
}
if (sk) {
if (sk->sk_state == TCP_TIME_WAIT) {
mark = inet_twsk(sk)->tw_mark;
/* autoflowlabel relies on buff->hash */
skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
PKT_HASH_TYPE_L4);
} else {
mark = sk->sk_mark;
}
buff->tstamp = tcp_transmit_time(sk);
}
fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
fl6.fl6_dport = t1->dest;
fl6.fl6_sport = t1->source;
fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
/* Pass a socket to ip6_dst_lookup either it is for RST
* Underlying function will use this to retrieve the network
* namespace
*/
dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
if (!IS_ERR(dst)) {
skb_dst_set(buff, dst);
ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
tclass & ~INET_ECN_MASK, priority);
TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
if (rst)
TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
return;
}
kfree_skb(buff);
}
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
const struct tcphdr *th = tcp_hdr(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
u32 seq = 0, ack_seq = 0;
struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
const __u8 *hash_location = NULL;
unsigned char newhash[16];
int genhash;
struct sock *sk1 = NULL;
#endif
__be32 label = 0;
u32 priority = 0;
struct net *net;
int oif = 0;
if (th->rst)
return;
/* If sk not NULL, it means we did a successful lookup and incoming
* route had to be correct. prequeue might have dropped our dst.
*/
if (!sk && !ipv6_unicast_destination(skb))
return;
net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
rcu_read_lock();
hash_location = tcp_parse_md5sig_option(th);
if (sk && sk_fullsock(sk)) {
int l3index;
/* sdif set, means packet ingressed via a device
* in an L3 domain and inet_iif is set to it.
*/
l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
} else if (hash_location) {
int dif = tcp_v6_iif_l3_slave(skb);
int sdif = tcp_v6_sdif(skb);
int l3index;
/*
* active side is lost. Try to find listening socket through
* source port, and then find md5 key through listening socket.
* we are not loose security here:
* Incoming packet is checked with md5 hash with finding key,
* no RST generated if md5 hash doesn't match.
*/
sk1 = inet6_lookup_listener(net,
&tcp_hashinfo, NULL, 0,
&ipv6h->saddr,
th->source, &ipv6h->daddr,
ntohs(th->source), dif, sdif);
if (!sk1)
goto out;
/* sdif set, means packet ingressed via a device
* in an L3 domain and dif is set to it.
*/
l3index = tcp_v6_sdif(skb) ? dif : 0;
key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
if (!key)
goto out;
genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0)
goto out;
}
#endif
if (th->ack)
seq = ntohl(th->ack_seq);
else
ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
(th->doff << 2);
if (sk) {
oif = sk->sk_bound_dev_if;
if (sk_fullsock(sk)) {
const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
trace_tcp_send_reset(sk, skb);
if (np->repflow)
label = ip6_flowlabel(ipv6h);
priority = sk->sk_priority;
}
if (sk->sk_state == TCP_TIME_WAIT) {
label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
priority = inet_twsk(sk)->tw_priority;
}
} else {
if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
label = ip6_flowlabel(ipv6h);
}
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
ipv6_get_dsfield(ipv6h), label, priority);
#ifdef CONFIG_TCP_MD5SIG
out:
rcu_read_unlock();
#endif
}
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, u8 tclass,
__be32 label, u32 priority)
{
tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
tclass, label, priority);
}
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
inet_twsk_put(tw);
}
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
int l3index;
l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
/* RFC 7323 2.3
* The window field (SEG.WND) of every outgoing segment, with the
* exception of <SYN> segments, MUST be right-shifted by
* Rcv.Wind.Shift bits:
*/
tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
}
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
const struct tcphdr *th = tcp_hdr(skb);
if (!th->syn)
sk = cookie_v6_check(sk, skb);
#endif
return sk;
}
u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
struct tcphdr *th, u32 *cookie)
{
u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
&tcp_request_sock_ipv6_ops, sk, th);
if (mss) {
*cookie = __cookie_v6_init_sequence(iph, th, &mss);
tcp_synq_overflow(sk);
}
#endif
return mss;
}
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_conn_request(sk, skb);
if (!ipv6_unicast_destination(skb))
goto drop;
if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
return 0;
}
return tcp_conn_request(&tcp6_request_sock_ops,
&tcp_request_sock_ipv6_ops, sk, skb);
drop:
tcp_listendrop(sk);
return 0; /* don't send reset */
}
static void tcp_v6_restore_cb(struct sk_buff *skb)
{
/* We need to move header back to the beginning if xfrm6_policy_check()
* and tcp_v6_fill_cb() are going to be called again.
* ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
*/
memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
sizeof(struct inet6_skb_parm));
}
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst,
struct request_sock *req_unhash,
bool *own_req)
{
struct inet_request_sock *ireq;
struct ipv6_pinfo *newnp;
const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct ipv6_txoptions *opt;
struct inet_sock *newinet;
bool found_dup_sk = false;
struct tcp_sock *newtp;
struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key;
int l3index;
#endif
struct flowi6 fl6;
if (skb->protocol == htons(ETH_P_IP)) {
/*
* v6 mapped
*/
newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
req_unhash, own_req);
if (!newsk)
return NULL;
inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
newinet = inet_sk(newsk);
newnp = tcp_inet6_sk(newsk);
newtp = tcp_sk(newsk);
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
newnp->saddr = newsk->sk_v6_rcv_saddr;
inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
if (sk_is_mptcp(newsk))
mptcpv6_handle_mapped(newsk, true);
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif
newnp->ipv6_mc_list = NULL;
newnp->ipv6_ac_list = NULL;
newnp->ipv6_fl_list = NULL;
newnp->pktoptions = NULL;
newnp->opt = NULL;
newnp->mcast_oif = inet_iif(skb);
newnp->mcast_hops = ip_hdr(skb)->ttl;
newnp->rcv_flowinfo = 0;
if (np->repflow)
newnp->flow_label = 0;
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks count
* here, tcp_create_openreq_child now does this for us, see the comment in
* that function for the gory details. -acme
*/
/* It is tricky place. Until this moment IPv4 tcp
worked with IPv6 icsk.icsk_af_ops.
Sync it now.
*/
tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
return newsk;
}
ireq = inet_rsk(req);
if (sk_acceptq_is_full(sk))
goto out_overflow;
if (!dst) {
dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
if (!dst)
goto out;
}
newsk = tcp_create_openreq_child(sk, req, skb);
if (!newsk)
goto out_nonewsk;
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks
* count here, tcp_create_openreq_child now does this for us, see the
* comment in that function for the gory details. -acme
*/
newsk->sk_gso_type = SKB_GSO_TCPV6;
ip6_dst_store(newsk, dst, NULL, NULL);
inet6_sk_rx_dst_set(newsk, skb);
inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
newtp = tcp_sk(newsk);
newinet = inet_sk(newsk);
newnp = tcp_inet6_sk(newsk);
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
newnp->saddr = ireq->ir_v6_loc_addr;
newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
newsk->sk_bound_dev_if = ireq->ir_iif;
/* Now IPv6 options...
First: no IPv4 options.
*/
newinet->inet_opt = NULL;
newnp->ipv6_mc_list = NULL;
newnp->ipv6_ac_list = NULL;
newnp->ipv6_fl_list = NULL;
/* Clone RX bits */
newnp->rxopt.all = np->rxopt.all;
newnp->pktoptions = NULL;
newnp->opt = NULL;
newnp->mcast_oif = tcp_v6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
if (np->repflow)
newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
/* Set ToS of the new socket based upon the value of incoming SYN.
* ECT bits are set later in tcp_init_transfer().
*/
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
/* Clone native IPv6 options from listening socket (if any)
Yes, keeping reference count would be much more clever,
but we make one more one thing there: reattach optmem
to newsk.
*/
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
if (opt) {
opt = ipv6_dup_options(newsk, opt);
RCU_INIT_POINTER(newnp->opt, opt);
}
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (opt)
inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
opt->opt_flen;
tcp_ca_openreq_child(newsk, dst);
tcp_sync_mss(newsk, dst_mtu(dst));
newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
tcp_initialize_rcv_mss(newsk);
newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
#ifdef CONFIG_TCP_MD5SIG
l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
/* Copy over the MD5 key from the original socket */
key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
if (key) {
/* We're using one, so create a matching key
* on the newsk structure. If we fail to get
* memory, then we end up not copying the key
* across. Shucks.
*/
tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
AF_INET6, 128, l3index, key->key, key->keylen,
sk_gfp_mask(sk, GFP_ATOMIC));
}
#endif
if (__inet_inherit_port(sk, newsk) < 0) {
inet_csk_prepare_forced_close(newsk);
tcp_done(newsk);
goto out;
}
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
&found_dup_sk);
if (*own_req) {
tcp_move_syn(newtp, req);
/* Clone pktoptions received with SYN, if we own the req */
if (ireq->pktopts) {
newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
consume_skb(ireq->pktopts);
ireq->pktopts = NULL;
if (newnp->pktoptions)
tcp_v6_restore_cb(newnp->pktoptions);
}
} else {
if (!req_unhash && found_dup_sk) {
/* This code path should only be executed in the
* syncookie case only
*/
bh_unlock_sock(newsk);
sock_put(newsk);
newsk = NULL;
}
}
return newsk;
out_overflow:
__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
dst_release(dst);
out:
tcp_listendrop(sk);
return NULL;
}
/* The socket must have it's spinlock held when we get
* here, unless it is a TCP_LISTEN socket.
*
* We have a potential double-lock case here, so even when
* doing backlog processing we use the BH locking scheme.
* This is because we cannot sleep with the original spinlock
* held.
*/
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct sk_buff *opt_skb = NULL;
struct tcp_sock *tp;
/* Imagine: socket is IPv6. IPv4 packet arrives,
goes to IPv4 receive handler and backlogged.
From backlog it always goes here. Kerboom...
Fortunately, tcp_rcv_established and rcv_established
handle them correctly, but it is not case with
tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
*/
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_do_rcv(sk, skb);
/*
* socket locking is here for SMP purposes as backlog rcv
* is currently called with bh processing disabled.
*/
/* Do Stevens' IPV6_PKTOPTIONS.
Yes, guys, it is the only place in our code, where we
may make it not affecting IPv4.
The rest of code is protocol independent,
and I do not like idea to uglify IPv4.
Actually, all the idea behind IPV6_PKTOPTIONS
looks not very well thought. For now we latch
options, received in the last packet, enqueued
by tcp. Feel free to propose better solution.
--ANK (980728)
*/
if (np->rxopt.all)
opt_skb = skb_clone_and_charge_r(skb, sk);
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
struct dst_entry *dst;
dst = rcu_dereference_protected(sk->sk_rx_dst,
lockdep_sock_is_held(sk));
sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb);
if (dst) {
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
dst_release(dst);
}
}
tcp_rcv_established(sk, skb);
if (opt_skb)
goto ipv6_pktoptions;
return 0;
}
if (tcp_checksum_complete(skb))
goto csum_err;
if (sk->sk_state == TCP_LISTEN) {
struct sock *nsk = tcp_v6_cookie_check(sk, skb);
if (!nsk)
goto discard;
if (nsk != sk) {
if (tcp_child_process(sk, nsk, skb))
goto reset;
if (opt_skb)
__kfree_skb(opt_skb);
return 0;
}
} else
sock_rps_save_rxhash(sk, skb);
if (tcp_rcv_state_process(sk, skb))
goto reset;
if (opt_skb)
goto ipv6_pktoptions;
return 0;
reset:
tcp_v6_send_reset(sk, skb);
discard:
if (opt_skb)
__kfree_skb(opt_skb);
kfree_skb(skb);
return 0;
csum_err:
TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
ipv6_pktoptions:
/* Do you ask, what is it?
1. skb was enqueued by tcp.
2. skb is added to tail of read queue, rather than out of order.
3. socket is not in passive state.
4. Finally, it really contains options, which user wants to receive.
*/
tp = tcp_sk(sk);
if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
np->mcast_oif = tcp_v6_iif(opt_skb);
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
if (np->repflow)
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
tcp_v6_restore_cb(opt_skb);
opt_skb = xchg(&np->pktoptions, opt_skb);
} else {
__kfree_skb(opt_skb);
opt_skb = xchg(&np->pktoptions, NULL);
}
}
kfree_skb(opt_skb);
return 0;
}
static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
const struct tcphdr *th)
{
/* This is tricky: we move IP6CB at its correct location into
* TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
* _decode_session6() uses IP6CB().
* barrier() makes sure compiler won't play aliasing games.
*/
memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
sizeof(struct inet6_skb_parm));
barrier();
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp =
skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
struct sk_buff *skb_to_free;
int sdif = inet6_sdif(skb);
int dif = inet6_iif(skb);
const struct tcphdr *th;
const struct ipv6hdr *hdr;
bool refcounted;
struct sock *sk;
int ret;
struct net *net = dev_net(skb->dev);
if (skb->pkt_type != PACKET_HOST)
goto discard_it;
/*
* Count it even if it's bad.
*/
__TCP_INC_STATS(net, TCP_MIB_INSEGS);
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
th = (const struct tcphdr *)skb->data;
if (unlikely(th->doff < sizeof(struct tcphdr)/4))
goto bad_packet;
if (!pskb_may_pull(skb, th->doff*4))
goto discard_it;
if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
goto csum_error;
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
lookup:
sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
th->source, th->dest, inet6_iif(skb), sdif,
&refcounted);
if (!sk)
goto no_tcp_socket;
process:
if (sk->sk_state == TCP_TIME_WAIT)
goto do_time_wait;
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
bool req_stolen = false;
struct sock *nsk;
sk = req->rsk_listener;
if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
sk_drops_add(sk, skb);
reqsk_put(req);
goto discard_it;
}
if (tcp_checksum_complete(skb)) {
reqsk_put(req);
goto csum_error;
}
if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
sock_hold(sk);
refcounted = true;
nsk = NULL;
if (!tcp_filter(sk, skb)) {
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
}
if (!nsk) {
reqsk_put(req);
if (req_stolen) {
/* Another cpu got exclusive access to req
* and created a full blown socket.
* Try to feed this packet to this socket
* instead of discarding it.
*/
tcp_v6_restore_cb(skb);
sock_put(sk);
goto lookup;
}
goto discard_and_relse;
}
if (nsk == sk) {
reqsk_put(req);
tcp_v6_restore_cb(skb);
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v6_send_reset(nsk, skb);
goto discard_and_relse;
} else {
sock_put(sk);
return 0;
}
}
if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
goto discard_and_relse;
}
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
goto discard_and_relse;
if (tcp_filter(sk, skb))
goto discard_and_relse;
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
skb->dev = NULL;
if (sk->sk_state == TCP_LISTEN) {
ret = tcp_v6_do_rcv(sk, skb);
goto put_and_return;
}
sk_incoming_cpu_update(sk);
bh_lock_sock_nested(sk);
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
if (!sock_owned_by_user(sk)) {
skb_to_free = sk->sk_rx_skb_cache;
sk->sk_rx_skb_cache = NULL;
ret = tcp_v6_do_rcv(sk, skb);
} else {
if (tcp_add_backlog(sk, skb))
goto discard_and_relse;
skb_to_free = NULL;
}
bh_unlock_sock(sk);
if (skb_to_free)
__kfree_skb(skb_to_free);
put_and_return:
if (refcounted)
sock_put(sk);
return ret ? -1 : 0;
no_tcp_socket:
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;
tcp_v6_fill_cb(skb, hdr, th);
if (tcp_checksum_complete(skb)) {
csum_error:
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
} else {
tcp_v6_send_reset(NULL, skb);
}
discard_it:
kfree_skb(skb);
return 0;
discard_and_relse:
sk_drops_add(sk, skb);
if (refcounted)
sock_put(sk);
goto discard_it;
do_time_wait:
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}
tcp_v6_fill_cb(skb, hdr, th);
if (tcp_checksum_complete(skb)) {
inet_twsk_put(inet_twsk(sk));
goto csum_error;
}
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN:
{
struct sock *sk2;
sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
skb, __tcp_hdrlen(th),
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr,
ntohs(th->dest),
tcp_v6_iif_l3_slave(skb),
sdif);
if (sk2) {
struct inet_timewait_sock *tw = inet_twsk(sk);
inet_twsk_deschedule_put(tw);
sk = sk2;
tcp_v6_restore_cb(skb);
refcounted = false;
goto process;
}
}
/* to ACK */
fallthrough;
case TCP_TW_ACK:
tcp_v6_timewait_ack(sk, skb);
break;
case TCP_TW_RST:
tcp_v6_send_reset(sk, skb);
inet_twsk_deschedule_put(inet_twsk(sk));
goto discard_it;
case TCP_TW_SUCCESS:
;
}
goto discard_it;
}
void tcp_v6_early_demux(struct sk_buff *skb)
{
const struct ipv6hdr *hdr;
const struct tcphdr *th;
struct sock *sk;
if (skb->pkt_type != PACKET_HOST)
return;
if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
return;
hdr = ipv6_hdr(skb);
th = tcp_hdr(skb);
if (th->doff < sizeof(struct tcphdr) / 4)
return;
/* Note : We use inet6_iif() here, not tcp_v6_iif() */
sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
&hdr->saddr, th->source,
&hdr->daddr, ntohs(th->dest),
inet6_iif(skb), inet6_sdif(skb));
if (sk) {
skb->sk = sk;
skb->destructor = sock_edemux;
if (sk_fullsock(sk)) {
struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
if (dst)
dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
if (dst &&
inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
skb_dst_set_noref(skb, dst);
}
}
}
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
.twsk_unique = tcp_twsk_unique,
.twsk_destructor = tcp_twsk_destructor,
};
INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
struct ipv6_pinfo *np = inet6_sk(sk);
__tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
}
const struct inet_connection_sock_af_ops ipv6_specific = {
.queue_xmit = inet6_csk_xmit,
.send_check = tcp_v6_send_check,
.rebuild_header = inet6_sk_rebuild_header,
.sk_rx_dst_set = inet6_sk_rx_dst_set,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
.net_header_len = sizeof(struct ipv6hdr),
.net_frag_header_len = sizeof(struct frag_hdr),
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
.mtu_reduced = tcp_v6_mtu_reduced,
};
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
.md5_lookup = tcp_v6_md5_lookup,
.calc_md5_hash = tcp_v6_md5_hash_skb,
.md5_parse = tcp_v6_parse_md5_keys,
};
#endif
/*
* TCP over IPv4 via INET6 API
*/
static const struct inet_connection_sock_af_ops ipv6_mapped = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
.sk_rx_dst_set = inet_sk_rx_dst_set,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
.net_header_len = sizeof(struct iphdr),
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
.mtu_reduced = tcp_v4_mtu_reduced,
};
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
.md5_lookup = tcp_v4_md5_lookup,
.calc_md5_hash = tcp_v4_md5_hash_skb,
.md5_parse = tcp_v6_parse_md5_keys,
};
#endif
/* NOTE: A lot of things set to zero explicitly by call to
* sk_alloc() so need not be done here.
*/
static int tcp_v6_init_sock(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
tcp_init_sock(sk);
icsk->icsk_af_ops = &ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif
return 0;
}
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
const struct request_sock *req, int i)
{
long ttd = req->rsk_timer.expires - jiffies;
const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
if (ttd < 0)
ttd = 0;
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
"%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3],
inet_rsk(req)->ir_num,
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3],
ntohs(inet_rsk(req)->ir_rmt_port),
TCP_SYN_RECV,
0, 0, /* could print option size, but that is af dependent. */
1, /* timers active (only the expire timer) */
jiffies_to_clock_t(ttd),
req->num_timeout,
from_kuid_munged(seq_user_ns(seq),
sock_i_uid(req->rsk_listener)),
0, /* non standard timer */
0, /* open_requests have no inode */
0, req);
}
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
const struct in6_addr *dest, *src;
__u16 destp, srcp;
int timer_active;
unsigned long timer_expires;
const struct inet_sock *inet = inet_sk(sp);
const struct tcp_sock *tp = tcp_sk(sp);
const struct inet_connection_sock *icsk = inet_csk(sp);
const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
int rx_queue;
int state;
dest = &sp->sk_v6_daddr;
src = &sp->sk_v6_rcv_saddr;
destp = ntohs(inet->inet_dport);
srcp = ntohs(inet->inet_sport);
if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
timer_expires = icsk->icsk_timeout;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
timer_expires = icsk->icsk_timeout;
} else if (timer_pending(&sp->sk_timer)) {
timer_active = 2;
timer_expires = sp->sk_timer.expires;
} else {
timer_active = 0;
timer_expires = jiffies;
}
state = inet_sk_state_load(sp);
if (state == TCP_LISTEN)
rx_queue = READ_ONCE(sp->sk_ack_backlog);
else
/* Because we don't lock the socket,
* we might find a transient negative value.
*/
rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
READ_ONCE(tp->copied_seq), 0);
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
"%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3], srcp,
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
state,
READ_ONCE(tp->write_seq) - tp->snd_una,
rx_queue,
timer_active,
jiffies_delta_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
icsk->icsk_probes_out,
sock_i_ino(sp),
refcount_read(&sp->sk_refcnt), sp,
jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
tp->snd_cwnd,
state == TCP_LISTEN ?
fastopenq->max_qlen :
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
);
}
static void get_timewait6_sock(struct seq_file *seq,
struct inet_timewait_sock *tw, int i)
{
long delta = tw->tw_timer.expires - jiffies;
const struct in6_addr *dest, *src;
__u16 destp, srcp;
dest = &tw->tw_v6_daddr;
src = &tw->tw_v6_rcv_saddr;
destp = ntohs(tw->tw_dport);
srcp = ntohs(tw->tw_sport);
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
"%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3], srcp,
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
tw->tw_substate, 0, 0,
3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
refcount_read(&tw->tw_refcnt), tw);
}
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
struct tcp_iter_state *st;
struct sock *sk = v;
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
" sl "
"local_address "
"remote_address "
"st tx_queue rx_queue tr tm->when retrnsmt"
" uid timeout inode\n");
goto out;
}
st = seq->private;
if (sk->sk_state == TCP_TIME_WAIT)
get_timewait6_sock(seq, v, st->num);
else if (sk->sk_state == TCP_NEW_SYN_RECV)
get_openreq6(seq, v, st->num);
else
get_tcp6_sock(seq, v, st->num);
out:
return 0;
}
static const struct seq_operations tcp6_seq_ops = {
.show = tcp6_seq_show,
.start = tcp_seq_start,
.next = tcp_seq_next,
.stop = tcp_seq_stop,
};
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
.family = AF_INET6,
};
int __net_init tcp6_proc_init(struct net *net)
{
if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
return -ENOMEM;
return 0;
}
void tcp6_proc_exit(struct net *net)
{
remove_proc_entry("tcp6", net->proc_net);
}
#endif
struct proto tcpv6_prot = {
.name = "TCPv6",
.owner = THIS_MODULE,
.close = tcp_close,
.pre_connect = tcp_v6_pre_connect,
.connect = tcp_v6_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
.ioctl = tcp_ioctl,
.init = tcp_v6_init_sock,
.destroy = tcp_v4_destroy_sock,
.shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
.bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
.keepalive = tcp_set_keepalive,
.recvmsg = tcp_recvmsg,
.sendmsg = tcp_sendmsg,
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
.release_cb = tcp_release_cb,
.hash = inet6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
.leave_memory_pressure = tcp_leave_memory_pressure,
.stream_memory_free = tcp_stream_memory_free,
.sockets_allocated = &tcp_sockets_allocated,
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
.orphan_count = &tcp_orphan_count,
.sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp6_sock),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp6_timewait_sock_ops,
.rsk_prot = &tcp6_request_sock_ops,
.h.hashinfo = &tcp_hashinfo,
.no_autobind = true,
.diag_destroy = tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
static const struct inet6_protocol tcpv6_protocol = {
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
static struct inet_protosw tcpv6_protosw = {
.type = SOCK_STREAM,
.protocol = IPPROTO_TCP,
.prot = &tcpv6_prot,
.ops = &inet6_stream_ops,
.flags = INET_PROTOSW_PERMANENT |
INET_PROTOSW_ICSK,
};
static int __net_init tcpv6_net_init(struct net *net)
{
return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
SOCK_RAW, IPPROTO_TCP, net);
}
static void __net_exit tcpv6_net_exit(struct net *net)
{
inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}
static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}
static struct pernet_operations tcpv6_net_ops = {
.init = tcpv6_net_init,
.exit = tcpv6_net_exit,
.exit_batch = tcpv6_net_exit_batch,
};
int __init tcpv6_init(void)
{
int ret;
ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
if (ret)
goto out;
/* register inet6 protocol */
ret = inet6_register_protosw(&tcpv6_protosw);
if (ret)
goto out_tcpv6_protocol;
ret = register_pernet_subsys(&tcpv6_net_ops);
if (ret)
goto out_tcpv6_protosw;
ret = mptcpv6_init();
if (ret)
goto out_tcpv6_pernet_subsys;
out:
return ret;
out_tcpv6_pernet_subsys:
unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
goto out;
}
void tcpv6_exit(void)
{
unregister_pernet_subsys(&tcpv6_net_ops);
inet6_unregister_protosw(&tcpv6_protosw);
inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}