android_kernel_xiaomi_sm8450/mm/rmap.c
Greg Kroah-Hartman 9c2a5eef8f Merge tag 'android12-5.10.117_r00' into 'android12-5.10'
This is the merge of the upstream LTS release of 5.10.117 into the
android12-5.10 branch.

It contains the following commits:

fdd06dc6b0 ANDROID: GKI: db845c: Update symbols list and ABI
0974b8411a Merge 5.10.117 into android12-5.10-lts
7686a5c2a8 Linux 5.10.117
937c6b0e3e SUNRPC: Fix fall-through warnings for Clang
29f077d070 io_uring: always use original task when preparing req identity
1444e0568b usb: gadget: uvc: allow for application to cleanly shutdown
42505e3622 usb: gadget: uvc: rename function to be more consistent
002e7223dc ping: fix address binding wrt vrf
d9a1e82bf6 arm[64]/memremap: don't abuse pfn_valid() to ensure presence of linear map
49750c5e9a net: phy: Fix race condition on link status change
e68b60ae29 SUNRPC: Ensure we flush any closed sockets before xs_xprt_free()
dbe6974a39 SUNRPC: Don't call connect() more than once on a TCP socket
47541ed4d4 SUNRPC: Prevent immediate close+reconnect
2ab569edd8 SUNRPC: Clean up scheduling of autoclose
85844ea29f drm/vmwgfx: Initialize drm_mode_fb_cmd2
7e849dbe60 cgroup/cpuset: Remove cpus_allowed/mems_allowed setup in cpuset_init_smp()
6aa239d82e net: atlantic: always deep reset on pm op, fixing up my null deref regression
6158df4fa5 i40e: i40e_main: fix a missing check on list iterator
819796024c drm/nouveau/tegra: Stop using iommu_present()
e06605af8b ceph: fix setting of xattrs on async created inodes
86db01f373 serial: 8250_mtk: Fix register address for XON/XOFF character
84ad84e495 serial: 8250_mtk: Fix UART_EFR register address
f8d8440f13 slimbus: qcom: Fix IRQ check in qcom_slim_probe
d7b7c5532a USB: serial: option: add Fibocom MA510 modem
2ba0034e36 USB: serial: option: add Fibocom L610 modem
319b312edb USB: serial: qcserial: add support for Sierra Wireless EM7590
994395f356 USB: serial: pl2303: add device id for HP LM930 Display
8276a3dbe2 usb: typec: tcpci_mt6360: Update for BMC PHY setting
54979aa49e usb: typec: tcpci: Don't skip cleanup in .remove() on error
7335a6b11d usb: cdc-wdm: fix reading stuck on device close
6d47eceaf3 tty: n_gsm: fix mux activation issues in gsm_config()
69139a45b8 tty/serial: digicolor: fix possible null-ptr-deref in digicolor_uart_probe()
5a73581116 firmware_loader: use kernel credentials when reading firmware
d254309aab tcp: resalt the secret every 10 seconds
3abbfac1ab net: sfp: Add tx-fault workaround for Huawei MA5671A SFP ONT
48f1dd67a8 net: emaclite: Don't advertise 1000BASE-T and do auto negotiation
5c09dbdfd4 s390: disable -Warray-bounds
03ebc6fd5c ASoC: ops: Validate input values in snd_soc_put_volsw_range()
31606a73ba ASoC: max98090: Generate notifications on changes for custom control
ce154bd3bc ASoC: max98090: Reject invalid values in custom control put()
5ecaaaeb2c hwmon: (f71882fg) Fix negative temperature
88091c0275 gfs2: Fix filesystem block deallocation for short writes
fccf4bf3f2 tls: Fix context leak on tls_device_down
161c4edeca net: sfc: ef10: fix memory leak in efx_ef10_mtd_probe()
d5e1b41bf7 net/smc: non blocking recvmsg() return -EAGAIN when no data and signal_pending
e417a8fcea net: dsa: bcm_sf2: Fix Wake-on-LAN with mac_link_down()
9012209f43 net: bcmgenet: Check for Wake-on-LAN interrupt probe deferral
abe35bf3be net/sched: act_pedit: really ensure the skb is writable
b816ed53f3 s390/lcs: fix variable dereferenced before check
4d3c6d7418 s390/ctcm: fix potential memory leak
5497f87edc s390/ctcm: fix variable dereferenced before check
cc71c9f17c selftests: vm: Makefile: rename TARGETS to VMTARGETS
ce12e5ff8d hwmon: (ltq-cputemp) restrict it to SOC_XWAY
ceb3db723f dim: initialize all struct fields
8b1b8fc819 ionic: fix missing pci_release_regions() on error in ionic_probe()
2cb8689f45 nfs: fix broken handling of the softreval mount option
49c10784b9 mac80211_hwsim: call ieee80211_tx_prepare_skb under RCU protection
79432d2237 net: sfc: fix memory leak due to ptp channel
bdb8d4aed1 sfc: Use swap() instead of open coding it
33c93f6e55 netlink: do not reset transport header in netlink_recvmsg()
9e40f2c513 drm/nouveau: Fix a potential theorical leak in nouveau_get_backlight_name()
54f26fc07e ipv4: drop dst in multicast routing path
c07a84492f net: mscc: ocelot: avoid corrupting hardware counters when moving VCAP filters
abb237c544 net: mscc: ocelot: restrict tc-trap actions to VCAP IS2 lookup 0
f9674c52a1 net: mscc: ocelot: fix VCAP IS2 filters matching on both lookups
c1184d2888 net: mscc: ocelot: fix last VCAP IS1/IS2 filter persisting in hardware when deleted
e2cdde89d2 net: Fix features skip in for_each_netdev_feature()
c420d66047 mac80211: Reset MBSSID parameters upon connection
9cbf2a7d5d hwmon: (tmp401) Add OF device ID table
85eba08be2 iwlwifi: iwl-dbg: Use del_timer_sync() before freeing
a6a73781b4 batman-adv: Don't skb_split skbuffs with frag_list
0577ff1c69 Merge 5.10.116 into android12-5.10-lts
3f70116e5f Merge 5.10.115 into android12-5.10-lts
07a4d3649a Linux 5.10.116
d1ac096f88 mm: userfaultfd: fix missing cache flush in mcopy_atomic_pte() and __mcopy_atomic()
c6cbf5431a mm: hugetlb: fix missing cache flush in copy_huge_page_from_user()
308ff6a6e7 mm: fix missing cache flush for all tail pages of compound page
185fa5984d Bluetooth: Fix the creation of hdev->name
9ff4a6b806 arm: remove CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
dfb55dcf9d nfp: bpf: silence bitwise vs. logical OR warning
f89f76f4b0 drm/amd/display/dc/gpio/gpio_service: Pass around correct dce_{version, environment} types
efd1429fa9 block: drbd: drbd_nl: Make conversion to 'enum drbd_ret_code' explicit
a71658c7db regulator: consumer: Add missing stubs to regulator/consumer.h
7648f42d1a MIPS: Use address-of operator on section symbols
2ed28105c6 ANDROID: GKI: update the abi .xml file due to hex_to_bin() changes
ee8877df71 Revert "tcp: ensure to use the most recently sent skb when filling the rate sample"
6273d79c86 Merge 5.10.114 into android12-5.10-lts
e61686bb77 Linux 5.10.115
8528806abe mmc: rtsx: add 74 Clocks in power on flow
e1ab92302b PCI: aardvark: Fix reading MSI interrupt number
49143c9ed2 PCI: aardvark: Clear all MSIs at setup
7676a5b99f dm: interlock pending dm_io and dm_wait_for_bios_completion
a439819f47 block-map: add __GFP_ZERO flag for alloc_page in function bio_copy_kern
a22d66eb51 rcu: Apply callbacks processing time limit only on softirq
40fb3812d9 rcu: Fix callbacks processing time limit retaining cond_resched()
43dbc3edad KVM: LAPIC: Enable timer posted-interrupt only when mwait/hlt is advertised
9c8474fa34 KVM: x86/mmu: avoid NULL-pointer dereference on page freeing bugs
a474ee5ece KVM: x86: Do not change ICR on write to APIC_SELF_IPI
64e3e16dbc x86/kvm: Preserve BSP MSR_KVM_POLL_CONTROL across suspend/resume
5f884e0c2e net/mlx5: Fix slab-out-of-bounds while reading resource dump menu
599fc32e74 kvm: x86/cpuid: Only provide CPUID leaf 0xA if host has architectural PMU
0a960a3672 net: igmp: respect RCU rules in ip_mc_source() and ip_mc_msfilter()
4fd45ef704 btrfs: always log symlinks in full mode
687167eef9 smsc911x: allow using IRQ0
b280877eab selftests: ocelot: tc_flower_chains: specify conform-exceed action for policer
a9fd5d6cd5 bnxt_en: Fix unnecessary dropping of RX packets
72e4fc1a4e bnxt_en: Fix possible bnxt_open() failure caused by wrong RFS flag
9ac9f07f0f selftests: mirror_gre_bridge_1q: Avoid changing PVID while interface is operational
475237e807 hinic: fix bug of wq out of bound access
1b9f1f455d net: emaclite: Add error handling for of_address_to_resource()
8459485db7 net: cpsw: add missing of_node_put() in cpsw_probe_dt()
4eee980950 net: stmmac: dwmac-sun8i: add missing of_node_put() in sun8i_dwmac_register_mdio_mux()
2347e9c922 net: dsa: mt7530: add missing of_node_put() in mt7530_setup()
1092656cc4 net: ethernet: mediatek: add missing of_node_put() in mtk_sgmii_init()
408fb2680e NFSv4: Don't invalidate inode attributes on delegation return
c1b480e6be RDMA/siw: Fix a condition race issue in MPA request processing
5bf2a45e33 selftests/seccomp: Don't call read() on TTY from background pgrp
3ea0b44c01 net/mlx5: Avoid double clear or set of sync reset requested
2455331591 net/mlx5e: Fix the calling of update_buffer_lossy() API
e07c13fbdd net/mlx5e: CT: Fix queued up restore put() executing after relevant ft release
d8338a7a09 net/mlx5e: Don't match double-vlan packets if cvlan is not set
c7f87ad115 net/mlx5e: Fix trust state reset in reload
87f0d9a518 ASoC: dmaengine: Restore NULL prepare_slave_config() callback
ad87f8498e hwmon: (adt7470) Fix warning on module removal
997b8605e8 gpio: pca953x: fix irq_stat not updated when irq is disabled (irq_mask not set)
879b075a9a NFC: netlink: fix sleep in atomic bug when firmware download timeout
1961c5a688 nfc: nfcmrvl: main: reorder destructive operations in nfcmrvl_nci_unregister_dev to avoid bugs
8a9e7c64f4 nfc: replace improper check device_is_registered() in netlink related functions
11adc9ab3e can: grcan: only use the NAPI poll budget for RX
4df5e498e0 can: grcan: grcan_probe(): fix broken system id check for errata workaround needs
dd973c0185 can: grcan: use ofdev->dev when allocating DMA memory
45bdcb5ca4 can: isotp: remove re-binding of bound socket
13959b9117 can: grcan: grcan_close(): fix deadlock
6c7c0e131e s390/dasd: Fix read inconsistency for ESE DASD devices
6e02c0413a s390/dasd: Fix read for ESE with blksize < 4k
ecc8396827 s390/dasd: prevent double format of tracks for ESE devices
30e008ab3f s390/dasd: fix data corruption for ESE devices
d53d47fadd ASoC: meson: Fix event generation for AUI CODEC mux
93a1f0755e ASoC: meson: Fix event generation for G12A tohdmi mux
e8b08e2f17 ASoC: meson: Fix event generation for AUI ACODEC mux
954d55170f ASoC: wm8958: Fix change notifications for DSP controls
f45359824a ASoC: da7219: Fix change notifications for tone generator frequency
e6e61aab49 genirq: Synchronize interrupt thread startup
dcf1150f2e net: stmmac: disable Split Header (SPH) for Intel platforms
68f35987d4 firewire: core: extend card->lock in fw_core_handle_bus_reset
629b4003a7 firewire: remove check of list iterator against head past the loop body
e757ff4bbc firewire: fix potential uaf in outbound_phy_packet_callback()
70d25d4fba Revert "SUNRPC: attempt AF_LOCAL connect on setup"
466721d767 drm/amd/display: Avoid reading audio pattern past AUDIO_CHANNELS_COUNT
2e6f3d665a iommu/vt-d: Calculate mask for non-aligned flushes
fbb7c61e76 KVM: x86/svm: Account for family 17h event renumberings in amd_pmc_perf_hw_id
b085afe226 gpiolib: of: fix bounds check for 'gpio-reserved-ranges'
2b7cb072d0 mmc: core: Set HS clock speed before sending HS CMD13
66651d7199 mmc: sdhci-msm: Reset GCC_SDCC_BCR register for SDHC
2906c73632 ALSA: fireworks: fix wrong return count shorter than expected by 4 bytes
03ab174805 ALSA: hda/realtek: Add quirk for Yoga Duet 7 13ITL6 speakers
a196f277c5 parisc: Merge model and model name into one line in /proc/cpuinfo
326f02f172 MIPS: Fix CP0 counter erratum detection for R4k CPUs
681997eca1 Revert "ipv6: make ip6_rt_gc_expire an atomic_t"
141fbd343b Revert "oom_kill.c: futex: delay the OOM reaper to allow time for proper futex cleanup"
ca9b002a16 Merge 5.10.113 into android12-5.10-lts
f64cd19a00 Merge branch 'android12-5.10' into `android12-5.10-lts`
f40e35e79c Linux 5.10.114
2d74f61787 perf symbol: Remove arch__symbols__fixup_end()
bf98302e68 tty: n_gsm: fix software flow control handling
95b267271a tty: n_gsm: fix incorrect UA handling
70b045d9ae tty: n_gsm: fix reset fifo race condition
320a24c4ef tty: n_gsm: fix wrong command frame length field encoding
935f314b6f tty: n_gsm: fix wrong command retry handling
17b86db43c tty: n_gsm: fix missing explicit ldisc flush
a2baa907c2 tty: n_gsm: fix wrong DLCI release order
705925e693 tty: n_gsm: fix insufficient txframe size
842a9bbbef netfilter: nft_socket: only do sk lookups when indev is available
7346e54dbf tty: n_gsm: fix malformed counter for out of frame data
d19613895e tty: n_gsm: fix wrong signal octet encoding in convergence layer type 2
26f127f6d9 tty: n_gsm: fix mux cleanup after unregister tty device
f26c271492 tty: n_gsm: fix decoupled mux resource
47132f9f7f tty: n_gsm: fix restart handling via CLD command
b3c88d46db perf symbol: Update symbols__fixup_end()
3d0a3168a3 perf symbol: Pass is_kallsyms to symbols__fixup_end()
2ab14625b8 x86/cpu: Load microcode during restore_processor_state()
795afbe8b4 thermal: int340x: Fix attr.show callback prototype
11d16498d7 net: ethernet: stmmac: fix write to sgmii_adapter_base
236dd62230 drm/i915: Fix SEL_FETCH_PLANE_*(PIPE_B+) register addresses
78d4dccf16 kasan: prevent cpu_quarantine corruption when CPU offline and cache shrink occur at same time
5fef6df273 zonefs: Clear inode information flags on inode creation
92ed64a920 zonefs: Fix management of open zones
42e8ec3b4b powerpc/perf: Fix 32bit compile
ac3d077043 drivers: net: hippi: Fix deadlock in rr_close()
5399e7b80c cifs: destage any unwritten data to the server before calling copychunk_write
80fc45377f x86: __memcpy_flushcache: fix wrong alignment if size > 2^32
585ef03c9e ext4: fix bug_on in start_this_handle during umount filesystem
07da0be588 ASoC: wm8731: Disable the regulator when probing fails
1b1747ad7e ASoC: Intel: soc-acpi: correct device endpoints for max98373
aa138efd2b tcp: fix F-RTO may not work correctly when receiving DSACK
9d56e369bd Revert "ibmvnic: Add ethtool private flag for driver-defined queue limits"
96904c8289 ibmvnic: fix miscellaneous checks
17f71272ef ixgbe: ensure IPsec VF<->PF compatibility
c33d717e06 net: fec: add missing of_node_put() in fec_enet_init_stop_mode()
9591967ac4 bnx2x: fix napi API usage sequence
1781beb879 tls: Skip tls_append_frag on zero copy size
77b922683e drm/amd/display: Fix memory leak in dcn21_clock_source_create
18068e0527 drm/amdkfd: Fix GWS queue count
c0396f5e5b net: dsa: lantiq_gswip: Don't set GSWIP_MII_CFG_RMII_CLK
1204386e26 net: phy: marvell10g: fix return value on error
e974c730f0 net: bcmgenet: hide status block before TX timestamping
ee71b47da5 clk: sunxi: sun9i-mmc: check return value after calling platform_get_resource()
8dacbef4fe bus: sunxi-rsb: Fix the return value of sunxi_rsb_device_create()
9f29f6f8da tcp: make sure treq->af_specific is initialized
8a9d6ca360 tcp: fix potential xmit stalls caused by TCP_NOTSENT_LOWAT
720b6ced85 ip_gre, ip6_gre: Fix race condition on o_seqno in collect_md mode
41661b4c1a ip6_gre: Make o_seqno start from 0 in native mode
7b187fbd7e ip_gre: Make o_seqno start from 0 in native mode
83d128daff net/smc: sync err code when tcp connection was refused
9eb25e00f5 net: hns3: add return value for mailbox handling in PF
929c30c02d net: hns3: add validity check for message data length
e3ec78d82d net: hns3: modify the return code of hclge_get_ring_chain_from_mbx
06a40e7105 cpufreq: fix memory leak in sun50i_cpufreq_nvmem_probe
fb172e93f8 pinctrl: pistachio: fix use of irq_of_parse_and_map()
8f042884af arm64: dts: imx8mn-ddr4-evk: Describe the 32.768 kHz PMIC clock
73c35379db ARM: dts: imx6ull-colibri: fix vqmmc regulator
61a89d0a5b sctp: check asoc strreset_chunk in sctp_generate_reconf_event
41d6ac687d wireguard: device: check for metadata_dst with skb_valid_dst()
3c464db03c tcp: ensure to use the most recently sent skb when filling the rate sample
ce4c3f7087 pinctrl: stm32: Keep pinctrl block clock enabled when LEVEL IRQ requested
0c60271df0 tcp: md5: incorrect tcp_header_len for incoming connections
f4dad5a48d pinctrl: rockchip: fix RK3308 pinmux bits
9ef33d23f8 bpf, lwt: Fix crash when using bpf_skb_set_tunnel_key() from bpf_xmit lwt hook
6ac03e6ddd netfilter: nft_set_rbtree: overlap detection with element re-addition after deletion
72ae15d5ce net: dsa: Add missing of_node_put() in dsa_port_link_register_of
14cc2044c1 memory: renesas-rpc-if: Fix HF/OSPI data transfer in Manual Mode
690c1bc4bf pinctrl: stm32: Do not call stm32_gpio_get() for edge triggered IRQs in EOI
6f2bf9c5dd mtd: fix 'part' field data corruption in mtd_info
4da421035b mtd: rawnand: Fix return value check of wait_for_completion_timeout
94ca69b702 pinctrl: mediatek: moore: Fix build error
123b7e0388 ipvs: correctly print the memory size of ip_vs_conn_tab
f4446f2136 ARM: dts: logicpd-som-lv: Fix wrong pinmuxing on OMAP35
4a526cc29c ARM: dts: am3517-evm: Fix misc pinmuxing
b622bca852 ARM: dts: Fix mmc order for omap3-gta04
9419d27fe1 phy: ti: Add missing pm_runtime_disable() in serdes_am654_probe
9e00a6e1fd phy: mapphone-mdm6600: Fix PM error handling in phy_mdm6600_probe
eb659608e6 ARM: dts: at91: sama5d4_xplained: fix pinctrl phandle name
bb524f5a95 ARM: dts: at91: Map MCLK for wm8731 on at91sam9g20ek
4691ce8f28 phy: ti: omap-usb2: Fix error handling in omap_usb2_enable_clocks
76d1591a38 bus: ti-sysc: Make omap3 gpt12 quirk handling SoC specific
1b9855bf31 ARM: OMAP2+: Fix refcount leak in omap_gic_of_init
93cc8f184e phy: samsung: exynos5250-sata: fix missing device put in probe error paths
3ca7491570 phy: samsung: Fix missing of_node_put() in exynos_sata_phy_probe
8f7644ac24 ARM: dts: imx6qdl-apalis: Fix sgtl5000 detection issue
23b0711fcd USB: Fix xhci event ring dequeue pointer ERDP update issue
712302aed1 mtd: rawnand: fix ecc parameters for mt7622
207c7af341 iio:imu:bmi160: disable regulator in error path
70d2df257e arm64: dts: meson: remove CPU opps below 1GHz for SM1 boards
2d320609be arm64: dts: meson: remove CPU opps below 1GHz for G12B boards
c4fb41bdf4 video: fbdev: udlfb: properly check endpoint type
0967830e72 iocost: don't reset the inuse weight of under-weighted debtors
ad604cbd1d x86/pci/xen: Disable PCI/MSI[-X] masking for XEN_HVM guests
8fcce58c59 riscv: patch_text: Fixup last cpu should be master
51477d3b38 hex2bin: fix access beyond string end
616d354fb9 hex2bin: make the function hex_to_bin constant-time
1633cb2d4a pinctrl: samsung: fix missing GPIOLIB on ARM64 Exynos config
bdc3ad9251 arch_topology: Do not set llc_sibling if llc_id is invalid
aaee3f6617 serial: 8250: Correct the clock for EndRun PTP/1588 PCIe device
662f945a20 serial: 8250: Also set sticky MCR bits in console restoration
8be962c89d serial: imx: fix overrun interrupts in DMA mode
d22d92230f usb: phy: generic: Get the vbus supply
b820764c64 usb: cdns3: Fix issue for clear halt endpoint
bd7f84708e usb: dwc3: gadget: Return proper request status
a633b8c341 usb: dwc3: core: Only handle soft-reset in DCTL
5fa59bb867 usb: dwc3: core: Fix tx/rx threshold settings
140801d3fb usb: dwc3: Try usb-role-switch first in dwc3_drd_init
4dd5feb279 usb: gadget: configfs: clear deactivation flag in configfs_composite_unbind()
6c3da0e19c usb: gadget: uvc: Fix crash when encoding data for usb request
fb1fe1a455 usb: typec: ucsi: Fix role swapping
06826eb063 usb: typec: ucsi: Fix reuse of completion structure
7b510d4bb4 usb: misc: fix improper handling of refcount in uss720_probe()
bb8ecca2dd iio: imu: inv_icm42600: Fix I2C init possible nack
ca2b54b6ad iio: magnetometer: ak8975: Fix the error handling in ak8975_power_on()
1060604fc7 iio: dac: ad5446: Fix read_raw not returning set value
6ff33c01be iio: dac: ad5592r: Fix the missing return value.
06ada9487f xhci: increase usb U3 -> U0 link resume timeout from 100ms to 500ms
e1be000166 xhci: stop polling roothubs after shutdown
2eb6c86891 xhci: Enable runtime PM on second Alderlake controller
63eda431b2 USB: serial: option: add Telit 0x1057, 0x1058, 0x1075 compositions
e9971dac69 USB: serial: option: add support for Cinterion MV32-WA/MV32-WB
34ff5455ee USB: serial: cp210x: add PIDs for Kamstrup USB Meter Reader
729a81ae10 USB: serial: whiteheat: fix heap overflow in WHITEHEAT_GET_DTR_RTS
008ba29f33 USB: quirks: add STRING quirk for VCOM device
ac6ad0ef83 USB: quirks: add a Realtek card reader
8ba02cebb7 usb: mtu3: fix USB 3.0 dual-role-switch from device to host
549209caab lightnvm: disable the subsystem
54c028cfc4 floppy: disable FDRAWCMD by default
de64d941a7 Merge 5.10.112 into android12-5.10-lts
54af9dd2b9 Linux 5.10.113
7992fdb045 Revert "net: micrel: fix KS8851_MLL Kconfig"
8bedbc8f7f block/compat_ioctl: fix range check in BLKGETSIZE
fea24b07ed staging: ion: Prevent incorrect reference counting behavour
dccee748af spi: atmel-quadspi: Fix the buswidth adjustment between spi-mem and controller
572761645b jbd2: fix a potential race while discarding reserved buffers after an abort
50aac44273 can: isotp: stop timeout monitoring when no first frame was sent
e1e96e3727 ext4: force overhead calculation if the s_overhead_cluster makes no sense
4789149b9e ext4: fix overhead calculation to account for the reserved gdt blocks
0c54b09376 ext4, doc: fix incorrect h_reserved size
22c450d39f ext4: limit length to bitmap_maxbytes - blocksize in punch_hole
75ac724684 ext4: fix use-after-free in ext4_search_dir
a46b3d8498 ext4: fix symlink file size not match to file content
f6038d43b2 ext4: fix fallocate to use file_modified to update permissions consistently
19590bbc69 perf report: Set PERF_SAMPLE_DATA_SRC bit for Arm SPE event
e012f9d1af powerpc/perf: Fix power9 event alternatives
0a2cef65b3 drm/vc4: Use pm_runtime_resume_and_get to fix pm_runtime_get_sync() usage
f8f8b3124b KVM: PPC: Fix TCE handling for VFIO
405d984274 drm/panel/raspberrypi-touchscreen: Initialise the bridge in prepare
231381f521 drm/panel/raspberrypi-touchscreen: Avoid NULL deref if not initialised
51d9cbbb0f perf/core: Fix perf_mmap fail when CONFIG_PERF_USE_VMALLOC enabled
88fcfd6ee6 sched/pelt: Fix attach_entity_load_avg() corner case
c55327bc37 arm_pmu: Validate single/group leader events
5580b974a8 ARC: entry: fix syscall_trace_exit argument
7082650eb8 e1000e: Fix possible overflow in LTR decoding
43a2a3734a ASoC: soc-dapm: fix two incorrect uses of list iterator
54e6180c8c gpio: Request interrupts after IRQ is initialized
0837ff17d0 openvswitch: fix OOB access in reserve_sfa_size()
19f6dcb1f0 xtensa: fix a7 clobbering in coprocessor context load/store
f399ab11dd xtensa: patch_text: Fixup last cpu should be master
ba2716da23 net: atlantic: invert deep par in pm functions, preventing null derefs
358a3846f6 dma: at_xdmac: fix a missing check on list iterator
cf23a960c5 ata: pata_marvell: Check the 'bmdma_addr' beforing reading
9ca66d7914 mm/mmu_notifier.c: fix race in mmu_interval_notifier_remove()
ed5d4efb4d oom_kill.c: futex: delay the OOM reaper to allow time for proper futex cleanup
6b932920b9 mm, hugetlb: allow for "high" userspace addresses
50cbc583fa EDAC/synopsys: Read the error count from the correct register
7ec6e06ee4 nvme-pci: disable namespace identifiers for Qemu controllers
316bd86c22 nvme: add a quirk to disable namespace identifiers
76101c8e0c stat: fix inconsistency between struct stat and struct compat_stat
bf28bba304 scsi: qedi: Fix failed disconnect handling
a284cca3d8 net: macb: Restart tx only if queue pointer is lagging
9581e07b54 drm/msm/mdp5: check the return of kzalloc()
8d71edabb0 dpaa_eth: Fix missing of_node_put in dpaa_get_ts_info()
b3afe5a7fd brcmfmac: sdio: Fix undefined behavior due to shift overflowing the constant
202748f441 mt76: Fix undefined behavior due to shift overflowing the constant
0de9c104d0 net: atlantic: Avoid out-of-bounds indexing
5bef9fc38f cifs: Check the IOCB_DIRECT flag, not O_DIRECT
e129c55153 vxlan: fix error return code in vxlan_fdb_append
8e7ea11364 arm64: dts: imx: Fix imx8*-var-som touchscreen property sizes
cd227ac03f ALSA: usb-audio: Fix undefined behavior due to shift overflowing the constant
490815f0b5 platform/x86: samsung-laptop: Fix an unsigned comparison which can never be negative
cb17b56a9b reset: tegra-bpmp: Restore Handle errors in BPMP response
d513ea9b7e ARM: vexpress/spc: Avoid negative array index when !SMP
052e4a661f arm64: mm: fix p?d_leaf()
18ff7a2efa arm64/mm: Remove [PUD|PMD]_TABLE_BIT from [pud|pmd]_bad()
3bf8ca3501 selftests: mlxsw: vxlan_flooding: Prevent flooding of unwanted packets
520aab8b72 dmaengine: idxd: add RO check for wq max_transfer_size write
9a3c026dc3 dmaengine: idxd: add RO check for wq max_batch_size write
f593f49fcd net: stmmac: Use readl_poll_timeout_atomic() in atomic state
3d55b19574 netlink: reset network and mac headers in netlink_dump()
49516e6ed9 ipv6: make ip6_rt_gc_expire an atomic_t
078d839f11 l3mdev: l3mdev_master_upper_ifindex_by_index_rcu should be using netdev_master_upper_dev_get_rcu
0ac8f83d8f net/sched: cls_u32: fix possible leak in u32_init_knode()
93366275be ip6_gre: Fix skb_under_panic in __gre6_xmit()
200f96ebb3 ip6_gre: Avoid updating tunnel->tun_hlen in __gre6_xmit()
8fb76adb89 net/packet: fix packet_sock xmit return value checking
a499cb5f3e net/smc: Fix sock leak when release after smc_shutdown()
60592f16a4 rxrpc: Restore removed timer deletion
fc7116a79a igc: Fix BUG: scheduling while atomic
46b0e4f998 igc: Fix infinite loop in release_swfw_sync
c075c3ea03 esp: limit skb_page_frag_refill use to a single page
3f7914dbea spi: spi-mtk-nor: initialize spi controller after resume
f714abf28f dmaengine: mediatek:Fix PM usage reference leak of mtk_uart_apdma_alloc_chan_resources
9bc949a181 dmaengine: imx-sdma: Fix error checking in sdma_event_remap
12aa8021c7 ASoC: codecs: wcd934x: do not switch off SIDO Buck when codec is in use
b6f474cd30 ASoC: msm8916-wcd-digital: Check failure for devm_snd_soc_register_component
608fc58858 ASoC: atmel: Remove system clock tree configuration for at91sam9g20ek
d29c78d3f9 dm: fix mempool NULL pointer race when completing IO
cf9b195464 ALSA: hda/realtek: Add quirk for Clevo NP70PNP
8ce3820fc9 ALSA: usb-audio: Clear MIDI port active flag after draining
43ce33a68e net/sched: cls_u32: fix netns refcount changes in u32_change()
04dd45d977 gfs2: assign rgrp glock before compute_bitstructs
378061c9b8 perf tools: Fix segfault accessing sample_id xyarray
5e8446e382 tracing: Dump stacktrace trigger to the corresponding instance
69848f9488 mm: page_alloc: fix building error on -Werror=array-compare
08ad7a770e etherdevice: Adjust ether_addr* prototypes to silence -Wstringop-overead
904c5c08bb ANDROID: fix up gpio change in 5.10.111
5dadf6321c Merge 5.10.111 into android12-5.10-lts
1052f9bce6 Linux 5.10.112
5c62d3bf14 ax25: Fix UAF bugs in ax25 timers
f934fa478d ax25: Fix NULL pointer dereferences in ax25 timers
145ea8d213 ax25: fix NPD bug in ax25_disconnect
a4942c6fea ax25: fix UAF bug in ax25_send_control()
b20a5ab0f5 ax25: Fix refcount leaks caused by ax25_cb_del()
57cc15f5fd ax25: fix UAF bugs of net_device caused by rebinding operation
5ddae8d064 ax25: fix reference count leaks of ax25_dev
5ea00fc606 ax25: add refcount in ax25_dev to avoid UAF bugs
361288633b scsi: iscsi: Fix unbound endpoint error handling
129db30599 scsi: iscsi: Fix endpoint reuse regression
26f827e095 dma-direct: avoid redundant memory sync for swiotlb
9a5a4d23e2 timers: Fix warning condition in __run_timers()
84837f43e5 i2c: pasemi: Wait for write xfers to finish
89496d80bf smp: Fix offline cpu check in flush_smp_call_function_queue()
cd02b2687d dm integrity: fix memory corruption when tag_size is less than digest size
0a312ec66a ARM: davinci: da850-evm: Avoid NULL pointer dereference
0806f19305 tick/nohz: Use WARN_ON_ONCE() to prevent console saturation
0275c75955 genirq/affinity: Consider that CPUs on nodes can be unbalanced
1fcfe37d17 drm/amdgpu: Enable gfxoff quirk on MacBook Pro
68ae52efa1 drm/amd/display: don't ignore alpha property on pre-multiplied mode
a263712ba8 ipv6: fix panic when forwarding a pkt with no in6 dev
659214603b nl80211: correctly check NL80211_ATTR_REG_ALPHA2 size
912797e54c ALSA: pcm: Test for "silence" field in struct "pcm_format_data"
48d070ca5e ALSA: hda/realtek: add quirk for Lenovo Thinkpad X12 speakers
163e162471 ALSA: hda/realtek: Add quirk for Clevo PD50PNT
5e4dd17998 btrfs: mark resumed async balance as writing
1d2eda18f6 btrfs: fix root ref counts in error handling in btrfs_get_root_ref
9b7ec35253 ath9k: Fix usage of driver-private space in tx_info
0f65cedae5 ath9k: Properly clear TX status area before reporting to mac80211
cc21ae9326 gcc-plugins: latent_entropy: use /dev/urandom
c089ffc846 memory: renesas-rpc-if: fix platform-device leak in error path
342454231e KVM: x86/mmu: Resolve nx_huge_pages when kvm.ko is loaded
06c348fde5 mm: kmemleak: take a full lowmem check in kmemleak_*_phys()
20ed94f818 mm: fix unexpected zeroed page mapping with zram swap
192e507ef8 mm, page_alloc: fix build_zonerefs_node()
000b3921b4 perf/imx_ddr: Fix undefined behavior due to shift overflowing the constant
ca24c5e8f0 drivers: net: slip: fix NPD bug in sl_tx_timeout()
e8cf1e4d95 scsi: megaraid_sas: Target with invalid LUN ID is deleted during scan
5b7ce74b6b scsi: mvsas: Add PCI ID of RocketRaid 2640
4b44cd5840 drm/amd/display: Fix allocate_mst_payload assert on resume
34ea097fb6 drm/amd/display: Revert FEC check in validation
fa5ee7c423 myri10ge: fix an incorrect free for skb in myri10ge_sw_tso
d90df6da50 net: usb: aqc111: Fix out-of-bounds accesses in RX fixup
9c12fcf1d8 net: axienet: setup mdio unconditionally
b643807a73 tlb: hugetlb: Add more sizes to tlb_remove_huge_tlb_entry
98973d2bdd arm64: alternatives: mark patch_alternative() as `noinstr`
2462faffbf regulator: wm8994: Add an off-on delay for WM8994 variant
aa8cdedaf7 gpu: ipu-v3: Fix dev_dbg frequency output
150fe861c5 ata: libata-core: Disable READ LOG DMA EXT for Samsung 840 EVOs
1ff5359afa net: micrel: fix KS8851_MLL Kconfig
d3478709ed scsi: ibmvscsis: Increase INITIAL_SRP_LIMIT to 1024
b9a110fa75 scsi: lpfc: Fix queue failures when recovering from PCI parity error
aec36b98a1 scsi: target: tcmu: Fix possible page UAF
4366679805 Drivers: hv: vmbus: Prevent load re-ordering when reading ring buffer
1d7a5aae88 drm/amdkfd: Check for potential null return of kmalloc_array()
e5afacc826 drm/amdgpu/vcn: improve vcn dpg stop procedure
d2e0931e6d drm/amdkfd: Fix Incorrect VMIDs passed to HWS
7fc0610ad8 drm/amd/display: Update VTEM Infopacket definition
6906e05cf3 drm/amd/display: FEC check in timing validation
756c61c168 drm/amd/display: fix audio format not updated after edid updated
76e086ce7b btrfs: do not warn for free space inode in cow_file_range
217190dc66 btrfs: fix fallocate to use file_modified to update permissions consistently
9b5d1b3413 drm/amd: Add USBC connector ID
6f9c06501d net: bcmgenet: Revert "Use stronger register read/writes to assure ordering"
504c15f07f dm mpath: only use ktime_get_ns() in historical selector
4e166a4118 cifs: potential buffer overflow in handling symlinks
67677050ce nfc: nci: add flush_workqueue to prevent uaf
bfba9722cf perf tools: Fix misleading add event PMU debug message
280f721edc testing/selftests/mqueue: Fix mq_perf_tests to free the allocated cpu set
eb8873b324 sctp: Initialize daddr on peeled off socket
45226fac4d scsi: iscsi: Fix conn cleanup and stop race during iscsid restart
73805795c9 scsi: iscsi: Fix offload conn cleanup when iscsid restarts
699bd835c3 scsi: iscsi: Move iscsi_ep_disconnect()
46f37a34a5 scsi: iscsi: Fix in-kernel conn failure handling
8125738967 scsi: iscsi: Rel ref after iscsi_lookup_endpoint()
22608545b8 scsi: iscsi: Use system_unbound_wq for destroy_work
4029a1e992 scsi: iscsi: Force immediate failure during shutdown
17d14456f6 scsi: iscsi: Stop queueing during ep_disconnect
da9cf24aa7 scsi: pm80xx: Enable upper inbound, outbound queues
e08d269712 scsi: pm80xx: Mask and unmask upper interrupt vectors 32-63
35b91e49bc net/smc: Fix NULL pointer dereference in smc_pnet_find_ib()
98a7f6c4ad drm/msm/dsi: Use connector directly in msm_dsi_manager_connector_init()
5f78ad9383 drm/msm: Fix range size vs end confusion
5513f9a0b0 cfg80211: hold bss_lock while updating nontrans_list
a44938950e net/sched: taprio: Check if socket flags are valid
08d5e3e954 net: ethernet: stmmac: fix altr_tse_pcs function when using a fixed-link
2ad9d890d8 net: dsa: felix: suppress -EPROBE_DEFER errors
f2cc341fcc net/sched: fix initialization order when updating chain 0 head
7a7cf84148 mlxsw: i2c: Fix initialization error flow
43e58e119a net: mdio: Alphabetically sort header inclusion
9709c8b5cd gpiolib: acpi: use correct format characters
d67c900f19 veth: Ensure eth header is in skb's linear part
845f44ce3d net/sched: flower: fix parsing of ethertype following VLAN header
85ee17ca21 SUNRPC: Fix the svc_deferred_event trace class
af12dd7123 media: rockchip/rga: do proper error checking in probe
5637129712 firmware: arm_scmi: Fix sorting of retrieved clock rates
16c628b0c6 memory: atmel-ebi: Fix missing of_node_put in atmel_ebi_probe
cb66641f81 drm/msm: Add missing put_task_struct() in debugfs path
921fdc45a0 btrfs: remove unused variable in btrfs_{start,write}_dirty_block_groups()
5d131318bb ACPI: processor idle: Check for architectural support for LPI
503934df31 cpuidle: PSCI: Move the `has_lpi` check to the beginning of the function
cfa98ffc42 hamradio: remove needs_free_netdev to avoid UAF
80a4df1464 hamradio: defer 6pack kfree after unregister_netdev
f0c31f192f drm/amdkfd: Use drm_priv to pass VM from KFD to amdgpu
6c8e5cb264 Linux 5.10.111
d36febbcd5 powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit
5c672073bc mm/sparsemem: fix 'mem_section' will never be NULL gcc 12 warning
5973f7507a irqchip/gic, gic-v3: Prevent GSI to SGI translations
000e09462f Drivers: hv: vmbus: Replace smp_store_mb() with virt_store_mb()
e1f540b752 arm64: module: remove (NOLOAD) from linker script
919823bd67 selftests: cgroup: Test open-time cgroup namespace usage for migration checks
637eca44b8 selftests: cgroup: Test open-time credential usage for migration checks
9dd39d2c65 selftests: cgroup: Make cg_create() use 0755 for permission instead of 0644
e74da71e66 selftests/cgroup: Fix build on older distros
4665722d36 cgroup: Use open-time credentials for process migraton perm checks
f089471d1b mm: don't skip swap entry even if zap_details specified
58823a9b09 ubsan: remove CONFIG_UBSAN_OBJECT_SIZE
03b39bbbec dmaengine: Revert "dmaengine: shdma: Fix runtime PM imbalance on error"
40e00885a6 tools build: Use $(shell ) instead of `` to get embedded libperl's ccopts
75c8558d41 tools build: Filter out options and warnings not supported by clang
6374faf49e perf python: Fix probing for some clang command line options
79abc219ba perf build: Don't use -ffat-lto-objects in the python feature test when building with clang-13
82e4395014 drm/amdkfd: Create file descriptor after client is added to smi_clients list
326b408e7e drm/nouveau/pmu: Add missing callbacks for Tegra devices
786ae8de3a drm/amdgpu/smu10: fix SoC/fclk units in auto mode
ff24114bb0 irqchip/gic-v3: Fix GICR_CTLR.RWP polling
451214b266 perf: qcom_l2_pmu: fix an incorrect NULL check on list iterator
fc629224aa ata: sata_dwc_460ex: Fix crash due to OOB write
7e88a50704 gpio: Restrict usage of GPIO chip irq members before initialization
5f54364ff6 RDMA/hfi1: Fix use-after-free bug for mm struct
8bb4168291 arm64: patch_text: Fixup last cpu should be master
a044bca8ef btrfs: prevent subvol with swapfile from being deleted
82ae73ac96 btrfs: fix qgroup reserve overflow the qgroup limit
fc4bdaed4d x86/speculation: Restore speculation related MSRs during S3 resume
8c9e26c890 x86/pm: Save the MSR validity status at context setup
2827328e64 io_uring: fix race between timeout flush and removal
f7e183b0a7 mm/mempolicy: fix mpol_new leak in shared_policy_replace
7d659cb176 mmmremap.c: avoid pointless invalidate_range_start/end on mremap(old_size=0)
6adc01a7aa lz4: fix LZ4_decompress_safe_partial read out of bound
8b6f04b4c9 mmc: renesas_sdhi: don't overwrite TAP settings when HS400 tuning is complete
029b417073 mmc: mmci: stm32: correctly check all elements of sg list
41a519c05b Revert "mmc: sdhci-xenon: fix annoying 1.8V regulator warning"
9de98470db arm64: Add part number for Arm Cortex-A78AE
4604b5738d perf session: Remap buf if there is no space for event
362ced3769 perf tools: Fix perf's libperf_print callback
65210fac63 perf: arm-spe: Fix perf report --mem-mode
bd905fed87 iommu/omap: Fix regression in probe for NULL pointer dereference
b3c00be2ff SUNRPC: svc_tcp_sendmsg() should handle errors from xdr_alloc_bvec()
9a45e08636 SUNRPC: Handle low memory situations in call_status()
132cbe2f18 SUNRPC: Handle ENOMEM in call_transmit_status()
aed30a2054 io_uring: don't touch scm_fp_list after queueing skb
594205b493 drbd: Fix five use after free bugs in get_initial_state
970a6bb729 bpf: Support dual-stack sockets in bpf_tcp_check_syncookie
6c17f4ef3c spi: bcm-qspi: fix MSPI only access with bcm_qspi_exec_mem_op()
8928239e5e qede: confirm skb is allocated before using
b7893388bb net: phy: mscc-miim: reject clause 45 register accesses
08ff0e74fa rxrpc: fix a race in rxrpc_exit_net()
5ae05b5eb5 net: openvswitch: fix leak of nested actions
42ab401d22 net: openvswitch: don't send internal clone attribute to the userspace.
e54ea8fc51 ice: synchronize_rcu() when terminating rings
e3dd1202ab ipv6: Fix stats accounting in ip6_pkt_drop
ffce126c95 ice: Do not skip not enabled queues in ice_vc_dis_qs_msg
b003fc4913 ice: Set txq_teid to ICE_INVAL_TEID on ring creation
ebd1e3458d dpaa2-ptp: Fix refcount leak in dpaa2_ptp_probe
43c2d7890e IB/rdmavt: add lock to call to rvt_error_qp to prevent a race condition
3a57babfb6 RDMA/mlx5: Don't remove cache MRs when a delay is needed
d8992b393f sfc: Do not free an empty page_ring
0ac74169eb bnxt_en: reserve space inside receive page for skb_shared_info
f8b0ef0a58 drm/imx: Fix memory leak in imx_pd_connector_get_modes
25bc9fd4c8 drm/imx: imx-ldb: Check for null pointer after calling kmemdup
02ab4abe5b net: stmmac: Fix unset max_speed difference between DT and non-DT platforms
63ea57478a net: ipv4: fix route with nexthop object delete warning
4be6ed0310 ice: Clear default forwarding VSI during VSI release
589154d0f1 net/tls: fix slab-out-of-bounds bug in decrypt_internal
c5f77b5953 scsi: zorro7xx: Fix a resource leak in zorro7xx_remove_one()
45b9932b4d NFSv4: fix open failure with O_ACCMODE flag
c688705a39 Revert "NFSv4: Handle the special Linux file open access mode"
cf580d2e38 Drivers: hv: vmbus: Fix potential crash on module unload
0c122eb3a1 drm/amdgpu: fix off by one in amdgpu_gfx_kiq_acquire()
84e5dfc05f Revert "hv: utils: add PTP_1588_CLOCK to Kconfig to fix build"
3c3fbfa6dd mm: fix race between MADV_FREE reclaim and blkdev direct IO read
1753a49e26 parisc: Fix patch code locking and flushing
f7c3522030 parisc: Fix CPU affinity for Lasi, WAX and Dino chips
c74e2f6ecc NFS: Avoid writeback threads getting stuck in mempool_alloc()
34681aeddc NFS: nfsiod should not block forever in mempool_alloc()
7a506fabcf SUNRPC: Fix socket waits for write buffer space
b9c5ac0a15 jfs: prevent NULL deref in diFree
c69b442125 virtio_console: eliminate anonymous module_init & module_exit
3309b32217 serial: samsung_tty: do not unlock port->lock for uart_write_wakeup()
9cb90f9ad5 x86/Kconfig: Do not allow CONFIG_X86_X32_ABI=y with llvm-objcopy
b3882e78aa NFS: swap-out must always use STABLE writes.
d4170a2821 NFS: swap IO handling is slightly different for O_DIRECT IO
4b6f122bdf SUNRPC: remove scheduling boost for "SWAPPER" tasks.
f4fc47e71e SUNRPC/xprt: async tasks mustn't block waiting for memory
f9244d31e0 SUNRPC/call_alloc: async tasks mustn't block waiting for memory
e2b2542f74 clk: Enforce that disjoints limits are invalid
1e9b5538cf clk: ti: Preserve node in ti_dt_clocks_register()
a2a0e04f64 xen: delay xen_hvm_init_time_ops() if kdump is boot on vcpu>=32
4a2544ce24 NFSv4: Protect the state recovery thread against direct reclaim
9b9feec97c NFSv4.2: fix reference count leaks in _nfs42_proc_copy_notify()
2e16895d06 w1: w1_therm: fixes w1_seq for ds28ea00 sensors
93498c6e77 staging: wfx: fix an error handling in wfx_init_common()
8f1d24f85f phy: amlogic: meson8b-usb2: Use dev_err_probe()
aa0b729678 staging: vchiq_core: handle NULL result of find_service_by_handle
be4ecca958 clk: si5341: fix reported clk_rate when output divider is 2
c9cf6baabf minix: fix bug when opening a file with O_DIRECT
8d9efd4434 init/main.c: return 1 from handled __setup() functions
f442978612 ceph: fix memory leak in ceph_readdir when note_last_dentry returns error
d745512d54 netlabel: fix out-of-bounds memory accesses
2cc803804e Bluetooth: Fix use after free in hci_send_acl
789621df19 MIPS: ingenic: correct unit node address
61e25021e6 xtensa: fix DTC warning unit_address_format
f6b9550f53 usb: dwc3: omap: fix "unbalanced disables for smps10_out1" on omap5evm
a4dd3e9e5a net: sfp: add 2500base-X quirk for Lantech SFP module
278b652f0a net: limit altnames to 64k total
423e7107f6 net: account alternate interface name memory
74c4d50255 can: isotp: set default value for N_As to 50 micro seconds
1d7effe5ff scsi: libfc: Fix use after free in fc_exch_abts_resp()
02222bf4f0 powerpc/secvar: fix refcount leak in format_show()
fd416c3f5a MIPS: fix fortify panic when copying asm exception handlers
7c657c0694 PCI: endpoint: Fix misused goto label
79cfc0052f bnxt_en: Eliminate unintended link toggle during FW reset
9567d54e70 Bluetooth: use memset avoid memory leaks
f9b183f133 Bluetooth: Fix not checking for valid hdev on bt_dev_{info,warn,err,dbg}
647b35aaf4 tuntap: add sanity checks about msg_controllen in sendmsg
797b4ea951 macvtap: advertise link netns via netlink
142ae7d4f2 mips: ralink: fix a refcount leak in ill_acc_of_setup()
f2565cb40e net/smc: correct settings of RMB window update limit
224903cc60 scsi: hisi_sas: Free irq vectors in order for v3 HW
f49ffaa85d scsi: aha152x: Fix aha152x_setup() __setup handler return value
91ee8a14ef mt76: mt7615: Fix assigning negative values to unsigned variable
d83574666b scsi: pm8001: Fix memory leak in pm8001_chip_fw_flash_update_req()
a0bb65eadb scsi: pm8001: Fix tag leaks on error
2051044d79 scsi: pm8001: Fix task leak in pm8001_send_abort_all()
3bd9a28798 scsi: pm8001: Fix pm8001_mpi_task_abort_resp()
ef969095c4 scsi: pm8001: Fix pm80xx_pci_mem_copy() interface
fe4b6d5a0d drm/amdkfd: make CRAT table missing message informational only
2f2f017ea8 dm: requeue IO if mapping table not yet available
71c8df33fd dm ioctl: prevent potential spectre v1 gadget
f655b724b4 ipv4: Invalidate neighbour for broadcast address upon address addition
bae03957e8 iwlwifi: mvm: Correctly set fragmented EBS
9538563d31 power: supply: axp288-charger: Set Vhold to 4.4V
c66cc04043 PCI: pciehp: Add Qualcomm quirk for Command Completed erratum
b1b27b0e8d tcp: Don't acquire inet_listen_hashbucket::lock with disabled BH.
b02a1a6502 PCI: endpoint: Fix alignment fault error in copy tests
4820847e8b usb: ehci: add pci device support for Aspeed platforms
0b9cf0b599 iommu/arm-smmu-v3: fix event handling soft lockup
e07e420a00 PCI: aardvark: Fix support for MSI interrupts
6694b8643b drm/amdgpu: Fix recursive locking warning
ea21eaea7f powerpc: Set crashkernel offset to mid of RMA region
fb5ac62fbe ipv6: make mc_forwarding atomic
5baf92a2c4 libbpf: Fix build issue with llvm-readelf
26a1e4739e cfg80211: don't add non transmitted BSS to 6GHz scanned channels
9a56e2b271 mt76: dma: initialize skip_unmap in mt76_dma_rx_fill
b42b6d0ec3 power: supply: axp20x_battery: properly report current when discharging
de9505936c scsi: bfa: Replace snprintf() with sysfs_emit()
ed7db95920 scsi: mvsas: Replace snprintf() with sysfs_emit()
995f517888 bpf: Make dst_port field in struct bpf_sock 16-bit wide
339bd0b55e ath11k: mhi: use mhi_sync_power_up()
c6a815f5ab ath11k: fix kernel panic during unload/load ath11k modules
e4d2d72013 powerpc: dts: t104xrdb: fix phy type for FMAN 4/5
02e2ee8619 ptp: replace snprintf with sysfs_emit
9ea17b9f1d usb: gadget: tegra-xudc: Fix control endpoint's definitions
07971b818e usb: gadget: tegra-xudc: Do not program SPARAM
927beb05aa drm/amd/amdgpu/amdgpu_cs: fix refcount leak of a dma_fence obj
85313d9bc7 drm/amd/display: Add signal type check when verify stream backends same
9d7d83d039 ath5k: fix OOB in ath5k_eeprom_read_pcal_info_5111
850c4351e8 drm: Add orientation quirk for GPD Win Max
a24479c5e9 KVM: x86/emulator: Emulate RDPID only if it is enabled in guest
66b0fa6b22 KVM: x86/svm: Clear reserved bits written to PerfEvtSeln MSRs
2e52a29470 rtc: wm8350: Handle error for wm8350_register_irq
0777fe98a4 gfs2: gfs2_setattr_size error path fix
f349d7f9ee gfs2: Fix gfs2_release for non-writers regression
3f53715fd5 gfs2: Check for active reservation in gfs2_release
2dc49f58a2 ubifs: Rectify space amount budget for mkdir/tmpfile operations

Update the .xml file with the following changes, which came in from the
-lts branch, to handle ABI issues with the LTS security fixes:

Leaf changes summary: 3 artifacts changed
Changed leaf types summary: 2 leaf types changed
Removed/Changed/Added functions summary: 0 Removed, 1 Changed, 0 Added function
Removed/Changed/Added variables summary: 0 Removed, 0 Changed, 0 Added variable

1 function with some sub-type change:

  [C] 'function int hex_to_bin(char)' at hexdump.c:53:1 has some sub-type changes:
    parameter 1 of type 'char' changed:
      type name changed from 'char' to 'unsigned char'
      type size hasn't changed
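
For context, a minimal user-space sketch of why the widened parameter
matters (editor's illustration, not the kernel's implementation): on
targets where plain 'char' is signed, input bytes >= 0x80 sign-extend to
negative values before hex_to_bin() ever inspects them, which the
constant-time rework in this series (see 616d354fb9 above) avoids by
taking an unsigned char.

	#include <stdio.h>

	static int as_char(char c)           { return c; }  /* old prototype style */
	static int as_uchar(unsigned char c) { return c; }  /* new prototype style */

	int main(void)
	{
		char byte = (char)0xe9;
		/* prints "-23 vs 233" where plain char is signed */
		printf("%d vs %d\n", as_char(byte), as_uchar((unsigned char)byte));
		return 0;
	}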

'struct gpio_chip at driver.h:362:1' changed (indirectly):
  type size hasn't changed
  there are data member changes:
    type 'struct gpio_irq_chip' of 'gpio_chip::irq' changed:
      type size hasn't changed
      there are data member changes:
        data member u64 android_kabi_reserved1 at offset 2304 (in bits) became anonymous data member 'union {bool initialized; struct {u64 android_kabi_reserved1;}; union {};}'
      1265 impacted interfaces
  1265 impacted interfaces

'struct gpio_irq_chip at driver.h:32:1' changed:
  details were reported earlier
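
The anonymous union above is the GKI ABI-padding pattern at work. A
minimal sketch of what it expands to (an assumption based on the
ANDROID_KABI_USE() helper from include/linux/android_kabi.h; the real
macro also carries size and offset checks): the new member overlays a
u64 slot that was reserved when the ABI was frozen, so the struct's size
and layout, and therefore the 1265 dependent interfaces, are unchanged.

	struct gpio_irq_chip {
		/* ... frozen members unchanged ... */
		union {
			bool initialized;			/* new member from the LTS fix */
			struct {
				u64 android_kabi_reserved1;	/* original reserved slot */
			};
		};
	};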

Change-Id: Iface7385c5d82fbcdaeb92fda79ac3cd1835d323
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2022-07-27 11:21:05 +02:00


/*
 * mm/rmap.c - physical to virtual reverse mappings
 *
 * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
 * Released under the General Public License (GPL).
 *
 * Simple, low overhead reverse mapping scheme.
 * Please try to keep this thing as modular as possible.
 *
 * Provides methods for unmapping each kind of mapped page:
 * the anon methods track anonymous pages, and
 * the file methods track pages belonging to an inode.
 *
 * Original design by Rik van Riel <riel@conectiva.com.br> 2001
 * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
 * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
 * Contributions by Hugh Dickins 2003, 2004
 */

/*
 * Lock ordering in mm:
 *
 * inode->i_mutex	(while writing or truncating, not reading or faulting)
 *   mm->mmap_lock
 *     page->flags PG_locked (lock_page)   * (see hugetlbfs below)
 *       hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
 *         mapping->i_mmap_rwsem
 *           hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
 *           anon_vma->rwsem
 *             mm->page_table_lock or pte_lock
 *               pgdat->lru_lock (in mark_page_accessed, isolate_lru_page)
 *               swap_lock (in swap_duplicate, swap_info_get)
 *                 mmlist_lock (in mmput, drain_mmlist and others)
 *                 mapping->private_lock (in __set_page_dirty_buffers)
 *                   mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
 *                     i_pages lock (widely used)
 *                 inode->i_lock (in set_page_dirty's __mark_inode_dirty)
 *                 bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
 *                   sb_lock (within inode_lock in fs/fs-writeback.c)
 *                   i_pages lock (widely used, in set_page_dirty,
 *                             in arch-dependent flush_dcache_mmap_lock,
 *                             within bdi.wb->list_lock in __sync_single_inode)
 *
 * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
 *   ->tasklist_lock
 *     pte map lock
 *
 * * hugetlbfs PageHuge() pages take locks in this order:
 *         mapping->i_mmap_rwsem
 *           hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
 *             page->flags PG_locked (lock_page)
 */

#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
#include <linux/backing-dev.h>
#include <linux/page_idle.h>
#include <linux/memremap.h>
#include <linux/userfaultfd_k.h>

#include <asm/tlbflush.h>
#include <trace/events/tlb.h>
#include <trace/hooks/mm.h>

#include "internal.h"

static struct kmem_cache *anon_vma_cachep;
static struct kmem_cache *anon_vma_chain_cachep;

static inline struct anon_vma *anon_vma_alloc(void)
{
	struct anon_vma *anon_vma;

	anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
	if (anon_vma) {
		atomic_set(&anon_vma->refcount, 1);
		anon_vma->degree = 1;	/* Reference for first vma */
		anon_vma->parent = anon_vma;
		/*
		 * Initialise the anon_vma root to point to itself. If called
		 * from fork, the root will be reset to the parent's anon_vma.
		 */
		anon_vma->root = anon_vma;
	}

	return anon_vma;
}

static inline void anon_vma_free(struct anon_vma *anon_vma)
{
	VM_BUG_ON(atomic_read(&anon_vma->refcount));

	/*
	 * Synchronize against page_lock_anon_vma_read() such that
	 * we can safely hold the lock without the anon_vma getting
	 * freed.
	 *
	 * Relies on the full mb implied by the atomic_dec_and_test() from
	 * put_anon_vma() against the acquire barrier implied by
	 * down_read_trylock() from page_lock_anon_vma_read(). This orders:
	 *
	 * page_lock_anon_vma_read()	VS	put_anon_vma()
	 *   down_read_trylock()		  atomic_dec_and_test()
	 *   LOCK				  MB
	 *   atomic_read()			  rwsem_is_locked()
	 *
	 * LOCK should suffice since the actual taking of the lock must
	 * happen _before_ what follows.
	 */
	might_sleep();
	if (rwsem_is_locked(&anon_vma->root->rwsem)) {
		anon_vma_lock_write(anon_vma);
		anon_vma_unlock_write(anon_vma);
	}

	kmem_cache_free(anon_vma_cachep, anon_vma);
}

static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
{
	return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
}

static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
{
	kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
}

static void anon_vma_chain_link(struct vm_area_struct *vma,
				struct anon_vma_chain *avc,
				struct anon_vma *anon_vma)
{
	avc->vma = vma;
	avc->anon_vma = anon_vma;
	list_add(&avc->same_vma, &vma->anon_vma_chain);
	anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
}

/**
 * __anon_vma_prepare - attach an anon_vma to a memory region
 * @vma: the memory region in question
 *
 * This makes sure the memory mapping described by 'vma' has
 * an 'anon_vma' attached to it, so that we can associate the
 * anonymous pages mapped into it with that anon_vma.
 *
 * The common case will be that we already have one, which
 * is handled inline by anon_vma_prepare(). But if
 * not we either need to find an adjacent mapping that we
 * can re-use the anon_vma from (very common when the only
 * reason for splitting a vma has been mprotect()), or we
 * allocate a new one.
 *
 * Anon-vma allocations are very subtle, because we may have
 * optimistically looked up an anon_vma in page_lock_anon_vma_read()
 * and that may actually touch the spinlock even in the newly
 * allocated vma (it depends on RCU to make sure that the
 * anon_vma isn't actually destroyed).
 *
 * As a result, we need to do proper anon_vma locking even
 * for the new allocation. At the same time, we do not want
 * to do any locking for the common case of already having
 * an anon_vma.
 *
 * This must be called with the mmap_lock held for reading.
 */
int __anon_vma_prepare(struct vm_area_struct *vma)
{
	struct mm_struct *mm = vma->vm_mm;
	struct anon_vma *anon_vma, *allocated;
	struct anon_vma_chain *avc;

	might_sleep();

	avc = anon_vma_chain_alloc(GFP_KERNEL);
	if (!avc)
		goto out_enomem;

	anon_vma = find_mergeable_anon_vma(vma);
	allocated = NULL;
	if (!anon_vma) {
		anon_vma = anon_vma_alloc();
		if (unlikely(!anon_vma))
			goto out_enomem_free_avc;
		allocated = anon_vma;
	}

	anon_vma_lock_write(anon_vma);
	/* page_table_lock to protect against threads */
	spin_lock(&mm->page_table_lock);
	if (likely(!vma->anon_vma)) {
		vma->anon_vma = anon_vma;
		anon_vma_chain_link(vma, avc, anon_vma);
		/* vma reference or self-parent link for new root */
		anon_vma->degree++;
		allocated = NULL;
		avc = NULL;
	}
	spin_unlock(&mm->page_table_lock);
	anon_vma_unlock_write(anon_vma);

	if (unlikely(allocated))
		put_anon_vma(allocated);
	if (unlikely(avc))
		anon_vma_chain_free(avc);

	return 0;

 out_enomem_free_avc:
	anon_vma_chain_free(avc);
 out_enomem:
	return -ENOMEM;
}

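/*
 * Editor's note: for reference, a minimal sketch of the common-case
 * inline wrapper that the kernel-doc above mentions, as it appears in
 * include/linux/rmap.h for this kernel series (shown under #if 0 because
 * it is not part of mm/rmap.c). Only the first anonymous fault on a VMA
 * pays for __anon_vma_prepare(); every later fault takes the cheap
 * likely() branch.
 */
#if 0	/* illustrative only */
static inline int anon_vma_prepare(struct vm_area_struct *vma)
{
	if (likely(vma->anon_vma))
		return 0;

	return __anon_vma_prepare(vma);
}
#endif
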
/*
 * This is a useful helper function for locking the anon_vma root as
 * we traverse the vma->anon_vma_chain, looping over anon_vma's that
 * have the same vma.
 *
 * Such anon_vma's should have the same root, so you'd expect to see
 * just a single mutex_lock for the whole traversal.
 */
static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
{
	struct anon_vma *new_root = anon_vma->root;

	if (new_root != root) {
		if (WARN_ON_ONCE(root))
			up_write(&root->rwsem);
		root = new_root;
		down_write(&root->rwsem);
	}
	return root;
}

static inline void unlock_anon_vma_root(struct anon_vma *root)
{
	if (root)
		up_write(&root->rwsem);
}

/*
 * Attach the anon_vmas from src to dst.
 * Returns 0 on success, -ENOMEM on failure.
 *
 * anon_vma_clone() is called by __vma_split(), __split_vma(), copy_vma() and
 * anon_vma_fork(). The first three want an exact copy of src, while the last
 * one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent
 * endless growth of anon_vma. Since dst->anon_vma is set to NULL before the
 * call, we can identify this case by checking (!dst->anon_vma &&
 * src->anon_vma).
 *
 * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find
 * and reuse an existing anon_vma which has no vmas and only one child
 * anon_vma. This prevents degradation of the anon_vma hierarchy to an
 * endless linear chain in the case of a constantly forking task. On the
 * other hand, an anon_vma with more than one child isn't reused even if
 * there was no alive vma, thus the rmap walker has a good chance of avoiding
 * scanning the whole hierarchy when it searches where the page is mapped.
 */
int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
{
	struct anon_vma_chain *avc, *pavc;
	struct anon_vma *root = NULL;

	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
		struct anon_vma *anon_vma;

		avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
		if (unlikely(!avc)) {
			unlock_anon_vma_root(root);
			root = NULL;
			avc = anon_vma_chain_alloc(GFP_KERNEL);
			if (!avc)
				goto enomem_failure;
		}
		anon_vma = pavc->anon_vma;
		root = lock_anon_vma_root(root, anon_vma);
		anon_vma_chain_link(dst, avc, anon_vma);

		/*
		 * Reuse an existing anon_vma if its degree is lower than two,
		 * which means it has no vma and only one anon_vma child.
		 *
		 * Do not choose the parent anon_vma, otherwise the first child
		 * will always reuse it. The root anon_vma is never reused:
		 * it has a self-parent reference and at least one child.
		 */
		if (!dst->anon_vma && src->anon_vma &&
		    anon_vma != src->anon_vma && anon_vma->degree < 2)
			dst->anon_vma = anon_vma;
	}
	if (dst->anon_vma)
		dst->anon_vma->degree++;
	unlock_anon_vma_root(root);
	return 0;

 enomem_failure:
	/*
	 * dst->anon_vma is dropped here otherwise its degree can be incorrectly
	 * decremented in unlink_anon_vmas().
	 * We can safely do this because callers of anon_vma_clone() don't care
	 * about dst->anon_vma if anon_vma_clone() failed.
	 */
	dst->anon_vma = NULL;
	unlink_anon_vmas(dst);
	return -ENOMEM;
}

/*
* Attach vma to its own anon_vma, as well as to the anon_vmas that
* the corresponding VMA in the parent process is attached to.
* Returns 0 on success, non-zero on failure.
*/
int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
{
struct anon_vma_chain *avc;
struct anon_vma *anon_vma;
int error;
/* Don't bother if the parent process has no anon_vma here. */
if (!pvma->anon_vma)
return 0;
/* Drop inherited anon_vma, we'll reuse existing or allocate new. */
vma->anon_vma = NULL;
/*
* First, attach the new VMA to the parent VMA's anon_vmas,
* so rmap can find non-COWed pages in child processes.
*/
error = anon_vma_clone(vma, pvma);
if (error)
return error;
/* An existing anon_vma has been reused, all done then. */
if (vma->anon_vma)
return 0;
/* Then add our own anon_vma. */
anon_vma = anon_vma_alloc();
if (!anon_vma)
goto out_error;
avc = anon_vma_chain_alloc(GFP_KERNEL);
if (!avc)
goto out_error_free_anon_vma;
/*
* The root anon_vma's spinlock is the lock actually used when we
* lock any of the anon_vmas in this anon_vma tree.
*/
anon_vma->root = pvma->anon_vma->root;
anon_vma->parent = pvma->anon_vma;
/*
* With refcounts, an anon_vma can stay around longer than the
* process it belongs to. The root anon_vma needs to be pinned until
* this anon_vma is freed, because the lock lives in the root.
*/
get_anon_vma(anon_vma->root);
/* Mark this anon_vma as the one where our new (COWed) pages go. */
vma->anon_vma = anon_vma;
anon_vma_lock_write(anon_vma);
anon_vma_chain_link(vma, avc, anon_vma);
anon_vma->parent->degree++;
anon_vma_unlock_write(anon_vma);
return 0;
out_error_free_anon_vma:
put_anon_vma(anon_vma);
out_error:
unlink_anon_vmas(vma);
return -ENOMEM;
}
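/*
 * Hedged sketch of the fork path (roughly how dup_mmap() uses the function
 * above; the variable names and labels are illustrative): each copied VMA
 * is attached to the parent's anon_vmas before the child can fault on it,
 * and the copy is unwound on failure:
 *
 *	tmp = vm_area_dup(mpnt);
 *	if (!tmp)
 *		goto fail_nomem;
 *	if (anon_vma_fork(tmp, mpnt))
 *		goto fail_nomem_anon_vma_fork;
 */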
void unlink_anon_vmas(struct vm_area_struct *vma)
{
struct anon_vma_chain *avc, *next;
struct anon_vma *root = NULL;
/*
* Unlink each anon_vma chained to the VMA. This list is ordered
* from newest to oldest, ensuring the root anon_vma gets freed last.
*/
list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
struct anon_vma *anon_vma = avc->anon_vma;
root = lock_anon_vma_root(root, anon_vma);
anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
/*
* Leave empty anon_vmas on the list - we'll need
* to free them outside the lock.
*/
if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
anon_vma->parent->degree--;
continue;
}
list_del(&avc->same_vma);
anon_vma_chain_free(avc);
}
if (vma->anon_vma)
vma->anon_vma->degree--;
unlock_anon_vma_root(root);
/*
* Iterate the list once more; it now contains only empty and unlinked
* anon_vmas, so destroy them. We could not do this earlier because
* __put_anon_vma() needs to write-acquire the anon_vma->root->rwsem.
*/
list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
struct anon_vma *anon_vma = avc->anon_vma;
VM_WARN_ON(anon_vma->degree);
put_anon_vma(anon_vma);
list_del(&avc->same_vma);
anon_vma_chain_free(avc);
}
}
static void anon_vma_ctor(void *data)
{
struct anon_vma *anon_vma = data;
init_rwsem(&anon_vma->rwsem);
atomic_set(&anon_vma->refcount, 0);
anon_vma->rb_root = RB_ROOT_CACHED;
}
void __init anon_vma_init(void)
{
anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
anon_vma_ctor);
anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain,
SLAB_PANIC|SLAB_ACCOUNT);
}
/*
* Getting a lock on a stable anon_vma from a page off the LRU is tricky!
*
* Since there is no serialization whatsoever against page_remove_rmap(),
* the best this function can do is return a locked anon_vma that might
* have been relevant to this page.
*
* The page might have been remapped to a different anon_vma or the anon_vma
* returned may already be freed (and even reused).
*
* In case it was remapped to a different anon_vma, the new anon_vma will be a
* child of the old anon_vma, and the anon_vma lifetime rules will therefore
* ensure that any anon_vma obtained from the page will still be valid for as
* long as we observe page_mapped() [ hence all those page_mapped() tests ].
*
* All users of this function must be very careful when walking the anon_vma
* chain and verify that the page in question is indeed mapped in it
* [ something equivalent to page_mapped_in_vma() ].
*
* Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from
* page_remove_rmap() that the anon_vma pointer from page->mapping is valid
* if there is a mapcount, we can dereference the anon_vma after observing
* those.
*/
struct anon_vma *page_get_anon_vma(struct page *page)
{
struct anon_vma *anon_vma = NULL;
unsigned long anon_mapping;
rcu_read_lock();
anon_mapping = (unsigned long)READ_ONCE(page->mapping);
if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
goto out;
if (!page_mapped(page))
goto out;
anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
if (!atomic_inc_not_zero(&anon_vma->refcount)) {
anon_vma = NULL;
goto out;
}
/*
* If this page is still mapped, then its anon_vma cannot have been
* freed. But if it has been unmapped, we have no guarantee against the
* anon_vma structure being freed and reused (as another anon_vma:
* SLAB_TYPESAFE_BY_RCU guarantees type stability, so the
* atomic_inc_not_zero() above cannot corrupt it).
*/
if (!page_mapped(page)) {
rcu_read_unlock();
put_anon_vma(anon_vma);
return NULL;
}
out:
rcu_read_unlock();
return anon_vma;
}
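/*
 * Hedged usage sketch for the function above (illustration only): on
 * success the caller owns a reference, not the lock, and must drop it
 * with put_anon_vma() when done:
 *
 *	anon_vma = page_get_anon_vma(page);
 *	if (!anon_vma)
 *		return;
 *	... lock and/or walk the anon_vma as needed ...
 *	put_anon_vma(anon_vma);
 */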
/*
* Similar to page_get_anon_vma() except it locks the anon_vma.
*
* It's a little more complex as it tries to keep the fast path to a single
* atomic op -- the trylock. If the trylock fails, we fall back to getting a
* reference as in page_get_anon_vma() and then block on the rwsem.
*/
struct anon_vma *page_lock_anon_vma_read(struct page *page)
{
struct anon_vma *anon_vma = NULL;
struct anon_vma *root_anon_vma;
unsigned long anon_mapping;
rcu_read_lock();
anon_mapping = (unsigned long)READ_ONCE(page->mapping);
if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
goto out;
if (!page_mapped(page))
goto out;
anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
root_anon_vma = READ_ONCE(anon_vma->root);
if (down_read_trylock(&root_anon_vma->rwsem)) {
/*
* If the page is still mapped, then this anon_vma is still
* its anon_vma, and holding the rwsem ensures that it will
* not go away, see anon_vma_free().
*/
if (!page_mapped(page)) {
up_read(&root_anon_vma->rwsem);
anon_vma = NULL;
}
goto out;
}
/* trylock failed, we need to sleep */
if (!atomic_inc_not_zero(&anon_vma->refcount)) {
anon_vma = NULL;
goto out;
}
if (!page_mapped(page)) {
rcu_read_unlock();
put_anon_vma(anon_vma);
return NULL;
}
/* we pinned the anon_vma, it's safe to sleep */
rcu_read_unlock();
anon_vma_lock_read(anon_vma);
if (atomic_dec_and_test(&anon_vma->refcount)) {
/*
* Oops, we held the last refcount, release the lock
* and bail -- can't simply use put_anon_vma() because
* we'll deadlock on the anon_vma_lock_write() recursion.
*/
anon_vma_unlock_read(anon_vma);
__put_anon_vma(anon_vma);
anon_vma = NULL;
}
return anon_vma;
out:
rcu_read_unlock();
return anon_vma;
}
void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
{
anon_vma_unlock_read(anon_vma);
}
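/*
 * Hedged usage sketch for the lock/unlock pair above (illustration only):
 * on success the caller holds the root anon_vma's read lock, not a
 * reference, so the walk must stay inside the pair:
 *
 *	anon_vma = page_lock_anon_vma_read(page);
 *	if (!anon_vma)
 *		return;
 *	... walk the interval tree under the root lock ...
 *	page_unlock_anon_vma_read(anon_vma);
 */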
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
/*
* Flush TLB entries for recently unmapped pages from remote CPUs. If a PTE
* was dirty when it was unmapped, it is important that it be flushed before
* any IO is initiated on the page, to prevent lost writes. Similarly, it
* must be flushed before the page is freed, to prevent data leakage.
*/
void try_to_unmap_flush(void)
{
struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
if (!tlb_ubc->flush_required)
return;
arch_tlbbatch_flush(&tlb_ubc->arch);
tlb_ubc->flush_required = false;
tlb_ubc->writable = false;
}
/* Flush iff there are potentially writable TLB entries that can race with IO */
void try_to_unmap_flush_dirty(void)
{
struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
if (tlb_ubc->writable)
try_to_unmap_flush();
}
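/*
 * Hedged sketch of the batching pattern (roughly how reclaim uses the two
 * functions above; the list iteration is illustrative and 'flags' is a
 * hypothetical ttu_flags variable): unmap a batch of pages with
 * TTU_BATCH_FLUSH set, then issue one flush for the whole batch instead of
 * one IPI per page:
 *
 *	list_for_each_entry(page, &page_list, lru)
 *		try_to_unmap(page, flags | TTU_BATCH_FLUSH);
 *	try_to_unmap_flush();
 */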
static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
{
struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
tlb_ubc->flush_required = true;
/*
* Ensure compiler does not re-order the setting of tlb_flush_batched
* before the PTE is cleared.
*/
barrier();
mm->tlb_flush_batched = true;
/*
* If the PTE was dirty then it's best to assume it's writable. The
* caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
* before the page is queued for IO.
*/
if (writable)
tlb_ubc->writable = true;
}
/*
* Returns true if the TLB flush should be deferred to the end of a batch of
* unmap operations to reduce IPIs.
*/
static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
{
bool should_defer = false;
if (!(flags & TTU_BATCH_FLUSH))
return false;
/* If remote CPUs need to be flushed then defer and batch the flush */
if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
should_defer = true;
put_cpu();
return should_defer;
}
/*
* Reclaim unmaps pages under the PTL but does not flush the TLB prior to
* releasing the PTL if TLB flushes are batched. It's possible for a parallel
* operation such as mprotect or munmap to race between reclaim unmapping
* the page and flushing the page. If this race occurs, it potentially allows
* access to data via a stale TLB entry. Tracking all mms that have TLB
* batching in flight would be expensive during reclaim, so instead we track
* whether TLB batching occurred in the past, and if so, do a flush here
* if required. This costs one additional flush per reclaim cycle, paid
* by the first operation at risk, such as mprotect or munmap.
*
* This must be called under the PTL so that an access to tlb_flush_batched
* that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
* via the PTL.
*/
void flush_tlb_batched_pending(struct mm_struct *mm)
{
if (data_race(mm->tlb_flush_batched)) {
flush_tlb_mm(mm);
/*
* Do not allow the compiler to re-order the clearing of
* tlb_flush_batched before the tlb is flushed.
*/
barrier();
mm->tlb_flush_batched = false;
}
}
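/*
 * Hedged sketch of a caller (roughly the mprotect/change_pte_range pattern;
 * names are illustrative): the flush must happen after the PTL is taken so
 * that it synchronises with reclaim's batched PTE clearing:
 *
 *	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 *	flush_tlb_batched_pending(mm);
 *	... modify ptes ...
 *	pte_unmap_unlock(pte, ptl);
 */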
#else
static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
{
}
static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
{
return false;
}
#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
/*
* At what user virtual address is page expected in vma?
* Caller should check the page is actually part of the vma.
*/
unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
{
if (PageAnon(page)) {
struct anon_vma *page__anon_vma = page_anon_vma(page);
/*
* Note: swapoff's unuse_vma() is more efficient with this
* check, and needs it to match anon_vma when KSM is active.
*/
if (!vma->anon_vma || !page__anon_vma ||
vma->anon_vma->root != page__anon_vma->root)
return -EFAULT;
} else if (!vma->vm_file) {
return -EFAULT;
} else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
return -EFAULT;
}
return vma_address(page, vma);
}
pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd = NULL;
pmd_t pmde;
pgd = pgd_offset(mm, address);
if (!pgd_present(*pgd))
goto out;
p4d = p4d_offset(pgd, address);
if (!p4d_present(*p4d))
goto out;
pud = pud_offset(p4d, address);
if (!pud_present(*pud))
goto out;
pmd = pmd_offset(pud, address);
/*
* Some THP functions use the sequence pmdp_huge_clear_flush(), set_pmd_at()
* without holding the anon_vma lock for write. So when looking for a
* genuine pmde (in which to find the pte), test present and !THP together.
*/
pmde = *pmd;
barrier();
if (!pmd_present(pmde) || pmd_trans_huge(pmde))
pmd = NULL;
out:
return pmd;
}
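/*
 * Hedged usage sketch for mm_find_pmd() (illustration only): callers use it
 * to locate the pte level quickly, bailing out when nothing is mapped at
 * this address or when the pmd is (or may become) huge:
 *
 *	pmd = mm_find_pmd(mm, address);
 *	if (!pmd)
 *		return;
 *	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 */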
struct page_referenced_arg {
int mapcount;
int referenced;
unsigned long vm_flags;
struct mem_cgroup *memcg;
};
/*
* @arg: a page_referenced_arg will be passed to this function
*/
static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
unsigned long address, void *arg)
{
struct page_referenced_arg *pra = arg;
struct page_vma_mapped_walk pvmw = {
.page = page,
.vma = vma,
.address = address,
};
int referenced = 0;
while (page_vma_mapped_walk(&pvmw)) {
address = pvmw.address;
if (vma->vm_flags & VM_LOCKED) {
page_vma_mapped_walk_done(&pvmw);
pra->vm_flags |= VM_LOCKED;
return false; /* To break the loop */
}
if (pvmw.pte) {
if (ptep_clear_flush_young_notify(vma, address,
pvmw.pte)) {
/*
* Don't treat a reference through
* a sequentially read mapping as such.
* If the page has been used in another mapping,
* we will catch it; if this other mapping is
* already gone, the unmap path will have set
* PG_referenced or activated the page.
*/
if (likely(!(vma->vm_flags & VM_SEQ_READ)))
referenced++;
}
} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
if (pmdp_clear_flush_young_notify(vma, address,
pvmw.pmd))
referenced++;
} else {
/* unexpected pmd-mapped page? */
WARN_ON_ONCE(1);
}
pra->mapcount--;
}
if (referenced)
clear_page_idle(page);
if (test_and_clear_page_young(page))
referenced++;
if (referenced) {
pra->referenced++;
pra->vm_flags |= vma->vm_flags;
}
trace_android_vh_page_referenced_one_end(vma, page, referenced);
if (!pra->mapcount)
return false; /* To break the loop */
return true;
}
static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
{
struct page_referenced_arg *pra = arg;
struct mem_cgroup *memcg = pra->memcg;
if (!mm_match_cgroup(vma->vm_mm, memcg))
return true;
return false;
}
/**
* page_referenced - test if the page was referenced
* @page: the page to test
* @is_locked: caller holds lock on the page
* @memcg: target memory cgroup
* @vm_flags: collect the vma->vm_flags of all VMAs that actually referenced the page
*
* Quick test_and_clear_referenced for all mappings to a page,
* returns the number of ptes which referenced the page.
*/
int page_referenced(struct page *page,
int is_locked,
struct mem_cgroup *memcg,
unsigned long *vm_flags)
{
int we_locked = 0;
struct page_referenced_arg pra = {
.mapcount = total_mapcount(page),
.memcg = memcg,
};
struct rmap_walk_control rwc = {
.rmap_one = page_referenced_one,
.arg = (void *)&pra,
.anon_lock = page_lock_anon_vma_read,
};
*vm_flags = 0;
if (!pra.mapcount)
return 0;
if (!page_rmapping(page))
return 0;
if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
we_locked = trylock_page(page);
if (!we_locked)
return 1;
}
/*
* If we are reclaiming on behalf of a cgroup, skip
* counting on behalf of references from different
* cgroups
*/
if (memcg) {
rwc.invalid_vma = invalid_page_referenced_vma;
}
rmap_walk(page, &rwc);
*vm_flags = pra.vm_flags;
if (we_locked)
unlock_page(page);
return pra.referenced;
}
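/*
 * Hedged usage sketch (roughly the reclaim pattern; the decisions shown and
 * 'memcg', which may be NULL, are illustrative): the return value counts
 * referencing ptes, while *vm_flags reports properties of the VMAs that
 * referenced the page:
 *
 *	unsigned long vm_flags;
 *	int refs = page_referenced(page, 1, memcg, &vm_flags);
 *
 *	if (vm_flags & VM_LOCKED)
 *		... page is mlocked somewhere: don't reclaim it ...
 *	else if (refs)
 *		... recently referenced: consider keeping it active ...
 */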
static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
unsigned long address, void *arg)
{
struct page_vma_mapped_walk pvmw = {
.page = page,
.vma = vma,
.address = address,
.flags = PVMW_SYNC,
};
struct mmu_notifier_range range;
int *cleaned = arg;
/*
* We have to assume the worst case, i.e. pmd, for invalidation. Note that
* the page cannot be freed from this function.
*/
mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
0, vma, vma->vm_mm, address,
vma_address_end(page, vma));
mmu_notifier_invalidate_range_start(&range);
while (page_vma_mapped_walk(&pvmw)) {
int ret = 0;
address = pvmw.address;
if (pvmw.pte) {
pte_t entry;
pte_t *pte = pvmw.pte;
if (!pte_dirty(*pte) && !pte_write(*pte))
continue;
flush_cache_page(vma, address, pte_pfn(*pte));
entry = ptep_clear_flush(vma, address, pte);
entry = pte_wrprotect(entry);
entry = pte_mkclean(entry);
set_pte_at(vma->vm_mm, address, pte, entry);
ret = 1;
} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
pmd_t *pmd = pvmw.pmd;
pmd_t entry;
if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
continue;
flush_cache_page(vma, address, page_to_pfn(page));
entry = pmdp_invalidate(vma, address, pmd);
entry = pmd_wrprotect(entry);
entry = pmd_mkclean(entry);
set_pmd_at(vma->vm_mm, address, pmd, entry);
ret = 1;
#else
/* unexpected pmd-mapped page? */
WARN_ON_ONCE(1);
#endif
}
/*
* No need to call mmu_notifier_invalidate_range() as we are
* downgrading page table protection not changing it to point
* to a new page.
*
* See Documentation/vm/mmu_notifier.rst
*/
if (ret)
(*cleaned)++;
}
mmu_notifier_invalidate_range_end(&range);
return true;
}
static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
{
if (vma->vm_flags & VM_SHARED)
return false;
return true;
}
int page_mkclean(struct page *page)
{
int cleaned = 0;
struct address_space *mapping;
struct rmap_walk_control rwc = {
.arg = (void *)&cleaned,
.rmap_one = page_mkclean_one,
.invalid_vma = invalid_mkclean_vma,
};
BUG_ON(!PageLocked(page));
if (!page_mapped(page))
return 0;
mapping = page_mapping(page);
if (!mapping)
return 0;
rmap_walk(page, &rwc);
return cleaned;
}
EXPORT_SYMBOL_GPL(page_mkclean);
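/*
 * Hedged usage sketch (roughly what clear_page_dirty_for_io() relies on):
 * write-protecting and cleaning the ptes can race with the page being
 * dirtied through one of them, so a positive return value is folded back
 * into the page's dirty state:
 *
 *	if (page_mkclean(page))
 *		set_page_dirty(page);
 */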
/**
* page_move_anon_rmap - move a page to our anon_vma
* @page: the page to move to our anon_vma
* @vma: the vma the page belongs to
*
* When a page belongs exclusively to one process after a COW event,
* that page can be moved into the anon_vma that belongs to just that
* process, so the rmap code will not search the parent or sibling
* processes.
*/
void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
{
struct anon_vma *anon_vma = vma->anon_vma;
page = compound_head(page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_VMA(!anon_vma, vma);
anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
/*
* Ensure that anon_vma and the PAGE_MAPPING_ANON bit are written
* simultaneously, so that a concurrent reader (e.g. page_referenced()'s
* PageAnon()) will not see one without the other.
*/
WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
}
/**
* __page_set_anon_rmap - set up new anonymous rmap
* @page: Page or Hugepage to add to rmap
* @vma: VM area to add page to.
* @address: User virtual address of the mapping
* @exclusive: the page is exclusively owned by the current process
*/
static void __page_set_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address, int exclusive)
{
struct anon_vma *anon_vma = vma->anon_vma;
BUG_ON(!anon_vma);
if (PageAnon(page))
return;
/*
* If the page isn't exclusively mapped into this vma,
* we must use the _oldest_ possible anon_vma for the
* page mapping!
*/
if (!exclusive)
anon_vma = anon_vma->root;
anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
page->mapping = (struct address_space *) anon_vma;
page->index = linear_page_index(vma, address);
}
/**
* __page_check_anon_rmap - sanity check anonymous rmap addition
* @page: the page to add the mapping to
* @vma: the vm area in which the mapping is added
* @address: the user virtual address mapped
*/
static void __page_check_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address)
{
/*
* The page's anon-rmap details (mapping and index) are guaranteed to
* be set up correctly at this point.
*
* We have exclusion against page_add_anon_rmap because the caller
* always holds the page locked, except if called from page_dup_rmap,
* in which case the page is already known to be setup.
*
* We have exclusion against page_add_new_anon_rmap because those pages
* are initially only visible via the pagetables, and the pte is locked
* over the call to page_add_new_anon_rmap.
*/
VM_BUG_ON_PAGE(page_anon_vma(page)->root != vma->anon_vma->root, page);
VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address),
page);
}
/**
* page_add_anon_rmap - add pte mapping to an anonymous page
* @page: the page to add the mapping to
* @vma: the vm area in which the mapping is added
* @address: the user virtual address mapped
* @compound: charge the page as compound or small page
*
* The caller needs to hold the pte lock, and the page must be locked in
* the anon_vma case: to serialize the mapping/index check after setting them,
* and to ensure that PageAnon is not being upgraded racily to PageKsm
* (but PageKsm is never downgraded to PageAnon).
*/
void page_add_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address, bool compound)
{
do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
}
/*
* Special version of the above for do_swap_page, which often runs
* into pages that are exclusively owned by the current process.
* Everybody else should continue to use page_add_anon_rmap above.
*/
void do_page_add_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address, int flags)
{
bool compound = flags & RMAP_COMPOUND;
bool first;
if (unlikely(PageKsm(page)))
lock_page_memcg(page);
else
VM_BUG_ON_PAGE(!PageLocked(page), page);
if (compound) {
atomic_t *mapcount;
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
mapcount = compound_mapcount_ptr(page);
first = atomic_inc_and_test(mapcount);
} else {
first = atomic_inc_and_test(&page->_mapcount);
}
if (first) {
int nr = compound ? thp_nr_pages(page) : 1;
/*
* We use the irq-unsafe __{inc|mod}_lruvec_page_state because
* these counters are not modified in interrupt context, and
* the pte lock (a spinlock) is held, which implies preemption
* is disabled.
*/
if (compound)
__inc_lruvec_page_state(page, NR_ANON_THPS);
__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
}
if (unlikely(PageKsm(page))) {
unlock_page_memcg(page);
return;
}
/* address might be in next vma when migration races vma_adjust */
if (first)
__page_set_anon_rmap(page, vma, address,
flags & RMAP_EXCLUSIVE);
else
__page_check_anon_rmap(page, vma, address);
}
/**
* __page_add_new_anon_rmap - add pte mapping to a new anonymous page
* @page: the page to add the mapping to
* @vma: the vm area in which the mapping is added
* @address: the user virtual address mapped
* @compound: charge the page as compound or small page
*
* Same as page_add_anon_rmap but must only be called on *new* pages.
* This means the inc-and-test can be bypassed.
* Page does not have to be locked.
*/
void __page_add_new_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address, bool compound)
{
int nr = compound ? thp_nr_pages(page) : 1;
__SetPageSwapBacked(page);
if (compound) {
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
/* increment count (starts at -1) */
atomic_set(compound_mapcount_ptr(page), 0);
if (hpage_pincount_available(page))
atomic_set(compound_pincount_ptr(page), 0);
__inc_lruvec_page_state(page, NR_ANON_THPS);
} else {
/* Anon THP always mapped first with PMD */
VM_BUG_ON_PAGE(PageTransCompound(page), page);
/* increment count (starts at -1) */
atomic_set(&page->_mapcount, 0);
}
__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
__page_set_anon_rmap(page, vma, address, 1);
}
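/*
 * Hedged sketch of the fault path (roughly how do_anonymous_page() wires a
 * freshly allocated page in; names are illustrative and locking is elided):
 * the rmap is added before the pte is made visible:
 *
 *	inc_mm_counter_fast(mm, MM_ANONPAGES);
 *	__page_add_new_anon_rmap(page, vma, address, false);
 *	lru_cache_add_inactive_or_unevictable(page, vma);
 *	set_pte_at(mm, address, pte, entry);
 */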
/**
* page_add_file_rmap - add pte mapping to a file page
* @page: the page to add the mapping to
* @compound: charge the page as compound or small page
*
* The caller needs to hold the pte lock.
*/
void page_add_file_rmap(struct page *page, bool compound)
{
int i, nr = 1;
VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
lock_page_memcg(page);
if (compound && PageTransHuge(page)) {
for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
if (atomic_inc_and_test(&page[i]._mapcount))
nr++;
}
if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
goto out;
if (PageSwapBacked(page))
__inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
else
__inc_node_page_state(page, NR_FILE_PMDMAPPED);
} else {
if (PageTransCompound(page) && page_mapping(page)) {
VM_WARN_ON_ONCE(!PageLocked(page));
SetPageDoubleMap(compound_head(page));
if (PageMlocked(page))
clear_page_mlock(compound_head(page));
}
if (!atomic_inc_and_test(&page->_mapcount))
goto out;
}
__mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
out:
unlock_page_memcg(page);
}
static void page_remove_file_rmap(struct page *page, bool compound)
{
int i, nr = 1;
VM_BUG_ON_PAGE(compound && !PageHead(page), page);
/* Hugepages are not counted in NR_FILE_MAPPED for now. */
if (unlikely(PageHuge(page))) {
/* hugetlb pages are always mapped with pmds */
atomic_dec(compound_mapcount_ptr(page));
return;
}
/* page still mapped by someone else? */
if (compound && PageTransHuge(page)) {
for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
if (atomic_add_negative(-1, &page[i]._mapcount))
nr++;
}
if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
return;
if (PageSwapBacked(page))
__dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
else
__dec_node_page_state(page, NR_FILE_PMDMAPPED);
} else {
if (!atomic_add_negative(-1, &page->_mapcount))
return;
}
/*
* We use the irq-unsafe __{inc|mod}_lruvec_page_state because
* these counters are not modified in interrupt context, and
* the pte lock (a spinlock) is held, which implies preemption is disabled.
*/
__mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
}
static void page_remove_anon_compound_rmap(struct page *page)
{
int i, nr;
if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
return;
/* Hugepages are not counted in NR_ANON_PAGES for now. */
if (unlikely(PageHuge(page)))
return;
if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
return;
__dec_lruvec_page_state(page, NR_ANON_THPS);
if (TestClearPageDoubleMap(page)) {
/*
* Subpages can be mapped with PTEs too. Check how many of
* them are still mapped.
*/
for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
if (atomic_add_negative(-1, &page[i]._mapcount))
nr++;
}
/*
* Queue the page for deferred split if at least one small
* page of the compound page is unmapped, but at least one
* small page is still mapped.
*/
if (nr && nr < thp_nr_pages(page))
deferred_split_huge_page(page);
} else {
nr = thp_nr_pages(page);
}
if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
if (nr)
__mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);
}
/**
* page_remove_rmap - take down pte mapping from a page
* @page: page to remove mapping from
* @compound: uncharge the page as compound or small page
*
* The caller needs to hold the pte lock.
*/
void page_remove_rmap(struct page *page, bool compound)
{
lock_page_memcg(page);
if (!PageAnon(page)) {
page_remove_file_rmap(page, compound);
goto out;
}
if (compound) {
page_remove_anon_compound_rmap(page);
goto out;
}
/* page still mapped by someone else? */
if (!atomic_add_negative(-1, &page->_mapcount))
goto out;
/*
* We use the irq-unsafe __{inc|mod}_lruvec_page_state because
* these counters are not modified in interrupt context, and
* the pte lock (a spinlock) is held, which implies preemption is disabled.
*/
__dec_lruvec_page_state(page, NR_ANON_MAPPED);
if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
if (PageTransCompound(page))
deferred_split_huge_page(compound_head(page));
/*
* It would be tidy to reset the PageAnon mapping here,
* but that might overwrite a racing page_add_anon_rmap
* which increments mapcount after us but sets mapping
* before us: so leave the reset to free_unref_page,
* and remember that it's only reliable while mapped.
* Leaving it set also helps swapoff to reinstate ptes
* faster for those pages still in swapcache.
*/
out:
unlock_page_memcg(page);
}
/*
* @arg: enum ttu_flags will be passed in this argument
*/
static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
unsigned long address, void *arg)
{
struct mm_struct *mm = vma->vm_mm;
struct page_vma_mapped_walk pvmw = {
.page = page,
.vma = vma,
.address = address,
};
pte_t pteval;
struct page *subpage;
bool ret = true;
struct mmu_notifier_range range;
enum ttu_flags flags = (enum ttu_flags)(long)arg;
/*
* When racing against e.g. zap_pte_range() on another cpu,
* in between its ptep_get_and_clear_full() and page_remove_rmap(),
* try_to_unmap() may return false when it is about to become true,
* if page table locking is skipped: use TTU_SYNC to wait for that.
*/
if (flags & TTU_SYNC)
pvmw.flags = PVMW_SYNC;
/* munlock has nothing to gain from examining un-locked vmas */
if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
return true;
if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
is_zone_device_page(page) && !is_device_private_page(page))
return true;
if (flags & TTU_SPLIT_HUGE_PMD) {
split_huge_pmd_address(vma, address,
flags & TTU_SPLIT_FREEZE, page);
}
/*
* For THP, we have to assume the worst case, i.e. pmd, for invalidation.
* For hugetlb, it could be much worse if we need to do pud
* invalidation in the case of pmd sharing.
*
* Note that the page cannot be freed in this function, as the caller of
* try_to_unmap() must hold a reference on the page.
*/
range.end = PageKsm(page) ?
address + PAGE_SIZE : vma_address_end(page, vma);
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
address, range.end);
if (PageHuge(page)) {
/*
* If sharing is possible, start and end will be adjusted
* accordingly.
*/
adjust_range_if_pmd_sharing_possible(vma, &range.start,
&range.end);
}
mmu_notifier_invalidate_range_start(&range);
while (page_vma_mapped_walk(&pvmw)) {
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
/* PMD-mapped THP migration entry */
if (!pvmw.pte && (flags & TTU_MIGRATION)) {
VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
set_pmd_migration_entry(&pvmw, page);
continue;
}
#endif
/*
* If the page is mlock()d, we cannot swap it out.
* If it's recently referenced (perhaps page_referenced
* skipped over this mm) then we should reactivate it.
*/
if (!(flags & TTU_IGNORE_MLOCK)) {
if (vma->vm_flags & VM_LOCKED) {
/* PTE-mapped THP are never mlocked */
if (!PageTransCompound(page)) {
/*
* Holding pte lock, we do *not* need
* mmap_lock here
*/
mlock_vma_page(page);
}
ret = false;
page_vma_mapped_walk_done(&pvmw);
break;
}
if (flags & TTU_MUNLOCK)
continue;
}
/* Unexpected PMD-mapped THP? */
VM_BUG_ON_PAGE(!pvmw.pte, page);
subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
address = pvmw.address;
if (PageHuge(page) && !PageAnon(page)) {
/*
* To call huge_pmd_unshare, i_mmap_rwsem must be
* held in write mode. Caller needs to explicitly
* do this outside rmap routines.
*/
VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
/*
* huge_pmd_unshare unmapped an entire PMD
* page. There is no way of knowing exactly
* which PMDs may be cached for this mm, so
* we must flush them all. start/end were
* already adjusted above to cover this range.
*/
flush_cache_range(vma, range.start, range.end);
flush_tlb_range(vma, range.start, range.end);
mmu_notifier_invalidate_range(mm, range.start,
range.end);
/*
* The ref count of the PMD page was dropped
* which is part of the way map counting
* is done for shared PMDs. Return 'true'
* here. When there is no other sharing,
* huge_pmd_unshare returns false and we will
* unmap the actual page and drop map count
* to zero.
*/
page_vma_mapped_walk_done(&pvmw);
break;
}
}
if (IS_ENABLED(CONFIG_MIGRATION) &&
(flags & TTU_MIGRATION) &&
is_zone_device_page(page)) {
swp_entry_t entry;
pte_t swp_pte;
pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);
/*
* Store the pfn of the page in a special migration
* pte. do_swap_page() will wait until the migration
* pte is removed and then restart fault handling.
*/
entry = make_migration_entry(page, 0);
swp_pte = swp_entry_to_pte(entry);
/*
* pteval maps a zone device page and is therefore
* a swap pte.
*/
if (pte_swp_soft_dirty(pteval))
swp_pte = pte_swp_mksoft_dirty(swp_pte);
if (pte_swp_uffd_wp(pteval))
swp_pte = pte_swp_mkuffd_wp(swp_pte);
set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
/*
* No need to invalidate here; it will synchronize
* against the special swap migration pte.
*
* The assignment to subpage above was computed from a
* swap PTE which results in an invalid pointer.
* Since only PAGE_SIZE pages can currently be
* migrated, just set it to page. This will need to be
* changed when hugepage migrations to device private
* memory are supported.
*/
subpage = page;
goto discard;
}
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
if (should_defer_flush(mm, flags)) {
/*
* We clear the PTE but do not flush so potentially
* a remote CPU could still be writing to the page.
* If the entry was previously clean then the
* architecture must guarantee that a clear->dirty
* transition on a cached TLB entry is written through
* and traps if the PTE is unmapped.
*/
pteval = ptep_get_and_clear(mm, address, pvmw.pte);
set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
} else {
pteval = ptep_clear_flush(vma, address, pvmw.pte);
}
/* Move the dirty bit to the page. Now the pte is gone. */
if (pte_dirty(pteval))
set_page_dirty(page);
/* Update high watermark before we lower rss */
update_hiwater_rss(mm);
if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
if (PageHuge(page)) {
hugetlb_count_sub(compound_nr(page), mm);
set_huge_swap_pte_at(mm, address,
pvmw.pte, pteval,
vma_mmu_pagesize(vma));
} else {
dec_mm_counter(mm, mm_counter(page));
set_pte_at(mm, address, pvmw.pte, pteval);
}
} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
/*
* The guest indicated that the page content is of no
* interest anymore. Simply discard the pte, vmscan
* will take care of the rest.
* A future reference will then fault in a new zero
* page. When userfaultfd is active, we must not drop
* this page though, as its main user (postcopy
* migration) will not expect userfaults on already
* copied pages.
*/
dec_mm_counter(mm, mm_counter(page));
/* We have to invalidate as we cleared the pte */
mmu_notifier_invalidate_range(mm, address,
address + PAGE_SIZE);
} else if (IS_ENABLED(CONFIG_MIGRATION) &&
(flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
swp_entry_t entry;
pte_t swp_pte;
if (arch_unmap_one(mm, vma, address, pteval) < 0) {
set_pte_at(mm, address, pvmw.pte, pteval);
ret = false;
page_vma_mapped_walk_done(&pvmw);
break;
}
/*
* Store the pfn of the page in a special migration
* pte. do_swap_page() will wait until the migration
* pte is removed and then restart fault handling.
*/
entry = make_migration_entry(subpage,
pte_write(pteval));
swp_pte = swp_entry_to_pte(entry);
if (pte_soft_dirty(pteval))
swp_pte = pte_swp_mksoft_dirty(swp_pte);
if (pte_uffd_wp(pteval))
swp_pte = pte_swp_mkuffd_wp(swp_pte);
set_pte_at(mm, address, pvmw.pte, swp_pte);
/*
* No need to invalidate here; it will synchronize
* against the special swap migration pte.
*/
} else if (PageAnon(page)) {
swp_entry_t entry = { .val = page_private(subpage) };
pte_t swp_pte;
/*
* Store the swap location in the pte.
* See handle_pte_fault() ...
*/
if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) {
WARN_ON_ONCE(1);
ret = false;
/* We have to invalidate as we cleared the pte */
mmu_notifier_invalidate_range(mm, address,
address + PAGE_SIZE);
page_vma_mapped_walk_done(&pvmw);
break;
}
/* MADV_FREE page check */
if (!PageSwapBacked(page)) {
int ref_count, map_count;
/*
* Synchronize with gup_pte_range():
* - clear PTE; barrier; read refcount
* - inc refcount; barrier; read PTE
*/
smp_mb();
ref_count = page_ref_count(page);
map_count = page_mapcount(page);
/*
* Order reads for page refcount and dirty flag
* (see comments in __remove_mapping()).
*/
smp_rmb();
/*
* The only page refs must be one from isolation
* plus the rmap(s) (dropped by discard:).
*/
if (ref_count == 1 + map_count &&
!PageDirty(page)) {
/* Invalidate as we cleared the pte */
mmu_notifier_invalidate_range(mm,
address, address + PAGE_SIZE);
dec_mm_counter(mm, MM_ANONPAGES);
goto discard;
}
/*
* If the page was redirtied, it cannot be
* discarded. Remap the page to page table.
*/
set_pte_at(mm, address, pvmw.pte, pteval);
SetPageSwapBacked(page);
ret = false;
page_vma_mapped_walk_done(&pvmw);
break;
}
if (swap_duplicate(entry) < 0) {
set_pte_at(mm, address, pvmw.pte, pteval);
ret = false;
page_vma_mapped_walk_done(&pvmw);
break;
}
if (arch_unmap_one(mm, vma, address, pteval) < 0) {
set_pte_at(mm, address, pvmw.pte, pteval);
ret = false;
page_vma_mapped_walk_done(&pvmw);
break;
}
if (list_empty(&mm->mmlist)) {
spin_lock(&mmlist_lock);
if (list_empty(&mm->mmlist))
list_add(&mm->mmlist, &init_mm.mmlist);
spin_unlock(&mmlist_lock);
}
dec_mm_counter(mm, MM_ANONPAGES);
inc_mm_counter(mm, MM_SWAPENTS);
swp_pte = swp_entry_to_pte(entry);
if (pte_soft_dirty(pteval))
swp_pte = pte_swp_mksoft_dirty(swp_pte);
if (pte_uffd_wp(pteval))
swp_pte = pte_swp_mkuffd_wp(swp_pte);
set_pte_at(mm, address, pvmw.pte, swp_pte);
/* Invalidate as we cleared the pte */
mmu_notifier_invalidate_range(mm, address,
address + PAGE_SIZE);
} else {
/*
* This is a locked file-backed page, thus it cannot
* be removed from the page cache and replaced by a new
* page before mmu_notifier_invalidate_range_end, so no
* concurrent thread can update its page table to
* point at a new page while a device is still using this
* page.
*
* See Documentation/vm/mmu_notifier.rst
*/
dec_mm_counter(mm, mm_counter_file(page));
}
discard:
/*
* No need to call mmu_notifier_invalidate_range(); it has been
* done above for all cases that require it to happen under the
* page table lock, before mmu_notifier_invalidate_range_end().
*
* See Documentation/vm/mmu_notifier.rst
*/
page_remove_rmap(subpage, PageHuge(page));
put_page(page);
}
mmu_notifier_invalidate_range_end(&range);
trace_android_vh_try_to_unmap_one(vma, page, address, ret);
return ret;
}
static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
{
return vma_is_temporary_stack(vma);
}
static int page_not_mapped(struct page *page)
{
return !page_mapped(page);
}
/**
* try_to_unmap - try to remove all page table mappings to a page
* @page: the page to get unmapped
* @flags: action and flags
*
* Tries to remove all the page table entries which are mapping this
* page, used in the pageout path. Caller must hold the page lock.
*
* If unmap is successful, return true. Otherwise, false.
*/
bool try_to_unmap(struct page *page, enum ttu_flags flags)
{
struct rmap_walk_control rwc = {
.rmap_one = try_to_unmap_one,
.arg = (void *)flags,
.done = page_not_mapped,
.anon_lock = page_lock_anon_vma_read,
};
/*
* During exec, a temporary VMA is set up and later moved.
* The VMA is moved under the anon_vma lock but not the
* page tables leading to a race where migration cannot
* find the migration ptes. Rather than increasing the
* locking requirements of exec(), migration skips
* temporary VMAs until after exec() completes.
*/
if ((flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))
&& !PageKsm(page) && PageAnon(page))
rwc.invalid_vma = invalid_migration_vma;
if (flags & TTU_RMAP_LOCKED)
rmap_walk_locked(page, &rwc);
else
rmap_walk(page, &rwc);
/*
* When racing against e.g. zap_pte_range() on another cpu,
* in between its ptep_get_and_clear_full() and page_remove_rmap(),
* try_to_unmap() may return false when it is about to become true,
* if page table locking is skipped: use TTU_SYNC to wait for that.
*/
return !page_mapcount(page);
}
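/*
 * Hedged usage sketch (roughly the pageout path; illustration only): the
 * page must be locked, and reclaim only proceeds to write out or free the
 * page once all of its mappings are gone:
 *
 *	if (!try_to_unmap(page, flags))
 *		... still mapped: keep the page ...
 *	else
 *		... proceed to pageout()/free ...
 */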
/**
* try_to_munlock - try to munlock a page
* @page: the page to be munlocked
*
* Called from munlock code. Checks all of the VMAs mapping the page
* to make sure nobody else has this page mlocked. The page will be
* returned with PG_mlocked cleared if no other vmas have it mlocked.
*/
void try_to_munlock(struct page *page)
{
struct rmap_walk_control rwc = {
.rmap_one = try_to_unmap_one,
.arg = (void *)TTU_MUNLOCK,
.done = page_not_mapped,
.anon_lock = page_lock_anon_vma_read,
};
VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
rmap_walk(page, &rwc);
}
void __put_anon_vma(struct anon_vma *anon_vma)
{
struct anon_vma *root = anon_vma->root;
anon_vma_free(anon_vma);
if (root != anon_vma && atomic_dec_and_test(&root->refcount))
anon_vma_free(root);
}
static struct anon_vma *rmap_walk_anon_lock(struct page *page,
struct rmap_walk_control *rwc)
{
struct anon_vma *anon_vma;
if (rwc->anon_lock)
return rwc->anon_lock(page);
/*
* Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
* because that depends on page_mapped(); but not all of its callers
* hold mmap_lock. Users without mmap_lock are required to
* take a reference count to prevent the anon_vma from disappearing.
*/
anon_vma = page_anon_vma(page);
if (!anon_vma)
return NULL;
anon_vma_lock_read(anon_vma);
return anon_vma;
}
/*
* rmap_walk_anon - do something to an anonymous page using the object-based
* rmap method
* @page: the page to be handled
* @rwc: control variable according to each walk type
*
* Find all the mappings of a page using the mapping pointer and the vma chains
* contained in the anon_vma struct it points to.
*
* When called from try_to_munlock(), the mmap_lock of the mm containing the vma
* where the page was found will be held for write. So, we won't recheck
* vm_flags for that VMA. That should be OK, because that vma shouldn't be
* LOCKED.
*/
static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
bool locked)
{
struct anon_vma *anon_vma;
pgoff_t pgoff_start, pgoff_end;
struct anon_vma_chain *avc;
if (locked) {
anon_vma = page_anon_vma(page);
/* anon_vma disappear under us? */
VM_BUG_ON_PAGE(!anon_vma, page);
} else {
anon_vma = rmap_walk_anon_lock(page, rwc);
}
if (!anon_vma)
return;
pgoff_start = page_to_pgoff(page);
pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
pgoff_start, pgoff_end) {
struct vm_area_struct *vma = avc->vma;
unsigned long address = vma_address(page, vma);
VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();
if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
continue;
if (!rwc->rmap_one(page, vma, address, rwc->arg))
break;
if (rwc->done && rwc->done(page))
break;
}
if (!locked)
anon_vma_unlock_read(anon_vma);
}
/*
* rmap_walk_file - do something to file page using the object-based rmap method
* @page: the page to be handled
* @rwc: control variable according to each walk type
*
* Find all the mappings of a page using the mapping pointer and the vma chains
* contained in the address_space struct it points to.
*
* When called from try_to_munlock(), the mmap_lock of the mm containing the vma
* where the page was found will be held for write. So, we won't recheck
* vm_flags for that VMA. That should be OK, because that vma shouldn't be
* LOCKED.
*/
static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
bool locked)
{
struct address_space *mapping = page_mapping(page);
pgoff_t pgoff_start, pgoff_end;
struct vm_area_struct *vma;
/*
* The page lock not only makes sure that page->mapping cannot
* suddenly be NULLified by truncation, it makes sure that the
* structure at mapping cannot be freed and reused yet,
* so we can safely take mapping->i_mmap_rwsem.
*/
VM_BUG_ON_PAGE(!PageLocked(page), page);
if (!mapping)
return;
pgoff_start = page_to_pgoff(page);
pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
if (!locked)
i_mmap_lock_read(mapping);
vma_interval_tree_foreach(vma, &mapping->i_mmap,
pgoff_start, pgoff_end) {
unsigned long address = vma_address(page, vma);
VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();
if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
continue;
if (!rwc->rmap_one(page, vma, address, rwc->arg))
goto done;
if (rwc->done && rwc->done(page))
goto done;
}
done:
if (!locked)
i_mmap_unlock_read(mapping);
}
void rmap_walk(struct page *page, struct rmap_walk_control *rwc)
{
if (unlikely(PageKsm(page)))
rmap_walk_ksm(page, rwc);
else if (PageAnon(page))
rmap_walk_anon(page, rwc, false);
else
rmap_walk_file(page, rwc, false);
}
/* Like rmap_walk, but caller holds relevant rmap lock */
void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc)
{
/* no ksm support for now */
VM_BUG_ON_PAGE(PageKsm(page), page);
if (PageAnon(page))
rmap_walk_anon(page, rwc, true);
else
rmap_walk_file(page, rwc, true);
}
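/*
 * Hedged sketch of defining a new rmap walk (illustration only; my_rmap_one
 * and my_state are hypothetical): a walker supplies an rmap_one callback
 * that returns false to stop the walk, plus optional done/invalid_vma/
 * anon_lock hooks:
 *
 *	static bool my_rmap_one(struct page *page, struct vm_area_struct *vma,
 *				unsigned long address, void *arg)
 *	{
 *		... inspect one mapping of page ...
 *		return true;
 *	}
 *
 *	struct rmap_walk_control rwc = {
 *		.rmap_one = my_rmap_one,
 *		.arg = &my_state,
 *	};
 *	rmap_walk(page, &rwc);
 */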
#ifdef CONFIG_HUGETLB_PAGE
/*
* The following two functions are for anonymous (private mapped) hugepages.
* Unlike common anonymous pages, anonymous hugepages have no accounting code
* and no lru code, because we handle hugepages differently from common pages.
*/
void hugepage_add_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address)
{
struct anon_vma *anon_vma = vma->anon_vma;
int first;
BUG_ON(!PageLocked(page));
BUG_ON(!anon_vma);
/* address might be in next vma when migration races vma_adjust */
first = atomic_inc_and_test(compound_mapcount_ptr(page));
if (first)
__page_set_anon_rmap(page, vma, address, 0);
}
void hugepage_add_new_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address)
{
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
atomic_set(compound_mapcount_ptr(page), 0);
if (hpage_pincount_available(page))
atomic_set(compound_pincount_ptr(page), 0);
__page_set_anon_rmap(page, vma, address, 1);
}
#endif /* CONFIG_HUGETLB_PAGE */