
This is the merge of the upstream LTS release of 5.10.117 into the android12-5.10 branch. It contains the following commits:

fdd06dc6b0 ANDROID: GKI: db845c: Update symbols list and ABI
0974b8411a Merge 5.10.117 into android12-5.10-lts
7686a5c2a8 Linux 5.10.117
937c6b0e3e SUNRPC: Fix fall-through warnings for Clang
29f077d070 io_uring: always use original task when preparing req identity
1444e0568b usb: gadget: uvc: allow for application to cleanly shutdown
42505e3622 usb: gadget: uvc: rename function to be more consistent
002e7223dc ping: fix address binding wrt vrf
d9a1e82bf6 arm[64]/memremap: don't abuse pfn_valid() to ensure presence of linear map
49750c5e9a net: phy: Fix race condition on link status change
e68b60ae29 SUNRPC: Ensure we flush any closed sockets before xs_xprt_free()
dbe6974a39 SUNRPC: Don't call connect() more than once on a TCP socket
47541ed4d4 SUNRPC: Prevent immediate close+reconnect
2ab569edd8 SUNRPC: Clean up scheduling of autoclose
85844ea29f drm/vmwgfx: Initialize drm_mode_fb_cmd2
7e849dbe60 cgroup/cpuset: Remove cpus_allowed/mems_allowed setup in cpuset_init_smp()
6aa239d82e net: atlantic: always deep reset on pm op, fixing up my null deref regression
6158df4fa5 i40e: i40e_main: fix a missing check on list iterator
819796024c drm/nouveau/tegra: Stop using iommu_present()
e06605af8b ceph: fix setting of xattrs on async created inodes
86db01f373 serial: 8250_mtk: Fix register address for XON/XOFF character
84ad84e495 serial: 8250_mtk: Fix UART_EFR register address
f8d8440f13 slimbus: qcom: Fix IRQ check in qcom_slim_probe
d7b7c5532a USB: serial: option: add Fibocom MA510 modem
2ba0034e36 USB: serial: option: add Fibocom L610 modem
319b312edb USB: serial: qcserial: add support for Sierra Wireless EM7590
994395f356 USB: serial: pl2303: add device id for HP LM930 Display
8276a3dbe2 usb: typec: tcpci_mt6360: Update for BMC PHY setting
54979aa49e usb: typec: tcpci: Don't skip cleanup in .remove() on error
7335a6b11d usb: cdc-wdm: fix reading stuck on device close
6d47eceaf3 tty: n_gsm: fix mux activation issues in gsm_config()
69139a45b8 tty/serial: digicolor: fix possible null-ptr-deref in digicolor_uart_probe()
5a73581116 firmware_loader: use kernel credentials when reading firmware
d254309aab tcp: resalt the secret every 10 seconds
3abbfac1ab net: sfp: Add tx-fault workaround for Huawei MA5671A SFP ONT
48f1dd67a8 net: emaclite: Don't advertise 1000BASE-T and do auto negotiation
5c09dbdfd4 s390: disable -Warray-bounds
03ebc6fd5c ASoC: ops: Validate input values in snd_soc_put_volsw_range()
31606a73ba ASoC: max98090: Generate notifications on changes for custom control
ce154bd3bc ASoC: max98090: Reject invalid values in custom control put()
5ecaaaeb2c hwmon: (f71882fg) Fix negative temperature
88091c0275 gfs2: Fix filesystem block deallocation for short writes
fccf4bf3f2 tls: Fix context leak on tls_device_down
161c4edeca net: sfc: ef10: fix memory leak in efx_ef10_mtd_probe()
d5e1b41bf7 net/smc: non blocking recvmsg() return -EAGAIN when no data and signal_pending
e417a8fcea net: dsa: bcm_sf2: Fix Wake-on-LAN with mac_link_down()
9012209f43 net: bcmgenet: Check for Wake-on-LAN interrupt probe deferral
abe35bf3be net/sched: act_pedit: really ensure the skb is writable
b816ed53f3 s390/lcs: fix variable dereferenced before check
4d3c6d7418 s390/ctcm: fix potential memory leak
5497f87edc s390/ctcm: fix variable dereferenced before check
cc71c9f17c selftests: vm: Makefile: rename TARGETS to VMTARGETS
ce12e5ff8d hwmon: (ltq-cputemp) restrict it to SOC_XWAY
ceb3db723f dim: initialize all struct fields
8b1b8fc819 ionic: fix missing pci_release_regions() on error in ionic_probe()
2cb8689f45 nfs: fix broken handling of the softreval mount option
49c10784b9 mac80211_hwsim: call ieee80211_tx_prepare_skb under RCU protection
79432d2237 net: sfc: fix memory leak due to ptp channel
bdb8d4aed1 sfc: Use swap() instead of open coding it
33c93f6e55 netlink: do not reset transport header in netlink_recvmsg()
9e40f2c513 drm/nouveau: Fix a potential theorical leak in nouveau_get_backlight_name()
54f26fc07e ipv4: drop dst in multicast routing path
c07a84492f net: mscc: ocelot: avoid corrupting hardware counters when moving VCAP filters
abb237c544 net: mscc: ocelot: restrict tc-trap actions to VCAP IS2 lookup 0
f9674c52a1 net: mscc: ocelot: fix VCAP IS2 filters matching on both lookups
c1184d2888 net: mscc: ocelot: fix last VCAP IS1/IS2 filter persisting in hardware when deleted
e2cdde89d2 net: Fix features skip in for_each_netdev_feature()
c420d66047 mac80211: Reset MBSSID parameters upon connection
9cbf2a7d5d hwmon: (tmp401) Add OF device ID table
85eba08be2 iwlwifi: iwl-dbg: Use del_timer_sync() before freeing
a6a73781b4 batman-adv: Don't skb_split skbuffs with frag_list
0577ff1c69 Merge 5.10.116 into android12-5.10-lts
3f70116e5f Merge 5.10.115 into android12-5.10-lts
07a4d3649a Linux 5.10.116
d1ac096f88 mm: userfaultfd: fix missing cache flush in mcopy_atomic_pte() and __mcopy_atomic()
c6cbf5431a mm: hugetlb: fix missing cache flush in copy_huge_page_from_user()
308ff6a6e7 mm: fix missing cache flush for all tail pages of compound page
185fa5984d Bluetooth: Fix the creation of hdev->name
9ff4a6b806 arm: remove CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
dfb55dcf9d nfp: bpf: silence bitwise vs. logical OR warning
f89f76f4b0 drm/amd/display/dc/gpio/gpio_service: Pass around correct dce_{version, environment} types
efd1429fa9 block: drbd: drbd_nl: Make conversion to 'enum drbd_ret_code' explicit
a71658c7db regulator: consumer: Add missing stubs to regulator/consumer.h
7648f42d1a MIPS: Use address-of operator on section symbols
2ed28105c6 ANDROID: GKI: update the abi .xml file due to hex_to_bin() changes
ee8877df71 Revert "tcp: ensure to use the most recently sent skb when filling the rate sample"
6273d79c86 Merge 5.10.114 into android12-5.10-lts
e61686bb77 Linux 5.10.115
8528806abe mmc: rtsx: add 74 Clocks in power on flow
e1ab92302b PCI: aardvark: Fix reading MSI interrupt number
49143c9ed2 PCI: aardvark: Clear all MSIs at setup
7676a5b99f dm: interlock pending dm_io and dm_wait_for_bios_completion
a439819f47 block-map: add __GFP_ZERO flag for alloc_page in function bio_copy_kern
a22d66eb51 rcu: Apply callbacks processing time limit only on softirq
40fb3812d9 rcu: Fix callbacks processing time limit retaining cond_resched()
43dbc3edad KVM: LAPIC: Enable timer posted-interrupt only when mwait/hlt is advertised
9c8474fa34 KVM: x86/mmu: avoid NULL-pointer dereference on page freeing bugs
a474ee5ece KVM: x86: Do not change ICR on write to APIC_SELF_IPI
64e3e16dbc x86/kvm: Preserve BSP MSR_KVM_POLL_CONTROL across suspend/resume
5f884e0c2e net/mlx5: Fix slab-out-of-bounds while reading resource dump menu
599fc32e74 kvm: x86/cpuid: Only provide CPUID leaf 0xA if host has architectural PMU
0a960a3672 net: igmp: respect RCU rules in ip_mc_source() and ip_mc_msfilter()
4fd45ef704 btrfs: always log symlinks in full mode
687167eef9 smsc911x: allow using IRQ0
b280877eab selftests: ocelot: tc_flower_chains: specify conform-exceed action for policer
a9fd5d6cd5 bnxt_en: Fix unnecessary dropping of RX packets
72e4fc1a4e bnxt_en: Fix possible bnxt_open() failure caused by wrong RFS flag
9ac9f07f0f selftests: mirror_gre_bridge_1q: Avoid changing PVID while interface is operational
475237e807 hinic: fix bug of wq out of bound access
1b9f1f455d net: emaclite: Add error handling for of_address_to_resource()
8459485db7 net: cpsw: add missing of_node_put() in cpsw_probe_dt()
4eee980950 net: stmmac: dwmac-sun8i: add missing of_node_put() in sun8i_dwmac_register_mdio_mux()
2347e9c922 net: dsa: mt7530: add missing of_node_put() in mt7530_setup()
1092656cc4 net: ethernet: mediatek: add missing of_node_put() in mtk_sgmii_init()
408fb2680e NFSv4: Don't invalidate inode attributes on delegation return
c1b480e6be RDMA/siw: Fix a condition race issue in MPA request processing
5bf2a45e33 selftests/seccomp: Don't call read() on TTY from background pgrp
3ea0b44c01 net/mlx5: Avoid double clear or set of sync reset requested
2455331591 net/mlx5e: Fix the calling of update_buffer_lossy() API
e07c13fbdd net/mlx5e: CT: Fix queued up restore put() executing after relevant ft release
d8338a7a09 net/mlx5e: Don't match double-vlan packets if cvlan is not set
c7f87ad115 net/mlx5e: Fix trust state reset in reload
87f0d9a518 ASoC: dmaengine: Restore NULL prepare_slave_config() callback
ad87f8498e hwmon: (adt7470) Fix warning on module removal
997b8605e8 gpio: pca953x: fix irq_stat not updated when irq is disabled (irq_mask not set)
879b075a9a NFC: netlink: fix sleep in atomic bug when firmware download timeout
1961c5a688 nfc: nfcmrvl: main: reorder destructive operations in nfcmrvl_nci_unregister_dev to avoid bugs
8a9e7c64f4 nfc: replace improper check device_is_registered() in netlink related functions
11adc9ab3e can: grcan: only use the NAPI poll budget for RX
4df5e498e0 can: grcan: grcan_probe(): fix broken system id check for errata workaround needs
dd973c0185 can: grcan: use ofdev->dev when allocating DMA memory
45bdcb5ca4 can: isotp: remove re-binding of bound socket
13959b9117 can: grcan: grcan_close(): fix deadlock
6c7c0e131e s390/dasd: Fix read inconsistency for ESE DASD devices
6e02c0413a s390/dasd: Fix read for ESE with blksize < 4k
ecc8396827 s390/dasd: prevent double format of tracks for ESE devices
30e008ab3f s390/dasd: fix data corruption for ESE devices
d53d47fadd ASoC: meson: Fix event generation for AUI CODEC mux
93a1f0755e ASoC: meson: Fix event generation for G12A tohdmi mux
e8b08e2f17 ASoC: meson: Fix event generation for AUI ACODEC mux
954d55170f ASoC: wm8958: Fix change notifications for DSP controls
f45359824a ASoC: da7219: Fix change notifications for tone generator frequency
e6e61aab49 genirq: Synchronize interrupt thread startup
dcf1150f2e net: stmmac: disable Split Header (SPH) for Intel platforms
68f35987d4 firewire: core: extend card->lock in fw_core_handle_bus_reset
629b4003a7 firewire: remove check of list iterator against head past the loop body
e757ff4bbc firewire: fix potential uaf in outbound_phy_packet_callback()
70d25d4fba Revert "SUNRPC: attempt AF_LOCAL connect on setup"
466721d767 drm/amd/display: Avoid reading audio pattern past AUDIO_CHANNELS_COUNT
2e6f3d665a iommu/vt-d: Calculate mask for non-aligned flushes
fbb7c61e76 KVM: x86/svm: Account for family 17h event renumberings in amd_pmc_perf_hw_id
b085afe226 gpiolib: of: fix bounds check for 'gpio-reserved-ranges'
2b7cb072d0 mmc: core: Set HS clock speed before sending HS CMD13
66651d7199 mmc: sdhci-msm: Reset GCC_SDCC_BCR register for SDHC
2906c73632 ALSA: fireworks: fix wrong return count shorter than expected by 4 bytes
03ab174805 ALSA: hda/realtek: Add quirk for Yoga Duet 7 13ITL6 speakers
a196f277c5 parisc: Merge model and model name into one line in /proc/cpuinfo
326f02f172 MIPS: Fix CP0 counter erratum detection for R4k CPUs
681997eca1 Revert "ipv6: make ip6_rt_gc_expire an atomic_t"
141fbd343b Revert "oom_kill.c: futex: delay the OOM reaper to allow time for proper futex cleanup"
ca9b002a16 Merge 5.10.113 into android12-5.10-lts
f64cd19a00 Merge branch 'android12-5.10' into `android12-5.10-lts`
f40e35e79c Linux 5.10.114
2d74f61787 perf symbol: Remove arch__symbols__fixup_end()
bf98302e68 tty: n_gsm: fix software flow control handling
95b267271a tty: n_gsm: fix incorrect UA handling
70b045d9ae tty: n_gsm: fix reset fifo race condition
320a24c4ef tty: n_gsm: fix wrong command frame length field encoding
935f314b6f tty: n_gsm: fix wrong command retry handling
17b86db43c tty: n_gsm: fix missing explicit ldisc flush
a2baa907c2 tty: n_gsm: fix wrong DLCI release order
705925e693 tty: n_gsm: fix insufficient txframe size
842a9bbbef netfilter: nft_socket: only do sk lookups when indev is available
7346e54dbf tty: n_gsm: fix malformed counter for out of frame data
d19613895e tty: n_gsm: fix wrong signal octet encoding in convergence layer type 2
26f127f6d9 tty: n_gsm: fix mux cleanup after unregister tty device
f26c271492 tty: n_gsm: fix decoupled mux resource
47132f9f7f tty: n_gsm: fix restart handling via CLD command
b3c88d46db perf symbol: Update symbols__fixup_end()
3d0a3168a3 perf symbol: Pass is_kallsyms to symbols__fixup_end()
2ab14625b8 x86/cpu: Load microcode during restore_processor_state()
795afbe8b4 thermal: int340x: Fix attr.show callback prototype
11d16498d7 net: ethernet: stmmac: fix write to sgmii_adapter_base
236dd62230 drm/i915: Fix SEL_FETCH_PLANE_*(PIPE_B+) register addresses
78d4dccf16 kasan: prevent cpu_quarantine corruption when CPU offline and cache shrink occur at same time
5fef6df273 zonefs: Clear inode information flags on inode creation
92ed64a920 zonefs: Fix management of open zones
42e8ec3b4b powerpc/perf: Fix 32bit compile
ac3d077043 drivers: net: hippi: Fix deadlock in rr_close()
5399e7b80c cifs: destage any unwritten data to the server before calling copychunk_write
80fc45377f x86: __memcpy_flushcache: fix wrong alignment if size > 2^32
585ef03c9e ext4: fix bug_on in start_this_handle during umount filesystem
07da0be588 ASoC: wm8731: Disable the regulator when probing fails
1b1747ad7e ASoC: Intel: soc-acpi: correct device endpoints for max98373
aa138efd2b tcp: fix F-RTO may not work correctly when receiving DSACK
9d56e369bd Revert "ibmvnic: Add ethtool private flag for driver-defined queue limits"
96904c8289 ibmvnic: fix miscellaneous checks
17f71272ef ixgbe: ensure IPsec VF<->PF compatibility
c33d717e06 net: fec: add missing of_node_put() in fec_enet_init_stop_mode()
9591967ac4 bnx2x: fix napi API usage sequence
1781beb879 tls: Skip tls_append_frag on zero copy size
77b922683e drm/amd/display: Fix memory leak in dcn21_clock_source_create
18068e0527 drm/amdkfd: Fix GWS queue count
c0396f5e5b net: dsa: lantiq_gswip: Don't set GSWIP_MII_CFG_RMII_CLK
1204386e26 net: phy: marvell10g: fix return value on error
e974c730f0 net: bcmgenet: hide status block before TX timestamping
ee71b47da5 clk: sunxi: sun9i-mmc: check return value after calling platform_get_resource()
8dacbef4fe bus: sunxi-rsb: Fix the return value of sunxi_rsb_device_create()
9f29f6f8da tcp: make sure treq->af_specific is initialized
8a9d6ca360 tcp: fix potential xmit stalls caused by TCP_NOTSENT_LOWAT
720b6ced85 ip_gre, ip6_gre: Fix race condition on o_seqno in collect_md mode
41661b4c1a ip6_gre: Make o_seqno start from 0 in native mode
7b187fbd7e ip_gre: Make o_seqno start from 0 in native mode
83d128daff net/smc: sync err code when tcp connection was refused
9eb25e00f5 net: hns3: add return value for mailbox handling in PF
929c30c02d net: hns3: add validity check for message data length
e3ec78d82d net: hns3: modify the return code of hclge_get_ring_chain_from_mbx
06a40e7105 cpufreq: fix memory leak in sun50i_cpufreq_nvmem_probe
fb172e93f8 pinctrl: pistachio: fix use of irq_of_parse_and_map()
8f042884af arm64: dts: imx8mn-ddr4-evk: Describe the 32.768 kHz PMIC clock
73c35379db ARM: dts: imx6ull-colibri: fix vqmmc regulator
61a89d0a5b sctp: check asoc strreset_chunk in sctp_generate_reconf_event
41d6ac687d wireguard: device: check for metadata_dst with skb_valid_dst()
3c464db03c tcp: ensure to use the most recently sent skb when filling the rate sample
ce4c3f7087 pinctrl: stm32: Keep pinctrl block clock enabled when LEVEL IRQ requested
0c60271df0 tcp: md5: incorrect tcp_header_len for incoming connections
f4dad5a48d pinctrl: rockchip: fix RK3308 pinmux bits
9ef33d23f8 bpf, lwt: Fix crash when using bpf_skb_set_tunnel_key() from bpf_xmit lwt hook
6ac03e6ddd netfilter: nft_set_rbtree: overlap detection with element re-addition after deletion
72ae15d5ce net: dsa: Add missing of_node_put() in dsa_port_link_register_of
14cc2044c1 memory: renesas-rpc-if: Fix HF/OSPI data transfer in Manual Mode
690c1bc4bf pinctrl: stm32: Do not call stm32_gpio_get() for edge triggered IRQs in EOI
6f2bf9c5dd mtd: fix 'part' field data corruption in mtd_info
4da421035b mtd: rawnand: Fix return value check of wait_for_completion_timeout
94ca69b702 pinctrl: mediatek: moore: Fix build error
123b7e0388 ipvs: correctly print the memory size of ip_vs_conn_tab
f4446f2136 ARM: dts: logicpd-som-lv: Fix wrong pinmuxing on OMAP35
4a526cc29c ARM: dts: am3517-evm: Fix misc pinmuxing
b622bca852 ARM: dts: Fix mmc order for omap3-gta04
9419d27fe1 phy: ti: Add missing pm_runtime_disable() in serdes_am654_probe
9e00a6e1fd phy: mapphone-mdm6600: Fix PM error handling in phy_mdm6600_probe
eb659608e6 ARM: dts: at91: sama5d4_xplained: fix pinctrl phandle name
bb524f5a95 ARM: dts: at91: Map MCLK for wm8731 on at91sam9g20ek
4691ce8f28 phy: ti: omap-usb2: Fix error handling in omap_usb2_enable_clocks
76d1591a38 bus: ti-sysc: Make omap3 gpt12 quirk handling SoC specific
1b9855bf31 ARM: OMAP2+: Fix refcount leak in omap_gic_of_init
93cc8f184e phy: samsung: exynos5250-sata: fix missing device put in probe error paths
3ca7491570 phy: samsung: Fix missing of_node_put() in exynos_sata_phy_probe
8f7644ac24 ARM: dts: imx6qdl-apalis: Fix sgtl5000 detection issue
23b0711fcd USB: Fix xhci event ring dequeue pointer ERDP update issue
712302aed1 mtd: rawnand: fix ecc parameters for mt7622
207c7af341 iio:imu:bmi160: disable regulator in error path
70d2df257e arm64: dts: meson: remove CPU opps below 1GHz for SM1 boards
2d320609be arm64: dts: meson: remove CPU opps below 1GHz for G12B boards
c4fb41bdf4 video: fbdev: udlfb: properly check endpoint type
0967830e72 iocost: don't reset the inuse weight of under-weighted debtors
ad604cbd1d x86/pci/xen: Disable PCI/MSI[-X] masking for XEN_HVM guests
8fcce58c59 riscv: patch_text: Fixup last cpu should be master
51477d3b38 hex2bin: fix access beyond string end
616d354fb9 hex2bin: make the function hex_to_bin constant-time
1633cb2d4a pinctrl: samsung: fix missing GPIOLIB on ARM64 Exynos config
bdc3ad9251 arch_topology: Do not set llc_sibling if llc_id is invalid
aaee3f6617 serial: 8250: Correct the clock for EndRun PTP/1588 PCIe device
662f945a20 serial: 8250: Also set sticky MCR bits in console restoration
8be962c89d serial: imx: fix overrun interrupts in DMA mode
d22d92230f usb: phy: generic: Get the vbus supply
b820764c64 usb: cdns3: Fix issue for clear halt endpoint
bd7f84708e usb: dwc3: gadget: Return proper request status
a633b8c341 usb: dwc3: core: Only handle soft-reset in DCTL
5fa59bb867 usb: dwc3: core: Fix tx/rx threshold settings
140801d3fb usb: dwc3: Try usb-role-switch first in dwc3_drd_init
4dd5feb279 usb: gadget: configfs: clear deactivation flag in configfs_composite_unbind()
6c3da0e19c usb: gadget: uvc: Fix crash when encoding data for usb request
fb1fe1a455 usb: typec: ucsi: Fix role swapping
06826eb063 usb: typec: ucsi: Fix reuse of completion structure
7b510d4bb4 usb: misc: fix improper handling of refcount in uss720_probe()
bb8ecca2dd iio: imu: inv_icm42600: Fix I2C init possible nack
ca2b54b6ad iio: magnetometer: ak8975: Fix the error handling in ak8975_power_on()
1060604fc7 iio: dac: ad5446: Fix read_raw not returning set value
6ff33c01be iio: dac: ad5592r: Fix the missing return value.
06ada9487f xhci: increase usb U3 -> U0 link resume timeout from 100ms to 500ms
e1be000166 xhci: stop polling roothubs after shutdown
2eb6c86891 xhci: Enable runtime PM on second Alderlake controller
63eda431b2 USB: serial: option: add Telit 0x1057, 0x1058, 0x1075 compositions
e9971dac69 USB: serial: option: add support for Cinterion MV32-WA/MV32-WB
34ff5455ee USB: serial: cp210x: add PIDs for Kamstrup USB Meter Reader
729a81ae10 USB: serial: whiteheat: fix heap overflow in WHITEHEAT_GET_DTR_RTS
008ba29f33 USB: quirks: add STRING quirk for VCOM device
ac6ad0ef83 USB: quirks: add a Realtek card reader
8ba02cebb7 usb: mtu3: fix USB 3.0 dual-role-switch from device to host
549209caab lightnvm: disable the subsystem
54c028cfc4 floppy: disable FDRAWCMD by default
de64d941a7 Merge 5.10.112 into android12-5.10-lts
54af9dd2b9 Linux 5.10.113
7992fdb045 Revert "net: micrel: fix KS8851_MLL Kconfig"
8bedbc8f7f block/compat_ioctl: fix range check in BLKGETSIZE
fea24b07ed staging: ion: Prevent incorrect reference counting behavour
dccee748af spi: atmel-quadspi: Fix the buswidth adjustment between spi-mem and controller
572761645b jbd2: fix a potential race while discarding reserved buffers after an abort
50aac44273 can: isotp: stop timeout monitoring when no first frame was sent
e1e96e3727 ext4: force overhead calculation if the s_overhead_cluster makes no sense
4789149b9e ext4: fix overhead calculation to account for the reserved gdt blocks
0c54b09376 ext4, doc: fix incorrect h_reserved size
22c450d39f ext4: limit length to bitmap_maxbytes - blocksize in punch_hole
75ac724684 ext4: fix use-after-free in ext4_search_dir
a46b3d8498 ext4: fix symlink file size not match to file content
f6038d43b2 ext4: fix fallocate to use file_modified to update permissions consistently
19590bbc69 perf report: Set PERF_SAMPLE_DATA_SRC bit for Arm SPE event
e012f9d1af powerpc/perf: Fix power9 event alternatives
0a2cef65b3 drm/vc4: Use pm_runtime_resume_and_get to fix pm_runtime_get_sync() usage
f8f8b3124b KVM: PPC: Fix TCE handling for VFIO
405d984274 drm/panel/raspberrypi-touchscreen: Initialise the bridge in prepare
231381f521 drm/panel/raspberrypi-touchscreen: Avoid NULL deref if not initialised
51d9cbbb0f perf/core: Fix perf_mmap fail when CONFIG_PERF_USE_VMALLOC enabled
88fcfd6ee6 sched/pelt: Fix attach_entity_load_avg() corner case
c55327bc37 arm_pmu: Validate single/group leader events
5580b974a8 ARC: entry: fix syscall_trace_exit argument
7082650eb8 e1000e: Fix possible overflow in LTR decoding
43a2a3734a ASoC: soc-dapm: fix two incorrect uses of list iterator
54e6180c8c gpio: Request interrupts after IRQ is initialized
0837ff17d0 openvswitch: fix OOB access in reserve_sfa_size()
19f6dcb1f0 xtensa: fix a7 clobbering in coprocessor context load/store
f399ab11dd xtensa: patch_text: Fixup last cpu should be master
ba2716da23 net: atlantic: invert deep par in pm functions, preventing null derefs
358a3846f6 dma: at_xdmac: fix a missing check on list iterator
cf23a960c5 ata: pata_marvell: Check the 'bmdma_addr' beforing reading
9ca66d7914 mm/mmu_notifier.c: fix race in mmu_interval_notifier_remove()
ed5d4efb4d oom_kill.c: futex: delay the OOM reaper to allow time for proper futex cleanup
6b932920b9 mm, hugetlb: allow for "high" userspace addresses
50cbc583fa EDAC/synopsys: Read the error count from the correct register
7ec6e06ee4 nvme-pci: disable namespace identifiers for Qemu controllers
316bd86c22 nvme: add a quirk to disable namespace identifiers
76101c8e0c stat: fix inconsistency between struct stat and struct compat_stat
bf28bba304 scsi: qedi: Fix failed disconnect handling
a284cca3d8 net: macb: Restart tx only if queue pointer is lagging
9581e07b54 drm/msm/mdp5: check the return of kzalloc()
8d71edabb0 dpaa_eth: Fix missing of_node_put in dpaa_get_ts_info()
b3afe5a7fd brcmfmac: sdio: Fix undefined behavior due to shift overflowing the constant
202748f441 mt76: Fix undefined behavior due to shift overflowing the constant
0de9c104d0 net: atlantic: Avoid out-of-bounds indexing
5bef9fc38f cifs: Check the IOCB_DIRECT flag, not O_DIRECT
e129c55153 vxlan: fix error return code in vxlan_fdb_append
8e7ea11364 arm64: dts: imx: Fix imx8*-var-som touchscreen property sizes
cd227ac03f ALSA: usb-audio: Fix undefined behavior due to shift overflowing the constant
490815f0b5 platform/x86: samsung-laptop: Fix an unsigned comparison which can never be negative
cb17b56a9b reset: tegra-bpmp: Restore Handle errors in BPMP response
d513ea9b7e ARM: vexpress/spc: Avoid negative array index when !SMP
052e4a661f arm64: mm: fix p?d_leaf()
18ff7a2efa arm64/mm: Remove [PUD|PMD]_TABLE_BIT from [pud|pmd]_bad()
3bf8ca3501 selftests: mlxsw: vxlan_flooding: Prevent flooding of unwanted packets
520aab8b72 dmaengine: idxd: add RO check for wq max_transfer_size write
9a3c026dc3 dmaengine: idxd: add RO check for wq max_batch_size write
f593f49fcd net: stmmac: Use readl_poll_timeout_atomic() in atomic state
3d55b19574 netlink: reset network and mac headers in netlink_dump()
49516e6ed9 ipv6: make ip6_rt_gc_expire an atomic_t
078d839f11 l3mdev: l3mdev_master_upper_ifindex_by_index_rcu should be using netdev_master_upper_dev_get_rcu
0ac8f83d8f net/sched: cls_u32: fix possible leak in u32_init_knode()
93366275be ip6_gre: Fix skb_under_panic in __gre6_xmit()
200f96ebb3 ip6_gre: Avoid updating tunnel->tun_hlen in __gre6_xmit()
8fb76adb89 net/packet: fix packet_sock xmit return value checking
a499cb5f3e net/smc: Fix sock leak when release after smc_shutdown()
60592f16a4 rxrpc: Restore removed timer deletion
fc7116a79a igc: Fix BUG: scheduling while atomic
46b0e4f998 igc: Fix infinite loop in release_swfw_sync
c075c3ea03 esp: limit skb_page_frag_refill use to a single page
3f7914dbea spi: spi-mtk-nor: initialize spi controller after resume
f714abf28f dmaengine: mediatek:Fix PM usage reference leak of mtk_uart_apdma_alloc_chan_resources
9bc949a181 dmaengine: imx-sdma: Fix error checking in sdma_event_remap
12aa8021c7 ASoC: codecs: wcd934x: do not switch off SIDO Buck when codec is in use
b6f474cd30 ASoC: msm8916-wcd-digital: Check failure for devm_snd_soc_register_component
608fc58858 ASoC: atmel: Remove system clock tree configuration for at91sam9g20ek
d29c78d3f9 dm: fix mempool NULL pointer race when completing IO
cf9b195464 ALSA: hda/realtek: Add quirk for Clevo NP70PNP
8ce3820fc9 ALSA: usb-audio: Clear MIDI port active flag after draining
43ce33a68e net/sched: cls_u32: fix netns refcount changes in u32_change()
04dd45d977 gfs2: assign rgrp glock before compute_bitstructs
378061c9b8 perf tools: Fix segfault accessing sample_id xyarray
5e8446e382 tracing: Dump stacktrace trigger to the corresponding instance
69848f9488 mm: page_alloc: fix building error on -Werror=array-compare
08ad7a770e etherdevice: Adjust ether_addr* prototypes to silence -Wstringop-overead
904c5c08bb ANDROID: fix up gpio change in 5.10.111
5dadf6321c Merge 5.10.111 into android12-5.10-lts
1052f9bce6 Linux 5.10.112
5c62d3bf14 ax25: Fix UAF bugs in ax25 timers
f934fa478d ax25: Fix NULL pointer dereferences in ax25 timers
145ea8d213 ax25: fix NPD bug in ax25_disconnect
a4942c6fea ax25: fix UAF bug in ax25_send_control()
b20a5ab0f5 ax25: Fix refcount leaks caused by ax25_cb_del()
57cc15f5fd ax25: fix UAF bugs of net_device caused by rebinding operation
5ddae8d064 ax25: fix reference count leaks of ax25_dev
5ea00fc606 ax25: add refcount in ax25_dev to avoid UAF bugs
361288633b scsi: iscsi: Fix unbound endpoint error handling
129db30599 scsi: iscsi: Fix endpoint reuse regression
26f827e095 dma-direct: avoid redundant memory sync for swiotlb
9a5a4d23e2 timers: Fix warning condition in __run_timers()
84837f43e5 i2c: pasemi: Wait for write xfers to finish
89496d80bf smp: Fix offline cpu check in flush_smp_call_function_queue()
cd02b2687d dm integrity: fix memory corruption when tag_size is less than digest size
0a312ec66a ARM: davinci: da850-evm: Avoid NULL pointer dereference
0806f19305 tick/nohz: Use WARN_ON_ONCE() to prevent console saturation
0275c75955 genirq/affinity: Consider that CPUs on nodes can be unbalanced
1fcfe37d17 drm/amdgpu: Enable gfxoff quirk on MacBook Pro
68ae52efa1 drm/amd/display: don't ignore alpha property on pre-multiplied mode
a263712ba8 ipv6: fix panic when forwarding a pkt with no in6 dev
659214603b nl80211: correctly check NL80211_ATTR_REG_ALPHA2 size
912797e54c ALSA: pcm: Test for "silence" field in struct "pcm_format_data"
48d070ca5e ALSA: hda/realtek: add quirk for Lenovo Thinkpad X12 speakers
163e162471 ALSA: hda/realtek: Add quirk for Clevo PD50PNT
5e4dd17998 btrfs: mark resumed async balance as writing
1d2eda18f6 btrfs: fix root ref counts in error handling in btrfs_get_root_ref
9b7ec35253 ath9k: Fix usage of driver-private space in tx_info
0f65cedae5 ath9k: Properly clear TX status area before reporting to mac80211
cc21ae9326 gcc-plugins: latent_entropy: use /dev/urandom
c089ffc846 memory: renesas-rpc-if: fix platform-device leak in error path
342454231e KVM: x86/mmu: Resolve nx_huge_pages when kvm.ko is loaded
06c348fde5 mm: kmemleak: take a full lowmem check in kmemleak_*_phys()
20ed94f818 mm: fix unexpected zeroed page mapping with zram swap
192e507ef8 mm, page_alloc: fix build_zonerefs_node()
000b3921b4 perf/imx_ddr: Fix undefined behavior due to shift overflowing the constant
ca24c5e8f0 drivers: net: slip: fix NPD bug in sl_tx_timeout()
e8cf1e4d95 scsi: megaraid_sas: Target with invalid LUN ID is deleted during scan
5b7ce74b6b scsi: mvsas: Add PCI ID of RocketRaid 2640
4b44cd5840 drm/amd/display: Fix allocate_mst_payload assert on resume
34ea097fb6 drm/amd/display: Revert FEC check in validation
fa5ee7c423 myri10ge: fix an incorrect free for skb in myri10ge_sw_tso
d90df6da50 net: usb: aqc111: Fix out-of-bounds accesses in RX fixup
9c12fcf1d8 net: axienet: setup mdio unconditionally
b643807a73 tlb: hugetlb: Add more sizes to tlb_remove_huge_tlb_entry
98973d2bdd arm64: alternatives: mark patch_alternative() as `noinstr`
2462faffbf regulator: wm8994: Add an off-on delay for WM8994 variant
aa8cdedaf7 gpu: ipu-v3: Fix dev_dbg frequency output
150fe861c5 ata: libata-core: Disable READ LOG DMA EXT for Samsung 840 EVOs
1ff5359afa net: micrel: fix KS8851_MLL Kconfig
d3478709ed scsi: ibmvscsis: Increase INITIAL_SRP_LIMIT to 1024
b9a110fa75 scsi: lpfc: Fix queue failures when recovering from PCI parity error
aec36b98a1 scsi: target: tcmu: Fix possible page UAF
4366679805 Drivers: hv: vmbus: Prevent load re-ordering when reading ring buffer
1d7a5aae88 drm/amdkfd: Check for potential null return of kmalloc_array()
e5afacc826 drm/amdgpu/vcn: improve vcn dpg stop procedure
d2e0931e6d drm/amdkfd: Fix Incorrect VMIDs passed to HWS
7fc0610ad8 drm/amd/display: Update VTEM Infopacket definition
6906e05cf3 drm/amd/display: FEC check in timing validation
756c61c168 drm/amd/display: fix audio format not updated after edid updated
76e086ce7b btrfs: do not warn for free space inode in cow_file_range
217190dc66 btrfs: fix fallocate to use file_modified to update permissions consistently
9b5d1b3413 drm/amd: Add USBC connector ID
6f9c06501d net: bcmgenet: Revert "Use stronger register read/writes to assure ordering"
504c15f07f dm mpath: only use ktime_get_ns() in historical selector
4e166a4118 cifs: potential buffer overflow in handling symlinks
67677050ce nfc: nci: add flush_workqueue to prevent uaf
bfba9722cf perf tools: Fix misleading add event PMU debug message
280f721edc testing/selftests/mqueue: Fix mq_perf_tests to free the allocated cpu set
eb8873b324 sctp: Initialize daddr on peeled off socket
45226fac4d scsi: iscsi: Fix conn cleanup and stop race during iscsid restart
73805795c9 scsi: iscsi: Fix offload conn cleanup when iscsid restarts
699bd835c3 scsi: iscsi: Move iscsi_ep_disconnect()
46f37a34a5 scsi: iscsi: Fix in-kernel conn failure handling
8125738967 scsi: iscsi: Rel ref after iscsi_lookup_endpoint()
22608545b8 scsi: iscsi: Use system_unbound_wq for destroy_work
4029a1e992 scsi: iscsi: Force immediate failure during shutdown
17d14456f6 scsi: iscsi: Stop queueing during ep_disconnect
da9cf24aa7 scsi: pm80xx: Enable upper inbound, outbound queues
e08d269712 scsi: pm80xx: Mask and unmask upper interrupt vectors 32-63
35b91e49bc net/smc: Fix NULL pointer dereference in smc_pnet_find_ib()
98a7f6c4ad drm/msm/dsi: Use connector directly in msm_dsi_manager_connector_init()
5f78ad9383 drm/msm: Fix range size vs end confusion
5513f9a0b0 cfg80211: hold bss_lock while updating nontrans_list
a44938950e net/sched: taprio: Check if socket flags are valid
08d5e3e954 net: ethernet: stmmac: fix altr_tse_pcs function when using a fixed-link
2ad9d890d8 net: dsa: felix: suppress -EPROBE_DEFER errors
f2cc341fcc net/sched: fix initialization order when updating chain 0 head
7a7cf84148 mlxsw: i2c: Fix initialization error flow
43e58e119a net: mdio: Alphabetically sort header inclusion
9709c8b5cd gpiolib: acpi: use correct format characters
d67c900f19 veth: Ensure eth header is in skb's linear part
845f44ce3d net/sched: flower: fix parsing of ethertype following VLAN header
85ee17ca21 SUNRPC: Fix the svc_deferred_event trace class
af12dd7123 media: rockchip/rga: do proper error checking in probe
5637129712 firmware: arm_scmi: Fix sorting of retrieved clock rates
16c628b0c6 memory: atmel-ebi: Fix missing of_node_put in atmel_ebi_probe
cb66641f81 drm/msm: Add missing put_task_struct() in debugfs path
921fdc45a0 btrfs: remove unused variable in btrfs_{start,write}_dirty_block_groups()
5d131318bb ACPI: processor idle: Check for architectural support for LPI
503934df31 cpuidle: PSCI: Move the `has_lpi` check to the beginning of the function
cfa98ffc42 hamradio: remove needs_free_netdev to avoid UAF
80a4df1464 hamradio: defer 6pack kfree after unregister_netdev
f0c31f192f drm/amdkfd: Use drm_priv to pass VM from KFD to amdgpu
6c8e5cb264 Linux 5.10.111
d36febbcd5 powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit
5c672073bc mm/sparsemem: fix 'mem_section' will never be NULL gcc 12 warning
5973f7507a irqchip/gic, gic-v3: Prevent GSI to SGI translations
000e09462f Drivers: hv: vmbus: Replace smp_store_mb() with virt_store_mb()
e1f540b752 arm64: module: remove (NOLOAD) from linker script
919823bd67 selftests: cgroup: Test open-time cgroup namespace usage for migration checks
637eca44b8 selftests: cgroup: Test open-time credential usage for migration checks
9dd39d2c65 selftests: cgroup: Make cg_create() use 0755 for permission instead of 0644
e74da71e66 selftests/cgroup: Fix build on older distros
4665722d36 cgroup: Use open-time credentials for process migraton perm checks
f089471d1b mm: don't skip swap entry even if zap_details specified
58823a9b09 ubsan: remove CONFIG_UBSAN_OBJECT_SIZE
03b39bbbec dmaengine: Revert "dmaengine: shdma: Fix runtime PM imbalance on error"
40e00885a6 tools build: Use $(shell ) instead of `` to get embedded libperl's ccopts
75c8558d41 tools build: Filter out options and warnings not supported by clang
6374faf49e perf python: Fix probing for some clang command line options
79abc219ba perf build: Don't use -ffat-lto-objects in the python feature test when building with clang-13
82e4395014 drm/amdkfd: Create file descriptor after client is added to smi_clients list
326b408e7e drm/nouveau/pmu: Add missing callbacks for Tegra devices
786ae8de3a drm/amdgpu/smu10: fix SoC/fclk units in auto mode
ff24114bb0 irqchip/gic-v3: Fix GICR_CTLR.RWP polling
451214b266 perf: qcom_l2_pmu: fix an incorrect NULL check on list iterator
fc629224aa ata: sata_dwc_460ex: Fix crash due to OOB write
7e88a50704 gpio: Restrict usage of GPIO chip irq members before initialization
5f54364ff6 RDMA/hfi1: Fix use-after-free bug for mm struct
8bb4168291 arm64: patch_text: Fixup last cpu should be master
a044bca8ef btrfs: prevent subvol with swapfile from being deleted
82ae73ac96 btrfs: fix qgroup reserve overflow the qgroup limit
fc4bdaed4d x86/speculation: Restore speculation related MSRs during S3 resume
8c9e26c890 x86/pm: Save the MSR validity status at context setup
2827328e64 io_uring: fix race between timeout flush and removal
f7e183b0a7 mm/mempolicy: fix mpol_new leak in shared_policy_replace
7d659cb176 mm/mremap.c: avoid pointless invalidate_range_start/end on mremap(old_size=0)
6adc01a7aa lz4: fix LZ4_decompress_safe_partial read out of bound
8b6f04b4c9 mmc: renesas_sdhi: don't overwrite TAP settings when HS400 tuning is complete
029b417073 mmc: mmci: stm32: correctly check all elements of sg list
41a519c05b Revert "mmc: sdhci-xenon: fix annoying 1.8V regulator warning"
9de98470db arm64: Add part number for Arm Cortex-A78AE
4604b5738d perf session: Remap buf if there is no space for event
362ced3769 perf tools: Fix perf's libperf_print callback
65210fac63 perf: arm-spe: Fix perf report --mem-mode
bd905fed87 iommu/omap: Fix regression in probe for NULL pointer dereference
b3c00be2ff SUNRPC: svc_tcp_sendmsg() should handle errors from xdr_alloc_bvec()
9a45e08636 SUNRPC: Handle low memory situations in call_status()
132cbe2f18 SUNRPC: Handle ENOMEM in call_transmit_status()
aed30a2054 io_uring: don't touch scm_fp_list after queueing skb
594205b493 drbd: Fix five use after free bugs in get_initial_state
970a6bb729 bpf: Support dual-stack sockets in bpf_tcp_check_syncookie
6c17f4ef3c spi: bcm-qspi: fix MSPI only access with bcm_qspi_exec_mem_op()
8928239e5e qede: confirm skb is allocated before using
b7893388bb net: phy: mscc-miim: reject clause 45 register accesses
08ff0e74fa rxrpc: fix a race in rxrpc_exit_net()
5ae05b5eb5 net: openvswitch: fix leak of nested actions
42ab401d22 net: openvswitch: don't send internal clone attribute to the userspace.
e54ea8fc51 ice: synchronize_rcu() when terminating rings
e3dd1202ab ipv6: Fix stats accounting in ip6_pkt_drop
ffce126c95 ice: Do not skip not enabled queues in ice_vc_dis_qs_msg
b003fc4913 ice: Set txq_teid to ICE_INVAL_TEID on ring creation
ebd1e3458d dpaa2-ptp: Fix refcount leak in dpaa2_ptp_probe
43c2d7890e IB/rdmavt: add lock to call to rvt_error_qp to prevent a race condition
3a57babfb6 RDMA/mlx5: Don't remove cache MRs when a delay is needed
d8992b393f sfc: Do not free an empty page_ring
0ac74169eb bnxt_en: reserve space inside receive page for skb_shared_info
f8b0ef0a58 drm/imx: Fix memory leak in imx_pd_connector_get_modes
25bc9fd4c8 drm/imx: imx-ldb: Check for null pointer after calling kmemdup
02ab4abe5b net: stmmac: Fix unset max_speed difference between DT and non-DT platforms
63ea57478a net: ipv4: fix route with nexthop object delete warning
4be6ed0310 ice: Clear default forwarding VSI during VSI release
589154d0f1 net/tls: fix slab-out-of-bounds bug in decrypt_internal
c5f77b5953 scsi: zorro7xx: Fix a resource leak in zorro7xx_remove_one()
45b9932b4d NFSv4: fix open failure with O_ACCMODE flag
c688705a39 Revert "NFSv4: Handle the special Linux file open access mode"
cf580d2e38 Drivers: hv: vmbus: Fix potential crash on module unload
0c122eb3a1 drm/amdgpu: fix off by one in amdgpu_gfx_kiq_acquire()
84e5dfc05f Revert "hv: utils: add PTP_1588_CLOCK to Kconfig to fix build"
3c3fbfa6dd mm: fix race between MADV_FREE reclaim and blkdev direct IO read
1753a49e26 parisc: Fix patch code locking and flushing
f7c3522030 parisc: Fix CPU affinity for Lasi, WAX and Dino chips
c74e2f6ecc NFS: Avoid writeback threads getting stuck in mempool_alloc()
34681aeddc NFS: nfsiod should not block forever in mempool_alloc()
7a506fabcf SUNRPC: Fix socket waits for write buffer space
b9c5ac0a15 jfs: prevent NULL deref in diFree
c69b442125 virtio_console: eliminate anonymous module_init & module_exit
3309b32217 serial: samsung_tty: do not unlock port->lock for uart_write_wakeup()
9cb90f9ad5 x86/Kconfig: Do not allow CONFIG_X86_X32_ABI=y with llvm-objcopy
b3882e78aa NFS: swap-out must always use STABLE writes.
d4170a2821 NFS: swap IO handling is slightly different for O_DIRECT IO
4b6f122bdf SUNRPC: remove scheduling boost for "SWAPPER" tasks.
f4fc47e71e SUNRPC/xprt: async tasks mustn't block waiting for memory
f9244d31e0 SUNRPC/call_alloc: async tasks mustn't block waiting for memory
e2b2542f74 clk: Enforce that disjoints limits are invalid
1e9b5538cf clk: ti: Preserve node in ti_dt_clocks_register()
a2a0e04f64 xen: delay xen_hvm_init_time_ops() if kdump is boot on vcpu>=32
4a2544ce24 NFSv4: Protect the state recovery thread against direct reclaim
9b9feec97c NFSv4.2: fix reference count leaks in _nfs42_proc_copy_notify()
2e16895d06 w1: w1_therm: fixes w1_seq for ds28ea00 sensors
93498c6e77 staging: wfx: fix an error handling in wfx_init_common()
8f1d24f85f phy: amlogic: meson8b-usb2: Use dev_err_probe()
aa0b729678 staging: vchiq_core: handle NULL result of find_service_by_handle
be4ecca958 clk: si5341: fix reported clk_rate when output divider is 2
c9cf6baabf minix: fix bug when opening a file with O_DIRECT
8d9efd4434 init/main.c: return 1 from handled __setup() functions
f442978612 ceph: fix memory leak in ceph_readdir when note_last_dentry returns error
d745512d54 netlabel: fix out-of-bounds memory accesses
2cc803804e Bluetooth: Fix use after free in hci_send_acl
789621df19 MIPS: ingenic: correct unit node address
61e25021e6 xtensa: fix DTC warning unit_address_format
f6b9550f53 usb: dwc3: omap: fix "unbalanced disables for smps10_out1" on omap5evm
a4dd3e9e5a net: sfp: add 2500base-X quirk for Lantech SFP module
278b652f0a net: limit altnames to 64k total
423e7107f6 net: account alternate interface name memory
74c4d50255 can: isotp: set default value for N_As to 50 micro seconds
1d7effe5ff scsi: libfc: Fix use after free in fc_exch_abts_resp()
02222bf4f0 powerpc/secvar: fix refcount leak in format_show()
fd416c3f5a MIPS: fix fortify panic when copying asm exception handlers
7c657c0694 PCI: endpoint: Fix misused goto label
79cfc0052f bnxt_en: Eliminate unintended link toggle during FW reset
9567d54e70 Bluetooth: use memset avoid memory leaks
f9b183f133 Bluetooth: Fix not checking for valid hdev on bt_dev_{info,warn,err,dbg}
647b35aaf4 tuntap: add sanity checks about msg_controllen in sendmsg
797b4ea951 macvtap: advertise link netns via netlink
142ae7d4f2 mips: ralink: fix a refcount leak in ill_acc_of_setup()
f2565cb40e net/smc: correct settings of RMB window update limit
224903cc60 scsi: hisi_sas: Free irq vectors in order for v3 HW
f49ffaa85d scsi: aha152x: Fix aha152x_setup() __setup handler return value
91ee8a14ef mt76: mt7615: Fix assigning negative values to unsigned variable
d83574666b scsi: pm8001: Fix memory leak in pm8001_chip_fw_flash_update_req()
a0bb65eadb scsi: pm8001: Fix tag leaks on error
2051044d79 scsi: pm8001: Fix task leak in pm8001_send_abort_all()
3bd9a28798 scsi: pm8001: Fix pm8001_mpi_task_abort_resp()
ef969095c4 scsi: pm8001: Fix pm80xx_pci_mem_copy() interface
fe4b6d5a0d drm/amdkfd: make CRAT table missing message informational only
2f2f017ea8 dm: requeue IO if mapping table not yet available
71c8df33fd dm ioctl: prevent potential spectre v1 gadget
f655b724b4 ipv4: Invalidate neighbour for broadcast address upon address addition
bae03957e8 iwlwifi: mvm: Correctly set fragmented EBS
9538563d31 power: supply: axp288-charger: Set Vhold to 4.4V
c66cc04043 PCI: pciehp: Add Qualcomm quirk for Command Completed erratum
b1b27b0e8d tcp: Don't acquire inet_listen_hashbucket::lock with disabled BH.
b02a1a6502 PCI: endpoint: Fix alignment fault error in copy tests
4820847e8b usb: ehci: add pci device support for Aspeed platforms
0b9cf0b599 iommu/arm-smmu-v3: fix event handling soft lockup
e07e420a00 PCI: aardvark: Fix support for MSI interrupts
6694b8643b drm/amdgpu: Fix recursive locking warning
ea21eaea7f powerpc: Set crashkernel offset to mid of RMA region
fb5ac62fbe ipv6: make mc_forwarding atomic
5baf92a2c4 libbpf: Fix build issue with llvm-readelf
26a1e4739e cfg80211: don't add non transmitted BSS to 6GHz scanned channels
9a56e2b271 mt76: dma: initialize skip_unmap in mt76_dma_rx_fill
b42b6d0ec3 power: supply: axp20x_battery: properly report current when discharging
de9505936c scsi: bfa: Replace snprintf() with sysfs_emit()
ed7db95920 scsi: mvsas: Replace snprintf() with sysfs_emit()
995f517888 bpf: Make dst_port field in struct bpf_sock 16-bit wide
339bd0b55e ath11k: mhi: use mhi_sync_power_up()
c6a815f5ab ath11k: fix kernel panic during unload/load ath11k modules
e4d2d72013 powerpc: dts: t104xrdb: fix phy type for FMAN 4/5
02e2ee8619 ptp: replace snprintf with sysfs_emit
9ea17b9f1d usb: gadget: tegra-xudc: Fix control endpoint's definitions
07971b818e usb: gadget: tegra-xudc: Do not program SPARAM
927beb05aa drm/amd/amdgpu/amdgpu_cs: fix refcount leak of a dma_fence obj
85313d9bc7 drm/amd/display: Add signal type check when verify stream backends same
9d7d83d039 ath5k: fix OOB in ath5k_eeprom_read_pcal_info_5111
850c4351e8 drm: Add orientation quirk for GPD Win Max
a24479c5e9 KVM: x86/emulator: Emulate RDPID only if it is enabled in guest
66b0fa6b22 KVM: x86/svm: Clear reserved bits written to PerfEvtSeln MSRs
2e52a29470 rtc: wm8350: Handle error for wm8350_register_irq
0777fe98a4 gfs2: gfs2_setattr_size error path fix
f349d7f9ee gfs2: Fix gfs2_release for non-writers regression
3f53715fd5 gfs2: Check for active reservation in gfs2_release
2dc49f58a2 ubifs: Rectify space amount budget for mkdir/tmpfile operations

Update the .xml file with the following needed changes that came in from the -lts branch to handle ABI issues with LTS security fixes:

Leaf changes summary: 3 artifacts changed
Changed leaf types summary: 2 leaf types changed
Removed/Changed/Added functions summary: 0 Removed, 1 Changed, 0 Added function
Removed/Changed/Added variables summary: 0 Removed, 0 Changed, 0 Added variable

1 function with some sub-type change:

  [C] 'function int hex_to_bin(char)' at hexdump.c:53:1 has some sub-type changes:
    parameter 1 of type 'char' changed:
      type name changed from 'char' to 'unsigned char'
      type size hasn't changed

'struct gpio_chip at driver.h:362:1' changed (indirectly):
  type size hasn't changed
  there are data member changes:
    type 'struct gpio_irq_chip' of 'gpio_chip::irq' changed:
      type size hasn't changed
      there are data member changes:
        data member u64 android_kabi_reserved1 at offset 2304 (in bits) became
          anonymous data member 'union {bool initialized; struct
          {u64 android_kabi_reserved1;}; union {};}'
      1265 impacted interfaces
  1265 impacted interfaces

'struct gpio_irq_chip at driver.h:32:1' changed:
  details were reported earlier

Change-Id: Iface7385c5d82fbcdaeb92fda79ac3cd1835d323
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
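The gpio_chip change recorded above is the standard GKI ABI-padding trick: the new `initialized` flag introduced by the 5.10.111 gpio fixes is overlaid on the reserved `android_kabi_reserved1` slot through an anonymous union, so every other member keeps its offset and the overall structure size is unchanged. The hex_to_bin() entry is equally layout-neutral; only the parameter type becomes unsigned char as part of the constant-time rework. Below is a minimal, hypothetical C sketch of the union overlay (names are illustrative; the real definitions live in include/linux/gpio/driver.h and the ANDROID_KABI_USE() helpers):

  /*
   * Illustrative sketch only -- not the kernel's actual definition.
   * The new member sits in an anonymous union over the reserved u64,
   * which is why abidiff reports "type size hasn't changed".
   */
  #include <assert.h>   /* static_assert (C11) */
  #include <stdbool.h>

  struct gpio_irq_chip_sketch {
          /* ... existing members elided ... */
          union {
                  bool initialized;       /* new flag, overlaid on the padding */
                  struct {
                          unsigned long long android_kabi_reserved1; /* original slot */
                  };
          };
  };

  /* The overlay must not grow the struct beyond the reserved u64. */
  static_assert(sizeof(struct gpio_irq_chip_sketch) == sizeof(unsigned long long),
                "new member fits entirely inside the reserved slot");

Because the layout stays byte-for-byte compatible, only the ABI .xml representation needs regenerating; no exported symbols change.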
/*
|
|
* mm/rmap.c - physical to virtual reverse mappings
|
|
*
|
|
* Copyright 2001, Rik van Riel <riel@conectiva.com.br>
|
|
* Released under the General Public License (GPL).
|
|
*
|
|
* Simple, low overhead reverse mapping scheme.
|
|
* Please try to keep this thing as modular as possible.
|
|
*
|
|
* Provides methods for unmapping each kind of mapped page:
|
|
* the anon methods track anonymous pages, and
|
|
* the file methods track pages belonging to an inode.
|
|
*
|
|
* Original design by Rik van Riel <riel@conectiva.com.br> 2001
|
|
* File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
|
|
* Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
|
|
* Contributions by Hugh Dickins 2003, 2004
|
|
*/
|
|
|
|
/*
|
|
* Lock ordering in mm:
|
|
*
|
|
* inode->i_mutex (while writing or truncating, not reading or faulting)
|
|
* mm->mmap_lock
|
|
* page->flags PG_locked (lock_page) * (see huegtlbfs below)
|
|
* hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
|
|
* mapping->i_mmap_rwsem
|
|
* hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
|
|
* anon_vma->rwsem
|
|
* mm->page_table_lock or pte_lock
|
|
* pgdat->lru_lock (in mark_page_accessed, isolate_lru_page)
|
|
* swap_lock (in swap_duplicate, swap_info_get)
|
|
* mmlist_lock (in mmput, drain_mmlist and others)
|
|
* mapping->private_lock (in __set_page_dirty_buffers)
|
|
* mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
|
|
* i_pages lock (widely used)
|
|
* inode->i_lock (in set_page_dirty's __mark_inode_dirty)
|
|
* bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
|
|
* sb_lock (within inode_lock in fs/fs-writeback.c)
|
|
* i_pages lock (widely used, in set_page_dirty,
|
|
* in arch-dependent flush_dcache_mmap_lock,
|
|
* within bdi.wb->list_lock in __sync_single_inode)
|
|
*
|
|
* anon_vma->rwsem,mapping->i_mutex (memory_failure, collect_procs_anon)
|
|
* ->tasklist_lock
|
|
* pte map lock
|
|
*
|
|
* * hugetlbfs PageHuge() pages take locks in this order:
|
|
* mapping->i_mmap_rwsem
|
|
* hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
|
|
* page->flags PG_locked (lock_page)
|
|
*/
|
|
|
|
#include <linux/mm.h>
|
|
#include <linux/sched/mm.h>
|
|
#include <linux/sched/task.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/swap.h>
|
|
#include <linux/swapops.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/init.h>
|
|
#include <linux/ksm.h>
|
|
#include <linux/rmap.h>
|
|
#include <linux/rcupdate.h>
|
|
#include <linux/export.h>
|
|
#include <linux/memcontrol.h>
|
|
#include <linux/mmu_notifier.h>
|
|
#include <linux/migrate.h>
|
|
#include <linux/hugetlb.h>
|
|
#include <linux/huge_mm.h>
|
|
#include <linux/backing-dev.h>
|
|
#include <linux/page_idle.h>
|
|
#include <linux/memremap.h>
|
|
#include <linux/userfaultfd_k.h>
|
|
|
|
#include <asm/tlbflush.h>
|
|
|
|
#include <trace/events/tlb.h>
|
|
|
|
#include <trace/hooks/mm.h>
|
|
|
|
#include "internal.h"
|
|
|
|
static struct kmem_cache *anon_vma_cachep;
|
|
static struct kmem_cache *anon_vma_chain_cachep;
|
|
|
|
static inline struct anon_vma *anon_vma_alloc(void)
|
|
{
|
|
struct anon_vma *anon_vma;
|
|
|
|
anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
|
|
if (anon_vma) {
|
|
atomic_set(&anon_vma->refcount, 1);
|
|
anon_vma->degree = 1; /* Reference for first vma */
|
|
anon_vma->parent = anon_vma;
|
|
/*
|
|
* Initialise the anon_vma root to point to itself. If called
|
|
* from fork, the root will be reset to the parents anon_vma.
|
|
*/
|
|
anon_vma->root = anon_vma;
|
|
}
|
|
|
|
return anon_vma;
|
|
}
|
|
|
|
static inline void anon_vma_free(struct anon_vma *anon_vma)
|
|
{
|
|
VM_BUG_ON(atomic_read(&anon_vma->refcount));
|
|
|
|
/*
|
|
* Synchronize against page_lock_anon_vma_read() such that
|
|
* we can safely hold the lock without the anon_vma getting
|
|
* freed.
|
|
*
|
|
* Relies on the full mb implied by the atomic_dec_and_test() from
|
|
* put_anon_vma() against the acquire barrier implied by
|
|
* down_read_trylock() from page_lock_anon_vma_read(). This orders:
|
|
*
|
|
* page_lock_anon_vma_read() VS put_anon_vma()
|
|
* down_read_trylock() atomic_dec_and_test()
|
|
* LOCK MB
|
|
* atomic_read() rwsem_is_locked()
|
|
*
|
|
* LOCK should suffice since the actual taking of the lock must
|
|
* happen _before_ what follows.
|
|
*/
|
|
might_sleep();
|
|
if (rwsem_is_locked(&anon_vma->root->rwsem)) {
|
|
anon_vma_lock_write(anon_vma);
|
|
anon_vma_unlock_write(anon_vma);
|
|
}
|
|
|
|
kmem_cache_free(anon_vma_cachep, anon_vma);
|
|
}
|
|
|
|
static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
|
|
{
|
|
return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
|
|
}
|
|
|
|
static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
|
|
{
|
|
kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
|
|
}
|
|
|
|
static void anon_vma_chain_link(struct vm_area_struct *vma,
|
|
struct anon_vma_chain *avc,
|
|
struct anon_vma *anon_vma)
|
|
{
|
|
avc->vma = vma;
|
|
avc->anon_vma = anon_vma;
|
|
list_add(&avc->same_vma, &vma->anon_vma_chain);
|
|
anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
|
|
}
|
|
|
|
/**
|
|
* __anon_vma_prepare - attach an anon_vma to a memory region
|
|
* @vma: the memory region in question
|
|
*
|
|
* This makes sure the memory mapping described by 'vma' has
|
|
* an 'anon_vma' attached to it, so that we can associate the
|
|
* anonymous pages mapped into it with that anon_vma.
|
|
*
|
|
* The common case will be that we already have one, which
|
|
* is handled inline by anon_vma_prepare(). But if
|
|
* not we either need to find an adjacent mapping that we
|
|
* can re-use the anon_vma from (very common when the only
|
|
* reason for splitting a vma has been mprotect()), or we
|
|
* allocate a new one.
|
|
*
|
|
* Anon-vma allocations are very subtle, because we may have
|
|
* optimistically looked up an anon_vma in page_lock_anon_vma_read()
|
|
* and that may actually touch the spinlock even in the newly
|
|
* allocated vma (it depends on RCU to make sure that the
|
|
* anon_vma isn't actually destroyed).
|
|
*
|
|
* As a result, we need to do proper anon_vma locking even
|
|
* for the new allocation. At the same time, we do not want
|
|
* to do any locking for the common case of already having
|
|
* an anon_vma.
|
|
*
|
|
* This must be called with the mmap_lock held for reading.
|
|
*/
|
|
int __anon_vma_prepare(struct vm_area_struct *vma)
|
|
{
|
|
struct mm_struct *mm = vma->vm_mm;
|
|
struct anon_vma *anon_vma, *allocated;
|
|
struct anon_vma_chain *avc;
|
|
|
|
might_sleep();
|
|
|
|
avc = anon_vma_chain_alloc(GFP_KERNEL);
|
|
if (!avc)
|
|
goto out_enomem;
|
|
|
|
anon_vma = find_mergeable_anon_vma(vma);
|
|
allocated = NULL;
|
|
if (!anon_vma) {
|
|
anon_vma = anon_vma_alloc();
|
|
if (unlikely(!anon_vma))
|
|
goto out_enomem_free_avc;
|
|
allocated = anon_vma;
|
|
}
|
|
|
|
anon_vma_lock_write(anon_vma);
|
|
/* page_table_lock to protect against threads */
|
|
spin_lock(&mm->page_table_lock);
|
|
if (likely(!vma->anon_vma)) {
|
|
vma->anon_vma = anon_vma;
|
|
anon_vma_chain_link(vma, avc, anon_vma);
|
|
/* vma reference or self-parent link for new root */
|
|
anon_vma->degree++;
|
|
allocated = NULL;
|
|
avc = NULL;
|
|
}
|
|
spin_unlock(&mm->page_table_lock);
|
|
anon_vma_unlock_write(anon_vma);
|
|
|
|
if (unlikely(allocated))
|
|
put_anon_vma(allocated);
|
|
if (unlikely(avc))
|
|
anon_vma_chain_free(avc);
|
|
|
|
return 0;
|
|
|
|
out_enomem_free_avc:
|
|
anon_vma_chain_free(avc);
|
|
out_enomem:
|
|
return -ENOMEM;
|
|
}
|
|
|
|
/*
|
|
* This is a useful helper function for locking the anon_vma root as
|
|
* we traverse the vma->anon_vma_chain, looping over anon_vma's that
|
|
* have the same vma.
|
|
*
|
|
* Such anon_vma's should have the same root, so you'd expect to see
|
|
* just a single mutex_lock for the whole traversal.
|
|
*/
|
|
static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
|
|
{
|
|
struct anon_vma *new_root = anon_vma->root;
|
|
if (new_root != root) {
|
|
if (WARN_ON_ONCE(root))
|
|
up_write(&root->rwsem);
|
|
root = new_root;
|
|
down_write(&root->rwsem);
|
|
}
|
|
return root;
|
|
}
|
|
|
|
static inline void unlock_anon_vma_root(struct anon_vma *root)
|
|
{
|
|
if (root)
|
|
up_write(&root->rwsem);
|
|
}
|
|
|
|
/*
|
|
* Attach the anon_vmas from src to dst.
|
|
* Returns 0 on success, -ENOMEM on failure.
|
|
*
|
|
* anon_vma_clone() is called by __vma_split(), __split_vma(), copy_vma() and
|
|
* anon_vma_fork(). The first three want an exact copy of src, while the last
|
|
* one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent
|
|
* endless growth of anon_vma. Since dst->anon_vma is set to NULL before call,
|
|
* we can identify this case by checking (!dst->anon_vma && src->anon_vma).
|
|
*
|
|
* If (!dst->anon_vma && src->anon_vma) is true, this function tries to find
|
|
* and reuse existing anon_vma which has no vmas and only one child anon_vma.
|
|
* This prevents degradation of anon_vma hierarchy to endless linear chain in
|
|
* case of constantly forking task. On the other hand, an anon_vma with more
|
|
* than one child isn't reused even if there was no alive vma, thus rmap
|
|
* walker has a good chance of avoiding scanning the whole hierarchy when it
|
|
* searches where page is mapped.
|
|
*/
|
|
int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
|
|
{
|
|
struct anon_vma_chain *avc, *pavc;
|
|
struct anon_vma *root = NULL;
|
|
|
|
list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
|
|
struct anon_vma *anon_vma;
|
|
|
|
avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
|
|
if (unlikely(!avc)) {
|
|
unlock_anon_vma_root(root);
|
|
root = NULL;
|
|
avc = anon_vma_chain_alloc(GFP_KERNEL);
|
|
if (!avc)
|
|
goto enomem_failure;
|
|
}
|
|
anon_vma = pavc->anon_vma;
|
|
root = lock_anon_vma_root(root, anon_vma);
|
|
anon_vma_chain_link(dst, avc, anon_vma);
|
|
|
|
/*
|
|
* Reuse existing anon_vma if its degree lower than two,
|
|
* that means it has no vma and only one anon_vma child.
|
|
*
|
|
* Do not chose parent anon_vma, otherwise first child
|
|
* will always reuse it. Root anon_vma is never reused:
|
|
* it has self-parent reference and at least one child.
|
|
*/
|
|
if (!dst->anon_vma && src->anon_vma &&
|
|
anon_vma != src->anon_vma && anon_vma->degree < 2)
|
|
dst->anon_vma = anon_vma;
|
|
}
|
|
if (dst->anon_vma)
|
|
dst->anon_vma->degree++;
|
|
unlock_anon_vma_root(root);
|
|
return 0;
|
|
|
|
enomem_failure:
|
|
/*
|
|
* dst->anon_vma is dropped here otherwise its degree can be incorrectly
|
|
* decremented in unlink_anon_vmas().
|
|
* We can safely do this because callers of anon_vma_clone() don't care
|
|
* about dst->anon_vma if anon_vma_clone() failed.
|
|
*/
|
|
dst->anon_vma = NULL;
|
|
unlink_anon_vmas(dst);
|
|
return -ENOMEM;
|
|
}

/*
 * Attach vma to its own anon_vma, as well as to the anon_vmas that
 * the corresponding VMA in the parent process is attached to.
 * Returns 0 on success, non-zero on failure.
 */
int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
{
	struct anon_vma_chain *avc;
	struct anon_vma *anon_vma;
	int error;

	/* Don't bother if the parent process has no anon_vma here. */
	if (!pvma->anon_vma)
		return 0;

	/* Drop inherited anon_vma, we'll reuse existing or allocate new. */
	vma->anon_vma = NULL;

	/*
	 * First, attach the new VMA to the parent VMA's anon_vmas,
	 * so rmap can find non-COWed pages in child processes.
	 */
	error = anon_vma_clone(vma, pvma);
	if (error)
		return error;

	/* An existing anon_vma has been reused, all done then. */
	if (vma->anon_vma)
		return 0;

	/* Then add our own anon_vma. */
	anon_vma = anon_vma_alloc();
	if (!anon_vma)
		goto out_error;
	avc = anon_vma_chain_alloc(GFP_KERNEL);
	if (!avc)
		goto out_error_free_anon_vma;

	/*
	 * The root anon_vma's spinlock is the lock actually used when we
	 * lock any of the anon_vmas in this anon_vma tree.
	 */
	anon_vma->root = pvma->anon_vma->root;
	anon_vma->parent = pvma->anon_vma;
	/*
	 * With refcounts, an anon_vma can stay around longer than the
	 * process it belongs to. The root anon_vma needs to be pinned until
	 * this anon_vma is freed, because the lock lives in the root.
	 */
	get_anon_vma(anon_vma->root);
	/* Mark this anon_vma as the one where our new (COWed) pages go. */
	vma->anon_vma = anon_vma;
	anon_vma_lock_write(anon_vma);
	anon_vma_chain_link(vma, avc, anon_vma);
	anon_vma->parent->degree++;
	anon_vma_unlock_write(anon_vma);

	return 0;

 out_error_free_anon_vma:
	put_anon_vma(anon_vma);
 out_error:
	unlink_anon_vmas(vma);
	return -ENOMEM;
}
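
/*
 * After a successful anon_vma_fork() the child vma is linked into every
 * anon_vma of the parent vma (via anon_vma_clone() above) plus one
 * anon_vma of its own where freshly COWed pages will land, so rmap walks
 * on pages still shared with the parent reach the child through the
 * inherited links.
 */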

void unlink_anon_vmas(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc, *next;
	struct anon_vma *root = NULL;

	/*
	 * Unlink each anon_vma chained to the VMA. This list is ordered
	 * from newest to oldest, ensuring the root anon_vma gets freed last.
	 */
	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
		struct anon_vma *anon_vma = avc->anon_vma;

		root = lock_anon_vma_root(root, anon_vma);
		anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);

		/*
		 * Leave empty anon_vmas on the list - we'll need
		 * to free them outside the lock.
		 */
		if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
			anon_vma->parent->degree--;
			continue;
		}

		list_del(&avc->same_vma);
		anon_vma_chain_free(avc);
	}
	if (vma->anon_vma)
		vma->anon_vma->degree--;
	unlock_anon_vma_root(root);

	/*
	 * Iterate the list once more, it now only contains empty and unlinked
	 * anon_vmas, destroy them. Could not do before due to __put_anon_vma()
	 * needing to write-acquire the anon_vma->root->rwsem.
	 */
	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
		struct anon_vma *anon_vma = avc->anon_vma;

		VM_WARN_ON(anon_vma->degree);
		put_anon_vma(anon_vma);

		list_del(&avc->same_vma);
		anon_vma_chain_free(avc);
	}
}

static void anon_vma_ctor(void *data)
{
	struct anon_vma *anon_vma = data;

	init_rwsem(&anon_vma->rwsem);
	atomic_set(&anon_vma->refcount, 0);
	anon_vma->rb_root = RB_ROOT_CACHED;
}

void __init anon_vma_init(void)
{
	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
			0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
			anon_vma_ctor);
	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain,
			SLAB_PANIC|SLAB_ACCOUNT);
}
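
/*
 * SLAB_TYPESAFE_BY_RCU does not delay freeing an anon_vma until a grace
 * period; it only guarantees that the slab memory keeps holding objects
 * of this type while RCU readers run.  That is the property the lockless
 * lookups below depend on: under rcu_read_lock() they may dereference a
 * freed anon_vma but never unrelated memory, and atomic_inc_not_zero()
 * plus the page_mapped() rechecks weed out the freed or recycled cases.
 */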

/*
 * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
 *
 * Since there is no serialization whatsoever against page_remove_rmap(),
 * the best this function can do is return a locked anon_vma that might
 * have been relevant to this page.
 *
 * The page might have been remapped to a different anon_vma or the anon_vma
 * returned may already be freed (and even reused).
 *
 * In case it was remapped to a different anon_vma, the new anon_vma will be a
 * child of the old anon_vma, and the anon_vma lifetime rules will therefore
 * ensure that any anon_vma obtained from the page will still be valid for as
 * long as we observe page_mapped() [ hence all those page_mapped() tests ].
 *
 * All users of this function must be very careful when walking the anon_vma
 * chain and verify that the page in question is indeed mapped in it
 * [ something equivalent to page_mapped_in_vma() ].
 *
 * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from
 * page_remove_rmap() that the anon_vma pointer from page->mapping is valid
 * if there is a mapcount, we can dereference the anon_vma after observing
 * those.
 */
struct anon_vma *page_get_anon_vma(struct page *page)
{
	struct anon_vma *anon_vma = NULL;
	unsigned long anon_mapping;

	rcu_read_lock();
	anon_mapping = (unsigned long)READ_ONCE(page->mapping);
	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
		goto out;
	if (!page_mapped(page))
		goto out;

	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
		anon_vma = NULL;
		goto out;
	}

	/*
	 * If this page is still mapped, then its anon_vma cannot have been
	 * freed. But if it has been unmapped, we have no protection against
	 * the anon_vma structure being freed and reused (for another
	 * anon_vma: SLAB_TYPESAFE_BY_RCU guarantees that - so the
	 * atomic_inc_not_zero() above cannot corrupt).
	 */
	if (!page_mapped(page)) {
		rcu_read_unlock();
		put_anon_vma(anon_vma);
		return NULL;
	}
out:
	rcu_read_unlock();

	return anon_vma;
}

/*
 * Similar to page_get_anon_vma() except it locks the anon_vma.
 *
 * It's a little more complex as it tries to keep the fast path to a single
 * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
 * reference like with page_get_anon_vma() and then block on the mutex.
 */
struct anon_vma *page_lock_anon_vma_read(struct page *page)
{
	struct anon_vma *anon_vma = NULL;
	struct anon_vma *root_anon_vma;
	unsigned long anon_mapping;

	rcu_read_lock();
	anon_mapping = (unsigned long)READ_ONCE(page->mapping);
	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
		goto out;
	if (!page_mapped(page))
		goto out;

	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
	root_anon_vma = READ_ONCE(anon_vma->root);
	if (down_read_trylock(&root_anon_vma->rwsem)) {
		/*
		 * If the page is still mapped, then this anon_vma is still
		 * its anon_vma, and holding the mutex ensures that it will
		 * not go away, see anon_vma_free().
		 */
		if (!page_mapped(page)) {
			up_read(&root_anon_vma->rwsem);
			anon_vma = NULL;
		}
		goto out;
	}

	/* trylock failed, we have to sleep */
	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
		anon_vma = NULL;
		goto out;
	}

	if (!page_mapped(page)) {
		rcu_read_unlock();
		put_anon_vma(anon_vma);
		return NULL;
	}

	/* we pinned the anon_vma, it's safe to sleep */
	rcu_read_unlock();
	anon_vma_lock_read(anon_vma);

	if (atomic_dec_and_test(&anon_vma->refcount)) {
		/*
		 * Oops, we held the last refcount, release the lock
		 * and bail -- can't simply use put_anon_vma() because
		 * we'll deadlock on the anon_vma_lock_write() recursion.
		 */
		anon_vma_unlock_read(anon_vma);
		__put_anon_vma(anon_vma);
		anon_vma = NULL;
	}

	return anon_vma;

out:
	rcu_read_unlock();
	return anon_vma;
}

void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
{
	anon_vma_unlock_read(anon_vma);
}

#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
/*
 * Flush TLB entries for recently unmapped pages from remote CPUs. If a PTE
 * was dirty when it was unmapped, it is important that it is flushed before
 * any IO is initiated on the page, to prevent lost writes. Similarly, it
 * must be flushed before freeing to prevent data leakage.
 */
void try_to_unmap_flush(void)
{
	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;

	if (!tlb_ubc->flush_required)
		return;

	arch_tlbbatch_flush(&tlb_ubc->arch);
	tlb_ubc->flush_required = false;
	tlb_ubc->writable = false;
}

/* Flush iff there are potentially writable TLB entries that can race with IO */
void try_to_unmap_flush_dirty(void)
{
	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;

	if (tlb_ubc->writable)
		try_to_unmap_flush();
}

static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
{
	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;

	arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
	tlb_ubc->flush_required = true;

	/*
	 * Ensure compiler does not re-order the setting of tlb_flush_batched
	 * before the PTE is cleared.
	 */
	barrier();
	mm->tlb_flush_batched = true;

	/*
	 * If the PTE was dirty then it's best to assume it's writable. The
	 * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
	 * before the page is queued for IO.
	 */
	if (writable)
		tlb_ubc->writable = true;
}

/*
 * Returns true if the TLB flush should be deferred to the end of a batch of
 * unmap operations to reduce IPIs.
 */
static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
{
	bool should_defer = false;

	if (!(flags & TTU_BATCH_FLUSH))
		return false;

	/* If remote CPUs need to be flushed then defer the flush */
	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
		should_defer = true;
	put_cpu();

	return should_defer;
}

/*
 * Reclaim unmaps pages under the PTL but does not flush the TLB prior to
 * releasing the PTL if TLB flushes are batched. It's possible for a parallel
 * operation such as mprotect or munmap to race between reclaim unmapping
 * the page and flushing the page. If this race occurs, it potentially allows
 * access to data via a stale TLB entry. Tracking all mm's that have TLB
 * batching in flight would be expensive during reclaim, so instead track
 * whether TLB batching occurred in the past and if so then do a flush here
 * if required. This will cost one additional flush per reclaim cycle paid
 * by the first operation at risk such as mprotect and munmap.
 *
 * This must be called under the PTL so that an access to tlb_flush_batched
 * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
 * via the PTL.
 */
void flush_tlb_batched_pending(struct mm_struct *mm)
{
	if (data_race(mm->tlb_flush_batched)) {
		flush_tlb_mm(mm);

		/*
		 * Do not allow the compiler to re-order the clearing of
		 * tlb_flush_batched before the tlb is flushed.
		 */
		barrier();
		mm->tlb_flush_batched = false;
	}
}
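
/*
 * The race closed above, step by step: reclaim clears a PTE under the PTL
 * and defers the TLB flush; before that flush happens, munmap() on another
 * CPU takes the PTL, sees the PTE already clear and skips its own flush;
 * a user thread could then keep writing through the stale TLB entry into a
 * page that is about to be freed.  The flush_tlb_mm() above, done under
 * the PTL before mprotect/munmap/etc proceed, closes that window.
 */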
#else
static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
{
}

static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
{
	return false;
}
#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */

/*
 * At what user virtual address is page expected in vma?
 * Caller should check the page is actually part of the vma.
 */
unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
{
	if (PageAnon(page)) {
		struct anon_vma *page__anon_vma = page_anon_vma(page);
		/*
		 * Note: swapoff's unuse_vma() is more efficient with this
		 * check, and needs it to match anon_vma when KSM is active.
		 */
		if (!vma->anon_vma || !page__anon_vma ||
		    vma->anon_vma->root != page__anon_vma->root)
			return -EFAULT;
	} else if (!vma->vm_file) {
		return -EFAULT;
	} else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
		return -EFAULT;
	}

	return vma_address(page, vma);
}

pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd = NULL;
	pmd_t pmde;

	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		goto out;

	p4d = p4d_offset(pgd, address);
	if (!p4d_present(*p4d))
		goto out;

	pud = pud_offset(p4d, address);
	if (!pud_present(*pud))
		goto out;

	pmd = pmd_offset(pud, address);
	/*
	 * Some THP functions use the sequence pmdp_huge_clear_flush(),
	 * set_pmd_at() without holding the anon_vma lock for write. So when
	 * looking for a genuine pmde (in which to find the pte), test present
	 * and !THP together.
	 */
	pmde = *pmd;
	barrier();
	if (!pmd_present(pmde) || pmd_trans_huge(pmde))
		pmd = NULL;
out:
	return pmd;
}

struct page_referenced_arg {
	int mapcount;
	int referenced;
	unsigned long vm_flags;
	struct mem_cgroup *memcg;
};
/*
 * arg: page_referenced_arg will be passed
 */
static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
			unsigned long address, void *arg)
{
	struct page_referenced_arg *pra = arg;
	struct page_vma_mapped_walk pvmw = {
		.page = page,
		.vma = vma,
		.address = address,
	};
	int referenced = 0;

	while (page_vma_mapped_walk(&pvmw)) {
		address = pvmw.address;

		if (vma->vm_flags & VM_LOCKED) {
			page_vma_mapped_walk_done(&pvmw);
			pra->vm_flags |= VM_LOCKED;
			return false; /* To break the loop */
		}

		if (pvmw.pte) {
			if (ptep_clear_flush_young_notify(vma, address,
						pvmw.pte)) {
				/*
				 * Don't treat a reference through
				 * a sequentially read mapping as such.
				 * If the page has been used in another mapping,
				 * we will catch it; if this other mapping is
				 * already gone, the unmap path will have set
				 * PG_referenced or activated the page.
				 */
				if (likely(!(vma->vm_flags & VM_SEQ_READ)))
					referenced++;
			}
		} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
			if (pmdp_clear_flush_young_notify(vma, address,
						pvmw.pmd))
				referenced++;
		} else {
			/* unexpected pmd-mapped page? */
			WARN_ON_ONCE(1);
		}

		pra->mapcount--;
	}

	if (referenced)
		clear_page_idle(page);
	if (test_and_clear_page_young(page))
		referenced++;

	if (referenced) {
		pra->referenced++;
		pra->vm_flags |= vma->vm_flags;
	}

	trace_android_vh_page_referenced_one_end(vma, page, referenced);
	if (!pra->mapcount)
		return false; /* To break the loop */

	return true;
}

static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
{
	struct page_referenced_arg *pra = arg;
	struct mem_cgroup *memcg = pra->memcg;

	if (!mm_match_cgroup(vma->vm_mm, memcg))
		return true;

	return false;
}

/**
 * page_referenced - test if the page was referenced
 * @page: the page to test
 * @is_locked: caller holds lock on the page
 * @memcg: target memory cgroup
 * @vm_flags: collect encountered vma->vm_flags that actually referenced the page
 *
 * Quick test_and_clear_referenced for all mappings to a page,
 * returns the number of ptes which referenced the page.
 */
int page_referenced(struct page *page,
		    int is_locked,
		    struct mem_cgroup *memcg,
		    unsigned long *vm_flags)
{
	int we_locked = 0;
	struct page_referenced_arg pra = {
		.mapcount = total_mapcount(page),
		.memcg = memcg,
	};
	struct rmap_walk_control rwc = {
		.rmap_one = page_referenced_one,
		.arg = (void *)&pra,
		.anon_lock = page_lock_anon_vma_read,
	};

	*vm_flags = 0;
	if (!pra.mapcount)
		return 0;

	if (!page_rmapping(page))
		return 0;

	if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
		we_locked = trylock_page(page);
		if (!we_locked)
			return 1;
	}

	/*
	 * If we are reclaiming on behalf of a cgroup, skip
	 * counting on behalf of references from different
	 * cgroups.
	 */
	if (memcg) {
		rwc.invalid_vma = invalid_page_referenced_vma;
	}

	rmap_walk(page, &rwc);
	*vm_flags = pra.vm_flags;

	if (we_locked)
		unlock_page(page);

	return pra.referenced;
}
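
/*
 * page_referenced() is the aging probe of reclaim: vmscan calls it via
 * page_check_references() to decide whether a page on the inactive list
 * was touched since the last scan and should be activated or can go,
 * which is why it clears the young bits as it counts them.
 */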

static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
			    unsigned long address, void *arg)
{
	struct page_vma_mapped_walk pvmw = {
		.page = page,
		.vma = vma,
		.address = address,
		.flags = PVMW_SYNC,
	};
	struct mmu_notifier_range range;
	int *cleaned = arg;

	/*
	 * We have to assume the worst case, i.e. pmd, for invalidation. Note
	 * that the page cannot be freed from this function.
	 */
	mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
				0, vma, vma->vm_mm, address,
				vma_address_end(page, vma));
	mmu_notifier_invalidate_range_start(&range);

	while (page_vma_mapped_walk(&pvmw)) {
		int ret = 0;

		address = pvmw.address;
		if (pvmw.pte) {
			pte_t entry;
			pte_t *pte = pvmw.pte;

			if (!pte_dirty(*pte) && !pte_write(*pte))
				continue;

			flush_cache_page(vma, address, pte_pfn(*pte));
			entry = ptep_clear_flush(vma, address, pte);
			entry = pte_wrprotect(entry);
			entry = pte_mkclean(entry);
			set_pte_at(vma->vm_mm, address, pte, entry);
			ret = 1;
		} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
			pmd_t *pmd = pvmw.pmd;
			pmd_t entry;

			if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
				continue;

			flush_cache_page(vma, address, page_to_pfn(page));
			entry = pmdp_invalidate(vma, address, pmd);
			entry = pmd_wrprotect(entry);
			entry = pmd_mkclean(entry);
			set_pmd_at(vma->vm_mm, address, pmd, entry);
			ret = 1;
#else
			/* unexpected pmd-mapped page? */
			WARN_ON_ONCE(1);
#endif
		}

		/*
		 * No need to call mmu_notifier_invalidate_range() as we are
		 * downgrading page table protection, not changing it to point
		 * to a new page.
		 *
		 * See Documentation/vm/mmu_notifier.rst
		 */
		if (ret)
			(*cleaned)++;
	}

	mmu_notifier_invalidate_range_end(&range);

	return true;
}

static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
{
	if (vma->vm_flags & VM_SHARED)
		return false;

	return true;
}

int page_mkclean(struct page *page)
{
	int cleaned = 0;
	struct address_space *mapping;
	struct rmap_walk_control rwc = {
		.arg = (void *)&cleaned,
		.rmap_one = page_mkclean_one,
		.invalid_vma = invalid_mkclean_vma,
	};

	BUG_ON(!PageLocked(page));

	if (!page_mapped(page))
		return 0;

	mapping = page_mapping(page);
	if (!mapping)
		return 0;

	rmap_walk(page, &rwc);

	return cleaned;
}
EXPORT_SYMBOL_GPL(page_mkclean);
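
/*
 * page_mkclean() is how writeback catches re-dirtying through shared file
 * mappings: clear_page_dirty_for_io() uses it to write-protect every pte
 * mapping the page, so a later store faults into page_mkwrite() and
 * redirties the page instead of being lost once the IO completes.
 */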

/**
 * page_move_anon_rmap - move a page to our anon_vma
 * @page: the page to move to our anon_vma
 * @vma: the vma the page belongs to
 *
 * When a page belongs exclusively to one process after a COW event,
 * that page can be moved into the anon_vma that belongs to just that
 * process, so the rmap code will not search the parent or sibling
 * processes.
 */
void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	page = compound_head(page);

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_VMA(!anon_vma, vma);

	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
	/*
	 * Ensure that anon_vma and the PAGE_MAPPING_ANON bit are written
	 * simultaneously, so a concurrent reader (eg page_referenced()'s
	 * PageAnon()) will not see one without the other.
	 */
	WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
}

/**
 * __page_set_anon_rmap - set up new anonymous rmap
 * @page: Page or Hugepage to add to rmap
 * @vma: VM area to add page to.
 * @address: User virtual address of the mapping
 * @exclusive: the page is exclusively owned by the current process
 */
static void __page_set_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address, int exclusive)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	BUG_ON(!anon_vma);

	if (PageAnon(page))
		return;

	/*
	 * If the page isn't exclusively mapped into this vma,
	 * we must use the _oldest_ possible anon_vma for the
	 * page mapping!
	 */
	if (!exclusive)
		anon_vma = anon_vma->root;

	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
	page->mapping = (struct address_space *) anon_vma;
	page->index = linear_page_index(vma, address);
}

/**
 * __page_check_anon_rmap - sanity check anonymous rmap addition
 * @page: the page to add the mapping to
 * @vma: the vm area in which the mapping is added
 * @address: the user virtual address mapped
 */
static void __page_check_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	/*
	 * The page's anon-rmap details (mapping and index) are guaranteed to
	 * be set up correctly at this point.
	 *
	 * We have exclusion against page_add_anon_rmap because the caller
	 * always holds the page locked, except if called from page_dup_rmap,
	 * in which case the page is already known to be setup.
	 *
	 * We have exclusion against page_add_new_anon_rmap because those pages
	 * are initially only visible via the pagetables, and the pte is locked
	 * over the call to page_add_new_anon_rmap.
	 */
	VM_BUG_ON_PAGE(page_anon_vma(page)->root != vma->anon_vma->root, page);
	VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address),
		       page);
}

/**
 * page_add_anon_rmap - add pte mapping to an anonymous page
 * @page: the page to add the mapping to
 * @vma: the vm area in which the mapping is added
 * @address: the user virtual address mapped
 * @compound: charge the page as compound or small page
 *
 * The caller needs to hold the pte lock, and the page must be locked in
 * the anon_vma case: to serialize mapping,index checking after setting,
 * and to ensure that PageAnon is not being upgraded racily to PageKsm
 * (but PageKsm is never downgraded to PageAnon).
 */
void page_add_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address, bool compound)
{
	do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
}

/*
 * Special version of the above for do_swap_page, which often runs
 * into pages that are exclusively owned by the current process.
 * Everybody else should continue to use page_add_anon_rmap above.
 */
void do_page_add_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address, int flags)
{
	bool compound = flags & RMAP_COMPOUND;
	bool first;

	if (unlikely(PageKsm(page)))
		lock_page_memcg(page);
	else
		VM_BUG_ON_PAGE(!PageLocked(page), page);

	if (compound) {
		atomic_t *mapcount;
		VM_BUG_ON_PAGE(!PageLocked(page), page);
		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
		mapcount = compound_mapcount_ptr(page);
		first = atomic_inc_and_test(mapcount);
	} else {
		first = atomic_inc_and_test(&page->_mapcount);
	}

	if (first) {
		int nr = compound ? thp_nr_pages(page) : 1;
		/*
		 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
		 * these counters are not modified in interrupt context, and
		 * the pte lock (a spinlock) is held, which implies preemption
		 * disabled.
		 */
		if (compound)
			__inc_lruvec_page_state(page, NR_ANON_THPS);
		__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
	}

	if (unlikely(PageKsm(page))) {
		unlock_page_memcg(page);
		return;
	}

	/* address might be in next vma when migration races vma_adjust */
	if (first)
		__page_set_anon_rmap(page, vma, address,
				flags & RMAP_EXCLUSIVE);
	else
		__page_check_anon_rmap(page, vma, address);
}

/**
 * __page_add_new_anon_rmap - add pte mapping to a new anonymous page
 * @page: the page to add the mapping to
 * @vma: the vm area in which the mapping is added
 * @address: the user virtual address mapped
 * @compound: charge the page as compound or small page
 *
 * Same as page_add_anon_rmap but must only be called on *new* pages.
 * This means the inc-and-test can be bypassed.
 * Page does not have to be locked.
 */
void __page_add_new_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address, bool compound)
{
	int nr = compound ? thp_nr_pages(page) : 1;

	__SetPageSwapBacked(page);
	if (compound) {
		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
		/* increment count (starts at -1) */
		atomic_set(compound_mapcount_ptr(page), 0);
		if (hpage_pincount_available(page))
			atomic_set(compound_pincount_ptr(page), 0);

		__inc_lruvec_page_state(page, NR_ANON_THPS);
	} else {
		/* Anon THP always mapped first with PMD */
		VM_BUG_ON_PAGE(PageTransCompound(page), page);
		/* increment count (starts at -1) */
		atomic_set(&page->_mapcount, 0);
	}
	__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
	__page_set_anon_rmap(page, vma, address, 1);
}
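
/*
 * The "starts at -1" convention above: _mapcount and the compound
 * mapcount store the map count minus one, so atomic_inc_and_test()
 * returning true (counter reached zero) flags the very first mapping, and
 * atomic_add_negative(-1, ...) returning true on removal flags the last.
 * The statistics updates key off exactly those transitions.
 */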

/**
 * page_add_file_rmap - add pte mapping to a file page
 * @page: the page to add the mapping to
 * @compound: charge the page as compound or small page
 *
 * The caller needs to hold the pte lock.
 */
void page_add_file_rmap(struct page *page, bool compound)
{
	int i, nr = 1;

	VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
	lock_page_memcg(page);
	if (compound && PageTransHuge(page)) {
		for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
			if (atomic_inc_and_test(&page[i]._mapcount))
				nr++;
		}
		if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
			goto out;
		if (PageSwapBacked(page))
			__inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
		else
			__inc_node_page_state(page, NR_FILE_PMDMAPPED);
	} else {
		if (PageTransCompound(page) && page_mapping(page)) {
			VM_WARN_ON_ONCE(!PageLocked(page));

			SetPageDoubleMap(compound_head(page));
			if (PageMlocked(page))
				clear_page_mlock(compound_head(page));
		}
		if (!atomic_inc_and_test(&page->_mapcount))
			goto out;
	}
	__mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
out:
	unlock_page_memcg(page);
}

static void page_remove_file_rmap(struct page *page, bool compound)
{
	int i, nr = 1;

	VM_BUG_ON_PAGE(compound && !PageHead(page), page);

	/* Hugepages are not counted in NR_FILE_MAPPED for now. */
	if (unlikely(PageHuge(page))) {
		/* hugetlb pages are always mapped with pmds */
		atomic_dec(compound_mapcount_ptr(page));
		return;
	}

	/* page still mapped by someone else? */
	if (compound && PageTransHuge(page)) {
		for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
			if (atomic_add_negative(-1, &page[i]._mapcount))
				nr++;
		}
		if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
			return;
		if (PageSwapBacked(page))
			__dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
		else
			__dec_node_page_state(page, NR_FILE_PMDMAPPED);
	} else {
		if (!atomic_add_negative(-1, &page->_mapcount))
			return;
	}

	/*
	 * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
	 * these counters are not modified in interrupt context, and
	 * the pte lock (a spinlock) is held, which implies preemption disabled.
	 */
	__mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);

	if (unlikely(PageMlocked(page)))
		clear_page_mlock(page);
}

static void page_remove_anon_compound_rmap(struct page *page)
{
	int i, nr;

	if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
		return;

	/* Hugepages are not counted in NR_ANON_PAGES for now. */
	if (unlikely(PageHuge(page)))
		return;

	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
		return;

	__dec_lruvec_page_state(page, NR_ANON_THPS);

	if (TestClearPageDoubleMap(page)) {
		/*
		 * Subpages can be mapped with PTEs too. Check how many of
		 * them are still mapped.
		 */
		for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
			if (atomic_add_negative(-1, &page[i]._mapcount))
				nr++;
		}

		/*
		 * Queue the page for deferred split if at least one small
		 * page of the compound page is unmapped, but at least one
		 * small page is still mapped.
		 */
		if (nr && nr < thp_nr_pages(page))
			deferred_split_huge_page(page);
	} else {
		nr = thp_nr_pages(page);
	}

	if (unlikely(PageMlocked(page)))
		clear_page_mlock(page);

	if (nr)
		__mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);
}
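
/*
 * PageDoubleMap above marks compound pages mapped both by a PMD and by
 * PTEs of their subpages; only then do the per-subpage _mapcounts carry
 * information, which is why this PMD-unmap path recounts them and queues
 * the page for deferred split when it ends up only partially mapped.
 */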

/**
 * page_remove_rmap - take down pte mapping from a page
 * @page: page to remove mapping from
 * @compound: uncharge the page as compound or small page
 *
 * The caller needs to hold the pte lock.
 */
void page_remove_rmap(struct page *page, bool compound)
{
	lock_page_memcg(page);

	if (!PageAnon(page)) {
		page_remove_file_rmap(page, compound);
		goto out;
	}

	if (compound) {
		page_remove_anon_compound_rmap(page);
		goto out;
	}

	/* page still mapped by someone else? */
	if (!atomic_add_negative(-1, &page->_mapcount))
		goto out;

	/*
	 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
	 * these counters are not modified in interrupt context, and
	 * the pte lock (a spinlock) is held, which implies preemption disabled.
	 */
	__dec_lruvec_page_state(page, NR_ANON_MAPPED);

	if (unlikely(PageMlocked(page)))
		clear_page_mlock(page);

	if (PageTransCompound(page))
		deferred_split_huge_page(compound_head(page));

	/*
	 * It would be tidy to reset the PageAnon mapping here,
	 * but that might overwrite a racing page_add_anon_rmap
	 * which increments mapcount after us but sets mapping
	 * before us: so leave the reset to free_unref_page,
	 * and remember that it's only reliable while mapped.
	 * Leaving it set also helps swapoff to reinstate ptes
	 * faster for those pages still in swapcache.
	 */
out:
	unlock_page_memcg(page);
}

/*
 * @arg: enum ttu_flags will be passed to this argument
 */
static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
		     unsigned long address, void *arg)
{
	struct mm_struct *mm = vma->vm_mm;
	struct page_vma_mapped_walk pvmw = {
		.page = page,
		.vma = vma,
		.address = address,
	};
	pte_t pteval;
	struct page *subpage;
	bool ret = true;
	struct mmu_notifier_range range;
	enum ttu_flags flags = (enum ttu_flags)(long)arg;

	/*
	 * When racing against e.g. zap_pte_range() on another cpu,
	 * in between its ptep_get_and_clear_full() and page_remove_rmap(),
	 * try_to_unmap() may return false when it is about to become true,
	 * if page table locking is skipped: use TTU_SYNC to wait for that.
	 */
	if (flags & TTU_SYNC)
		pvmw.flags = PVMW_SYNC;

	/* munlock has nothing to gain from examining un-locked vmas */
	if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
		return true;

	if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
	    is_zone_device_page(page) && !is_device_private_page(page))
		return true;

	if (flags & TTU_SPLIT_HUGE_PMD) {
		split_huge_pmd_address(vma, address,
				flags & TTU_SPLIT_FREEZE, page);
	}

	/*
	 * For THP, we have to assume the worst case, i.e. pmd, for
	 * invalidation. For hugetlb, it could be much worse if we need to do
	 * pud invalidation in the case of pmd sharing.
	 *
	 * Note that the page cannot be freed in this function, as the caller
	 * of try_to_unmap() must hold a reference on the page.
	 */
	range.end = PageKsm(page) ?
			address + PAGE_SIZE : vma_address_end(page, vma);
	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
				address, range.end);
	if (PageHuge(page)) {
		/*
		 * If sharing is possible, start and end will be adjusted
		 * accordingly.
		 */
		adjust_range_if_pmd_sharing_possible(vma, &range.start,
						     &range.end);
	}
	mmu_notifier_invalidate_range_start(&range);

	while (page_vma_mapped_walk(&pvmw)) {
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
		/* PMD-mapped THP migration entry */
		if (!pvmw.pte && (flags & TTU_MIGRATION)) {
			VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);

			set_pmd_migration_entry(&pvmw, page);
			continue;
		}
#endif

		/*
		 * If the page is mlock()d, we cannot swap it out.
		 * If it's recently referenced (perhaps page_referenced
		 * skipped over this mm) then we should reactivate it.
		 */
		if (!(flags & TTU_IGNORE_MLOCK)) {
			if (vma->vm_flags & VM_LOCKED) {
				/* PTE-mapped THP are never mlocked */
				if (!PageTransCompound(page)) {
					/*
					 * Holding pte lock, we do *not* need
					 * mmap_lock here
					 */
					mlock_vma_page(page);
				}
				ret = false;
				page_vma_mapped_walk_done(&pvmw);
				break;
			}
			if (flags & TTU_MUNLOCK)
				continue;
		}

		/* Unexpected PMD-mapped THP? */
		VM_BUG_ON_PAGE(!pvmw.pte, page);

		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
		address = pvmw.address;

		if (PageHuge(page) && !PageAnon(page)) {
			/*
			 * To call huge_pmd_unshare, i_mmap_rwsem must be
			 * held in write mode. Caller needs to explicitly
			 * do this outside rmap routines.
			 */
			VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
			if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
				/*
				 * huge_pmd_unshare unmapped an entire PMD
				 * page. There is no way of knowing exactly
				 * which PMDs may be cached for this mm, so
				 * we must flush them all. start/end were
				 * already adjusted above to cover this range.
				 */
				flush_cache_range(vma, range.start, range.end);
				flush_tlb_range(vma, range.start, range.end);
				mmu_notifier_invalidate_range(mm, range.start,
							      range.end);

				/*
				 * The ref count of the PMD page was dropped
				 * which is part of the way map counting
				 * is done for shared PMDs. Return 'true'
				 * here. When there is no other sharing,
				 * huge_pmd_unshare returns false and we will
				 * unmap the actual page and drop map count
				 * to zero.
				 */
				page_vma_mapped_walk_done(&pvmw);
				break;
			}
		}

		if (IS_ENABLED(CONFIG_MIGRATION) &&
		    (flags & TTU_MIGRATION) &&
		    is_zone_device_page(page)) {
			swp_entry_t entry;
			pte_t swp_pte;

			pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);

			/*
			 * Store the pfn of the page in a special migration
			 * pte. do_swap_page() will wait until the migration
			 * pte is removed and then restart fault handling.
			 */
			entry = make_migration_entry(page, 0);
			swp_pte = swp_entry_to_pte(entry);

			/*
			 * pteval maps a zone device page and is therefore
			 * a swap pte.
			 */
			if (pte_swp_soft_dirty(pteval))
				swp_pte = pte_swp_mksoft_dirty(swp_pte);
			if (pte_swp_uffd_wp(pteval))
				swp_pte = pte_swp_mkuffd_wp(swp_pte);
			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
			/*
			 * No need to invalidate here; it will synchronize
			 * against the special swap migration pte.
			 *
			 * The assignment to subpage above was computed from a
			 * swap PTE which results in an invalid pointer.
			 * Since only PAGE_SIZE pages can currently be
			 * migrated, just set it to page. This will need to be
			 * changed when hugepage migrations to device private
			 * memory are supported.
			 */
			subpage = page;
			goto discard;
		}

		/* Nuke the page table entry. */
		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
		if (should_defer_flush(mm, flags)) {
			/*
			 * We clear the PTE but do not flush, so potentially
			 * a remote CPU could still be writing to the page.
			 * If the entry was previously clean then the
			 * architecture must guarantee that a clear->dirty
			 * transition on a cached TLB entry is written through
			 * and traps if the PTE is unmapped.
			 */
			pteval = ptep_get_and_clear(mm, address, pvmw.pte);

			set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
		} else {
			pteval = ptep_clear_flush(vma, address, pvmw.pte);
		}

		/* Move the dirty bit to the page. Now the pte is gone. */
		if (pte_dirty(pteval))
			set_page_dirty(page);

		/* Update high watermark before we lower rss */
		update_hiwater_rss(mm);

		if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
			if (PageHuge(page)) {
				hugetlb_count_sub(compound_nr(page), mm);
				set_huge_swap_pte_at(mm, address,
						     pvmw.pte, pteval,
						     vma_mmu_pagesize(vma));
			} else {
				dec_mm_counter(mm, mm_counter(page));
				set_pte_at(mm, address, pvmw.pte, pteval);
			}

		} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
			/*
			 * The guest indicated that the page content is of no
			 * interest anymore. Simply discard the pte, vmscan
			 * will take care of the rest.
			 * A future reference will then fault in a new zero
			 * page. When userfaultfd is active, we must not drop
			 * this page though, as its main user (postcopy
			 * migration) will not expect userfaults on already
			 * copied pages.
			 */
			dec_mm_counter(mm, mm_counter(page));
			/* We have to invalidate as we cleared the pte */
			mmu_notifier_invalidate_range(mm, address,
						      address + PAGE_SIZE);
		} else if (IS_ENABLED(CONFIG_MIGRATION) &&
				(flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
			swp_entry_t entry;
			pte_t swp_pte;

			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
				set_pte_at(mm, address, pvmw.pte, pteval);
				ret = false;
				page_vma_mapped_walk_done(&pvmw);
				break;
			}

			/*
			 * Store the pfn of the page in a special migration
			 * pte. do_swap_page() will wait until the migration
			 * pte is removed and then restart fault handling.
			 */
			entry = make_migration_entry(subpage,
					pte_write(pteval));
			swp_pte = swp_entry_to_pte(entry);
			if (pte_soft_dirty(pteval))
				swp_pte = pte_swp_mksoft_dirty(swp_pte);
			if (pte_uffd_wp(pteval))
				swp_pte = pte_swp_mkuffd_wp(swp_pte);
			set_pte_at(mm, address, pvmw.pte, swp_pte);
			/*
			 * No need to invalidate here; it will synchronize
			 * against the special swap migration pte.
			 */
		} else if (PageAnon(page)) {
			swp_entry_t entry = { .val = page_private(subpage) };
			pte_t swp_pte;
			/*
			 * Store the swap location in the pte.
			 * See handle_pte_fault() ...
			 */
			if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) {
				WARN_ON_ONCE(1);
				ret = false;
				/* We have to invalidate as we cleared the pte */
				mmu_notifier_invalidate_range(mm, address,
							address + PAGE_SIZE);
				page_vma_mapped_walk_done(&pvmw);
				break;
			}

			/* MADV_FREE page check */
			if (!PageSwapBacked(page)) {
				int ref_count, map_count;

				/*
				 * Synchronize with gup_pte_range():
				 * - clear PTE; barrier; read refcount
				 * - inc refcount; barrier; read PTE
				 */
				smp_mb();

				ref_count = page_ref_count(page);
				map_count = page_mapcount(page);

				/*
				 * Order reads for page refcount and dirty flag
				 * (see comments in __remove_mapping()).
				 */
				smp_rmb();

				/*
				 * The only page refs must be one from isolation
				 * plus the rmap(s) (dropped by discard:).
				 */
				if (ref_count == 1 + map_count &&
				    !PageDirty(page)) {
					/* Invalidate as we cleared the pte */
					mmu_notifier_invalidate_range(mm,
						address, address + PAGE_SIZE);
					dec_mm_counter(mm, MM_ANONPAGES);
					goto discard;
				}

				/*
				 * If the page was redirtied, it cannot be
				 * discarded. Remap the page to the page table.
				 */
				set_pte_at(mm, address, pvmw.pte, pteval);
				SetPageSwapBacked(page);
				ret = false;
				page_vma_mapped_walk_done(&pvmw);
				break;
			}

			if (swap_duplicate(entry) < 0) {
				set_pte_at(mm, address, pvmw.pte, pteval);
				ret = false;
				page_vma_mapped_walk_done(&pvmw);
				break;
			}
			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
				set_pte_at(mm, address, pvmw.pte, pteval);
				ret = false;
				page_vma_mapped_walk_done(&pvmw);
				break;
			}
			if (list_empty(&mm->mmlist)) {
				spin_lock(&mmlist_lock);
				if (list_empty(&mm->mmlist))
					list_add(&mm->mmlist, &init_mm.mmlist);
				spin_unlock(&mmlist_lock);
			}
			dec_mm_counter(mm, MM_ANONPAGES);
			inc_mm_counter(mm, MM_SWAPENTS);
			swp_pte = swp_entry_to_pte(entry);
			if (pte_soft_dirty(pteval))
				swp_pte = pte_swp_mksoft_dirty(swp_pte);
			if (pte_uffd_wp(pteval))
				swp_pte = pte_swp_mkuffd_wp(swp_pte);
			set_pte_at(mm, address, pvmw.pte, swp_pte);
			/* Invalidate as we cleared the pte */
			mmu_notifier_invalidate_range(mm, address,
						      address + PAGE_SIZE);
		} else {
			/*
			 * This is a locked file-backed page, thus it cannot
			 * be removed from the page cache and replaced by a new
			 * page before mmu_notifier_invalidate_range_end, so no
			 * concurrent thread might update its page table to
			 * point at a new page while a device is still using
			 * this page.
			 *
			 * See Documentation/vm/mmu_notifier.rst
			 */
			dec_mm_counter(mm, mm_counter_file(page));
		}
discard:
		/*
		 * No need to call mmu_notifier_invalidate_range(); it has
		 * been done above for all cases requiring it to happen under
		 * the page table lock before
		 * mmu_notifier_invalidate_range_end().
		 *
		 * See Documentation/vm/mmu_notifier.rst
		 */
		page_remove_rmap(subpage, PageHuge(page));
		put_page(page);
	}

	mmu_notifier_invalidate_range_end(&range);
	trace_android_vh_try_to_unmap_one(vma, page, address, ret);

	return ret;
}
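
/*
 * Net effect of the MADV_FREE handling above: a clean anonymous page with
 * no extra references (ref_count == 1 + map_count) is discarded outright
 * without touching swap, while a page redirtied after madvise(MADV_FREE)
 * gets SetPageSwapBacked() again and stays mapped, so its contents survive
 * and go to swap as usual.
 */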

static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
{
	return vma_is_temporary_stack(vma);
}

static int page_not_mapped(struct page *page)
{
	return !page_mapped(page);
}

/**
 * try_to_unmap - try to remove all page table mappings to a page
 * @page: the page to get unmapped
 * @flags: action and flags
 *
 * Tries to remove all the page table entries which are mapping this
 * page, used in the pageout path. Caller must hold the page lock.
 *
 * If unmap is successful, return true. Otherwise, false.
 */
bool try_to_unmap(struct page *page, enum ttu_flags flags)
{
	struct rmap_walk_control rwc = {
		.rmap_one = try_to_unmap_one,
		.arg = (void *)flags,
		.done = page_not_mapped,
		.anon_lock = page_lock_anon_vma_read,
	};

	/*
	 * During exec, a temporary VMA is setup and later moved.
	 * The VMA is moved under the anon_vma lock but not the
	 * page tables leading to a race where migration cannot
	 * find the migration ptes. Rather than increasing the
	 * locking requirements of exec(), migration skips
	 * temporary VMAs until after exec() completes.
	 */
	if ((flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))
	    && !PageKsm(page) && PageAnon(page))
		rwc.invalid_vma = invalid_migration_vma;

	if (flags & TTU_RMAP_LOCKED)
		rmap_walk_locked(page, &rwc);
	else
		rmap_walk(page, &rwc);

	/*
	 * When racing against e.g. zap_pte_range() on another cpu,
	 * in between its ptep_get_and_clear_full() and page_remove_rmap(),
	 * try_to_unmap() may return false when it is about to become true,
	 * if page table locking is skipped: use TTU_SYNC to wait for that.
	 */
	return !page_mapcount(page);
}

/**
 * try_to_munlock - try to munlock a page
 * @page: the page to be munlocked
 *
 * Called from munlock code. Checks all of the VMAs mapping the page
 * to make sure nobody else has this page mlocked. The page will be
 * returned with PG_mlocked cleared if no other vmas have it mlocked.
 */
void try_to_munlock(struct page *page)
{
	struct rmap_walk_control rwc = {
		.rmap_one = try_to_unmap_one,
		.arg = (void *)TTU_MUNLOCK,
		.done = page_not_mapped,
		.anon_lock = page_lock_anon_vma_read,
	};

	VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
	VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);

	rmap_walk(page, &rwc);
}

void __put_anon_vma(struct anon_vma *anon_vma)
{
	struct anon_vma *root = anon_vma->root;

	anon_vma_free(anon_vma);
	if (root != anon_vma && atomic_dec_and_test(&root->refcount))
		anon_vma_free(root);
}

static struct anon_vma *rmap_walk_anon_lock(struct page *page,
					struct rmap_walk_control *rwc)
{
	struct anon_vma *anon_vma;

	if (rwc->anon_lock)
		return rwc->anon_lock(page);

	/*
	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
	 * because that depends on page_mapped(); but not all its usages
	 * are holding mmap_lock. Users without mmap_lock are required to
	 * take a reference count to prevent the anon_vma disappearing.
	 */
	anon_vma = page_anon_vma(page);
	if (!anon_vma)
		return NULL;

	anon_vma_lock_read(anon_vma);
	return anon_vma;
}

/*
 * rmap_walk_anon - do something to anonymous page using the object-based
 * rmap method
 * @page: the page to be handled
 * @rwc: control variable according to each walk type
 *
 * Find all the mappings of a page using the mapping pointer and the vma chains
 * contained in the anon_vma struct it points to.
 *
 * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
 * where the page was found will be held for write. So, we won't recheck
 * vm_flags for that VMA. That should be OK, because that vma shouldn't be
 * LOCKED.
 */
static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
		bool locked)
{
	struct anon_vma *anon_vma;
	pgoff_t pgoff_start, pgoff_end;
	struct anon_vma_chain *avc;

	if (locked) {
		anon_vma = page_anon_vma(page);
		/* Did the anon_vma disappear under us? */
		VM_BUG_ON_PAGE(!anon_vma, page);
	} else {
		anon_vma = rmap_walk_anon_lock(page, rwc);
	}
	if (!anon_vma)
		return;

	pgoff_start = page_to_pgoff(page);
	pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
			pgoff_start, pgoff_end) {
		struct vm_area_struct *vma = avc->vma;
		unsigned long address = vma_address(page, vma);

		VM_BUG_ON_VMA(address == -EFAULT, vma);
		cond_resched();

		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
			continue;

		if (!rwc->rmap_one(page, vma, address, rwc->arg))
			break;
		if (rwc->done && rwc->done(page))
			break;
	}

	if (!locked)
		anon_vma_unlock_read(anon_vma);
}
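
/*
 * The pgoff_start..pgoff_end window above spans every subpage of a THP,
 * so one walk visits each vma that maps any part of the compound page;
 * vma_address() then turns the page's offset back into a user address
 * inside that vma.
 */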

/*
 * rmap_walk_file - do something to file page using the object-based rmap method
 * @page: the page to be handled
 * @rwc: control variable according to each walk type
 *
 * Find all the mappings of a page using the mapping pointer and the vma chains
 * contained in the address_space struct it points to.
 *
 * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
 * where the page was found will be held for write. So, we won't recheck
 * vm_flags for that VMA. That should be OK, because that vma shouldn't be
 * LOCKED.
 */
static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
		bool locked)
{
	struct address_space *mapping = page_mapping(page);
	pgoff_t pgoff_start, pgoff_end;
	struct vm_area_struct *vma;

	/*
	 * The page lock not only makes sure that page->mapping cannot
	 * suddenly be NULLified by truncation, it makes sure that the
	 * structure at mapping cannot be freed and reused yet,
	 * so we can safely take mapping->i_mmap_rwsem.
	 */
	VM_BUG_ON_PAGE(!PageLocked(page), page);

	if (!mapping)
		return;

	pgoff_start = page_to_pgoff(page);
	pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
	if (!locked)
		i_mmap_lock_read(mapping);
	vma_interval_tree_foreach(vma, &mapping->i_mmap,
			pgoff_start, pgoff_end) {
		unsigned long address = vma_address(page, vma);

		VM_BUG_ON_VMA(address == -EFAULT, vma);
		cond_resched();

		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
			continue;

		if (!rwc->rmap_one(page, vma, address, rwc->arg))
			goto done;
		if (rwc->done && rwc->done(page))
			goto done;
	}

done:
	if (!locked)
		i_mmap_unlock_read(mapping);
}

void rmap_walk(struct page *page, struct rmap_walk_control *rwc)
{
	if (unlikely(PageKsm(page)))
		rmap_walk_ksm(page, rwc);
	else if (PageAnon(page))
		rmap_walk_anon(page, rwc, false);
	else
		rmap_walk_file(page, rwc, false);
}

/* Like rmap_walk, but caller holds relevant rmap lock */
void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc)
{
	/* no ksm support for now */
	VM_BUG_ON_PAGE(PageKsm(page), page);
	if (PageAnon(page))
		rmap_walk_anon(page, rwc, true);
	else
		rmap_walk_file(page, rwc, true);
}

#ifdef CONFIG_HUGETLB_PAGE
/*
 * The following two functions are for anonymous (private mapped) hugepages.
 * Unlike common anonymous pages, anonymous hugepages have no accounting code
 * and no lru code, because we handle hugepages differently from common pages.
 */
void hugepage_add_anon_rmap(struct page *page,
			    struct vm_area_struct *vma, unsigned long address)
{
	struct anon_vma *anon_vma = vma->anon_vma;
	int first;

	BUG_ON(!PageLocked(page));
	BUG_ON(!anon_vma);
	/* address might be in next vma when migration races vma_adjust */
	first = atomic_inc_and_test(compound_mapcount_ptr(page));
	if (first)
		__page_set_anon_rmap(page, vma, address, 0);
}

void hugepage_add_new_anon_rmap(struct page *page,
			struct vm_area_struct *vma, unsigned long address)
{
	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
	atomic_set(compound_mapcount_ptr(page), 0);
	if (hpage_pincount_available(page))
		atomic_set(compound_pincount_ptr(page), 0);

	__page_set_anon_rmap(page, vma, address, 1);
}
#endif /* CONFIG_HUGETLB_PAGE */