
Changes in 5.10.110 swiotlb: fix info leak with DMA_FROM_DEVICE USB: serial: pl2303: add IBM device IDs USB: serial: simple: add Nokia phone driver hv: utils: add PTP_1588_CLOCK to Kconfig to fix build netdevice: add the case if dev is NULL HID: logitech-dj: add new lightspeed receiver id xfrm: fix tunnel model fragmentation behavior ARM: mstar: Select HAVE_ARM_ARCH_TIMER virtio_console: break out of buf poll on remove vdpa/mlx5: should verify CTRL_VQ feature exists for MQ tools/virtio: fix virtio_test execution ethernet: sun: Free the coherent when failing in probing gpio: Revert regression in sysfs-gpio (gpiolib.c) spi: Fix invalid sgs value net:mcf8390: Use platform_get_irq() to get the interrupt Revert "gpio: Revert regression in sysfs-gpio (gpiolib.c)" spi: Fix erroneous sgs value with min_t() Input: zinitix - do not report shadow fingers af_key: add __GFP_ZERO flag for compose_sadb_supported in function pfkey_register net: dsa: microchip: add spi_device_id tables locking/lockdep: Avoid potential access of invalid memory in lock_class iommu/iova: Improve 32-bit free space estimate tpm: fix reference counting for struct tpm_chip virtio-blk: Use blk_validate_block_size() to validate block size USB: usb-storage: Fix use of bitfields for hardware data in ene_ub6250.c xhci: fix garbage USBSTS being logged in some cases xhci: fix runtime PM imbalance in USB2 resume xhci: make xhci_handshake timeout for xhci_reset() adjustable xhci: fix uninitialized string returned by xhci_decode_ctrl_ctx() mei: me: add Alder Lake N device id. 
mei: avoid iterator usage outside of list_for_each_entry coresight: Fix TRCCONFIGR.QE sysfs interface iio: afe: rescale: use s64 for temporary scale calculations iio: inkern: apply consumer scale on IIO_VAL_INT cases iio: inkern: apply consumer scale when no channel scale is available iio: inkern: make a best effort on offset calculation greybus: svc: fix an error handling bug in gb_svc_hello() clk: uniphier: Fix fixed-rate initialization ptrace: Check PTRACE_O_SUSPEND_SECCOMP permission on PTRACE_SEIZE KEYS: fix length validation in keyctl_pkey_params_get_2() Documentation: add link to stable release candidate tree Documentation: update stable tree link firmware: stratix10-svc: add missing callback parameter on RSU HID: intel-ish-hid: Use dma_alloc_coherent for firmware update SUNRPC: avoid race between mod_timer() and del_timer_sync() NFSD: prevent underflow in nfssvc_decode_writeargs() NFSD: prevent integer overflow on 32 bit systems f2fs: fix to unlock page correctly in error path of is_alive() f2fs: quota: fix loop condition at f2fs_quota_sync() f2fs: fix to do sanity check on .cp_pack_total_block_count remoteproc: Fix count check in rproc_coredump_write() pinctrl: samsung: drop pin banks references on error paths spi: mxic: Fix the transmit path mtd: rawnand: protect access to rawnand devices while in suspend can: ems_usb: ems_usb_start_xmit(): fix double dev_kfree_skb() in error path jffs2: fix use-after-free in jffs2_clear_xattr_subsystem jffs2: fix memory leak in jffs2_do_mount_fs jffs2: fix memory leak in jffs2_scan_medium mm/pages_alloc.c: don't create ZONE_MOVABLE beyond the end of a node mm: invalidate hwpoison page cache page in fault path mempolicy: mbind_range() set_policy() after vma_merge() scsi: libsas: Fix sas_ata_qc_issue() handling of NCQ NON DATA commands qed: display VF trust config qed: validate and restrict untrusted VFs vlan promisc mode riscv: Fix fill_callchain return value riscv: Increase stack size under KASAN Revert "Input: clear 
BTN_RIGHT/MIDDLE on buttonpads" cifs: prevent bad output lengths in smb2_ioctl_query_info() cifs: fix NULL ptr dereference in smb2_ioctl_query_info() ALSA: cs4236: fix an incorrect NULL check on list iterator ALSA: hda: Avoid unsol event during RPM suspending ALSA: pcm: Fix potential AB/BA lock with buffer_mutex and mmap_lock ALSA: hda/realtek: Fix audio regression on Mi Notebook Pro 2020 mm: madvise: skip unmapped vma holes passed to process_madvise mm: madvise: return correct bytes advised with process_madvise Revert "mm: madvise: skip unmapped vma holes passed to process_madvise" mm,hwpoison: unmap poisoned page before invalidation mm/kmemleak: reset tag when compare object pointer dm integrity: set journal entry unused when shrinking device drbd: fix potential silent data corruption can: isotp: sanitize CAN ID checks in isotp_bind() powerpc/kvm: Fix kvm_use_magic_page udp: call udp_encap_enable for v6 sockets when enabling encap arm64: signal: nofpsimd: Do not allocate fp/simd context when not available arm64: dts: ti: k3-am65: Fix gic-v3 compatible regs arm64: dts: ti: k3-j721e: Fix gic-v3 compatible regs arm64: dts: ti: k3-j7200: Fix gic-v3 compatible regs ACPI: properties: Consistently return -ENOENT if there are no more references coredump: Also dump first pages of non-executable ELF libraries ext4: fix ext4_fc_stats trace point ext4: fix fs corruption when tring to remove a non-empty directory with IO error drivers: hamradio: 6pack: fix UAF bug caused by mod_timer() mailbox: tegra-hsp: Flush whole channel block: limit request dispatch loop duration block: don't merge across cgroup boundaries if blkcg is enabled drm/edid: check basic audio support on CEA extension block video: fbdev: sm712fb: Fix crash in smtcfb_read() video: fbdev: atari: Atari 2 bpp (STe) palette bugfix ARM: dts: at91: sama5d2: Fix PMERRLOC resource size ARM: dts: exynos: fix UART3 pins configuration in Exynos5250 ARM: dts: exynos: add missing HDMI supplies on SMDK5250 ARM: dts: exynos: 
add missing HDMI supplies on SMDK5420 mgag200 fix memmapsl configuration in GCTL6 register carl9170: fix missing bit-wise or operator for tx_params pstore: Don't use semaphores in always-atomic-context code thermal: int340x: Increase bitmap size lib/raid6/test: fix multiple definition linking error exec: Force single empty string when argv is empty crypto: rsa-pkcs1pad - only allow with rsa crypto: rsa-pkcs1pad - correctly get hash from source scatterlist crypto: rsa-pkcs1pad - restore signature length check crypto: rsa-pkcs1pad - fix buffer overread in pkcs1pad_verify_complete() bcache: fixup multiple threads crash DEC: Limit PMAX memory probing to R3k systems media: gpio-ir-tx: fix transmit with long spaces on Orange Pi PC media: davinci: vpif: fix unbalanced runtime PM get media: davinci: vpif: fix unbalanced runtime PM enable xtensa: fix stop_machine_cpuslocked call in patch_text xtensa: fix xtensa_wsr always writing 0 brcmfmac: firmware: Allocate space for default boardrev in nvram brcmfmac: pcie: Release firmwares in the brcmf_pcie_setup error path brcmfmac: pcie: Replace brcmf_pcie_copy_mem_todev with memcpy_toio brcmfmac: pcie: Fix crashes due to early IRQs drm/i915/opregion: check port number bounds for SWSCI display power state drm/i915/gem: add missing boundary check in vm_access PCI: pciehp: Clear cmd_busy bit in polling mode PCI: xgene: Revert "PCI: xgene: Fix IB window setup" regulator: qcom_smd: fix for_each_child.cocci warnings selinux: check return value of sel_make_avc_files hwrng: cavium - Check health status while reading random data hwrng: cavium - HW_RANDOM_CAVIUM should depend on ARCH_THUNDER crypto: sun8i-ss - really disable hash on A80 crypto: authenc - Fix sleep in atomic context in decrypt_tail crypto: mxs-dcp - Fix scatterlist processing thermal: int340x: Check for NULL after calling kmemdup() spi: tegra114: Add missing IRQ check in tegra_spi_probe arm64/mm: avoid fixmap race condition when create pud mapping selftests/x86: Add validity 
check and allow field splitting crypto: rockchip - ECB does not need IV audit: log AUDIT_TIME_* records only from rules EVM: fix the evm= __setup handler return value crypto: ccree - don't attempt 0 len DMA mappings spi: pxa2xx-pci: Balance reference count for PCI DMA device hwmon: (pmbus) Add mutex to regulator ops hwmon: (sch56xx-common) Replace WDOG_ACTIVE with WDOG_HW_RUNNING nvme: cleanup __nvme_check_ids block: don't delete queue kobject before its children PM: hibernate: fix __setup handler error handling PM: suspend: fix return value of __setup handler spi: spi-zynqmp-gqspi: Handle error for dma_set_mask hwrng: atmel - disable trng on failure path crypto: sun8i-ss - call finalize with bh disabled crypto: sun8i-ce - call finalize with bh disabled crypto: amlogic - call finalize with bh disabled crypto: vmx - add missing dependencies clocksource/drivers/timer-ti-dm: Fix regression from errata i940 fix clocksource/drivers/exynos_mct: Refactor resources allocation clocksource/drivers/exynos_mct: Handle DTS with higher number of interrupts clocksource/drivers/timer-microchip-pit64b: Use notrace clocksource/drivers/timer-of: Check return value of of_iomap in timer_of_base_init() ACPI: APEI: fix return value of __setup handlers crypto: ccp - ccp_dmaengine_unregister release dma channels crypto: ccree - Fix use after free in cc_cipher_exit() vfio: platform: simplify device removal amba: Make the remove callback return void hwrng: nomadik - Change clk_disable to clk_disable_unprepare hwmon: (pmbus) Add Vin unit off handling clocksource: acpi_pm: fix return value of __setup handler io_uring: terminate manual loop iterator loop correctly for non-vecs watch_queue: Fix NULL dereference in error cleanup watch_queue: Actually free the watch f2fs: fix to enable ATGC correctly via gc_idle sysfs interface sched/debug: Remove mpol_get/put and task_lock/unlock from sched_show_numa sched/core: Export pelt_thermal_tp rseq: Optimise rseq_get_rseq_cs() and clear_rseq_cs() rseq: 
Remove broken uapi field layout on 32-bit little endian perf/core: Fix address filter parser for multiple filters perf/x86/intel/pt: Fix address filter config for 32-bit kernel f2fs: fix missing free nid in f2fs_handle_failed_inode nfsd: more robust allocation failure handling in nfsd_file_cache_init f2fs: fix to avoid potential deadlock btrfs: fix unexpected error path when reflinking an inline extent f2fs: compress: remove unneeded read when rewrite whole cluster f2fs: fix compressed file start atomic write may cause data corruption selftests, x86: fix how check_cc.sh is being invoked kunit: make kunit_test_timeout compatible with comment media: staging: media: zoran: fix usage of vb2_dma_contig_set_max_seg_size media: v4l2-mem2mem: Apply DST_QUEUE_OFF_BASE on MMAP buffers across ioctls media: mtk-vcodec: potential dereference of null pointer media: bttv: fix WARNING regression on tunerless devices ASoC: xilinx: xlnx_formatter_pcm: Handle sysclk setting ASoC: generic: simple-card-utils: remove useless assignment media: coda: Fix missing put_device() call in coda_get_vdoa_data media: meson: vdec: potential dereference of null pointer media: hantro: Fix overfill bottom register field name media: aspeed: Correct value for h-total-pixels video: fbdev: matroxfb: set maxvram of vbG200eW to the same as vbG200 to avoid black screen video: fbdev: controlfb: Fix set but not used warnings video: fbdev: controlfb: Fix COMPILE_TEST build video: fbdev: smscufx: Fix null-ptr-deref in ufx_usb_probe() video: fbdev: atmel_lcdfb: fix an error code in atmel_lcdfb_probe() video: fbdev: fbcvt.c: fix printing in fb_cvt_print_name() firmware: qcom: scm: Remove reassignment to desc following initializer ARM: dts: qcom: ipq4019: fix sleep clock soc: qcom: rpmpd: Check for null return of devm_kcalloc soc: qcom: ocmem: Fix missing put_device() call in of_get_ocmem soc: qcom: aoss: remove spurious IRQF_ONESHOT flags arm64: dts: qcom: sdm845: fix microphone bias properties and values arm64: 
dts: qcom: sm8150: Correct TCS configuration for apps rsc firmware: ti_sci: Fix compilation failure when CONFIG_TI_SCI_PROTOCOL is not defined soc: ti: wkup_m3_ipc: Fix IRQ check in wkup_m3_ipc_probe ARM: dts: sun8i: v3s: Move the csi1 block to follow address order ARM: dts: imx: Add missing LVDS decoder on M53Menlo media: video/hdmi: handle short reads of hdmi info frame. media: em28xx: initialize refcount before kref_get media: usb: go7007: s2250-board: fix leak in probe() media: cedrus: H265: Fix neighbour info buffer size media: cedrus: h264: Fix neighbour info buffer size ASoC: codecs: wcd934x: fix return value of wcd934x_rx_hph_mode_put uaccess: fix nios2 and microblaze get_user_8() ASoC: rt5663: check the return value of devm_kzalloc() in rt5663_parse_dp() ASoC: ti: davinci-i2s: Add check for clk_enable() ALSA: spi: Add check for clk_enable() arm64: dts: ns2: Fix spi-cpol and spi-cpha property arm64: dts: broadcom: Fix sata nodename printk: fix return value of printk.devkmsg __setup handler ASoC: mxs-saif: Handle errors for clk_enable ASoC: atmel_ssc_dai: Handle errors for clk_enable ASoC: dwc-i2s: Handle errors for clk_enable ASoC: soc-compress: prevent the potentially use of null pointer memory: emif: Add check for setup_interrupts memory: emif: check the pointer temp in get_device_details() ALSA: firewire-lib: fix uninitialized flag for AV/C deferred transaction arm64: dts: rockchip: Fix SDIO regulator supply properties on rk3399-firefly m68k: coldfire/device.c: only build for MCF_EDMA when h/w macros are defined media: stk1160: If start stream fails, return buffers with VB2_BUF_STATE_QUEUED media: vidtv: Check for null return of vzalloc ASoC: atmel: Add missing of_node_put() in at91sam9g20ek_audio_probe ASoC: wm8350: Handle error for wm8350_register_irq ASoC: fsi: Add check for clk_enable video: fbdev: omapfb: Add missing of_node_put() in dvic_probe_of media: saa7134: convert list_for_each to entry variant media: saa7134: fix incorrect use to determine 
if list is empty ivtv: fix incorrect device_caps for ivtvfb ASoC: rockchip: i2s: Use devm_platform_get_and_ioremap_resource() ASoC: rockchip: i2s: Fix missing clk_disable_unprepare() in rockchip_i2s_probe ASoC: SOF: Add missing of_node_put() in imx8m_probe ASoC: dmaengine: do not use a NULL prepare_slave_config() callback ASoC: mxs: Fix error handling in mxs_sgtl5000_probe ASoC: fsl_spdif: Disable TX clock when stop ASoC: imx-es8328: Fix error return code in imx_es8328_probe() ASoC: msm8916-wcd-digital: Fix missing clk_disable_unprepare() in msm8916_wcd_digital_probe mmc: davinci_mmc: Handle error for clk_enable ASoC: atmel: sam9x5_wm8731: use devm_snd_soc_register_card() ASoC: atmel: Fix error handling in sam9x5_wm8731_driver_probe ASoC: msm8916-wcd-analog: Fix error handling in pm8916_wcd_analog_spmi_probe ASoC: codecs: wcd934x: Add missing of_node_put() in wcd934x_codec_parse_data ARM: configs: multi_v5_defconfig: re-enable CONFIG_V4L_PLATFORM_DRIVERS drm/meson: osd_afbcd: Add an exit callback to struct meson_afbcd_ops drm/bridge: Fix free wrong object in sii8620_init_rcp_input_dev drm/bridge: Add missing pm_runtime_disable() in __dw_mipi_dsi_probe drm/bridge: nwl-dsi: Fix PM disable depth imbalance in nwl_dsi_probe drm: bridge: adv7511: Fix ADV7535 HPD enablement ath10k: fix memory overwrite of the WoWLAN wakeup packet pattern drm/panfrost: Check for error num after setting mask libbpf: Fix possible NULL pointer dereference when destroying skeleton udmabuf: validate ubuf->pagecount Bluetooth: hci_serdev: call init_rwsem() before p->open() mtd: onenand: Check for error irq mtd: rawnand: gpmi: fix controller timings setting drm/edid: Don't clear formats if using deep color ionic: fix type complaint in ionic_dev_cmd_clean() drm/nouveau/acr: Fix undefined behavior in nvkm_acr_hsfw_load_bl() drm/amd/display: Fix a NULL pointer dereference in amdgpu_dm_connector_add_common_modes() drm/amd/pm: return -ENOTSUPP if there is no get_dpm_ultimate_freq function ath9k_htc: 
fix uninit value bugs RDMA/core: Set MR type in ib_reg_user_mr KVM: PPC: Fix vmx/vsx mixup in mmio emulation i40e: don't reserve excessive XDP_PACKET_HEADROOM on XSK Rx to skb i40e: respect metadata on XSK Rx to skb power: reset: gemini-poweroff: Fix IRQ check in gemini_poweroff_probe ray_cs: Check ioremap return value powerpc: dts: t1040rdb: fix ports names for Seville Ethernet switch KVM: PPC: Book3S HV: Check return value of kvmppc_radix_init powerpc/perf: Don't use perf_hw_context for trace IMC PMU mt76: mt7915: use proper aid value in mt7915_mcu_wtbl_generic_tlv in sta mode mt76: mt7915: use proper aid value in mt7915_mcu_sta_basic_tlv mt76: mt7603: check sta_rates pointer in mt7603_sta_rate_tbl_update mt76: mt7615: check sta_rates pointer in mt7615_sta_rate_tbl_update net: dsa: mv88e6xxx: Enable port policy support on 6097 scripts/dtc: Call pkg-config POSIXly correct livepatch: Fix build failure on 32 bits processors PCI: aardvark: Fix reading PCI_EXP_RTSTA_PME bit on emulated bridge drm/bridge: dw-hdmi: use safe format when first in bridge chain power: supply: ab8500: Fix memory leak in ab8500_fg_sysfs_init HID: i2c-hid: fix GET/SET_REPORT for unnumbered reports iommu/ipmmu-vmsa: Check for error num after setting mask drm/amd/pm: enable pm sysfs write for one VF mode drm/amd/display: Add affected crtcs to atomic state for dsc mst unplug IB/cma: Allow XRC INI QPs to set their local ACK timeout dax: make sure inodes are flushed before destroy cache iwlwifi: Fix -EIO error code that is never returned iwlwifi: mvm: Fix an error code in iwl_mvm_up() drm/msm/dp: populate connector of struct dp_panel drm/msm/dpu: add DSPP blocks teardown drm/msm/dpu: fix dp audio condition dm crypt: fix get_key_size compiler warning if !CONFIG_KEYS scsi: pm8001: Fix command initialization in pm80XX_send_read_log() scsi: pm8001: Fix command initialization in pm8001_chip_ssp_tm_req() scsi: pm8001: Fix payload initialization in pm80xx_set_thermal_config() scsi: pm8001: Fix le32 values 
handling in pm80xx_set_sas_protocol_timer_config() scsi: pm8001: Fix payload initialization in pm80xx_encrypt_update() scsi: pm8001: Fix le32 values handling in pm80xx_chip_ssp_io_req() scsi: pm8001: Fix le32 values handling in pm80xx_chip_sata_req() scsi: pm8001: Fix NCQ NON DATA command task initialization scsi: pm8001: Fix NCQ NON DATA command completion handling scsi: pm8001: Fix abort all task initialization RDMA/mlx5: Fix the flow of a miss in the allocation of a cache ODP MR drm/amd/display: Remove vupdate_int_entry definition TOMOYO: fix __setup handlers return values ext2: correct max file size computing drm/tegra: Fix reference leak in tegra_dsi_ganged_probe power: supply: bq24190_charger: Fix bq24190_vbus_is_enabled() wrong false return scsi: hisi_sas: Change permission of parameter prot_mask drm/bridge: cdns-dsi: Make sure to to create proper aliases for dt bpf, arm64: Call build_prologue() first in first JIT pass bpf, arm64: Feed byte-offset into bpf line info gpu: host1x: Fix a memory leak in 'host1x_remove()' libbpf: Skip forward declaration when counting duplicated type names powerpc/mm/numa: skip NUMA_NO_NODE onlining in parse_numa_properties() powerpc/Makefile: Don't pass -mcpu=powerpc64 when building 32-bit KVM: x86: Fix emulation in writing cr8 KVM: x86/emulator: Defer not-present segment check in __load_segment_descriptor() hv_balloon: rate-limit "Unhandled message" warning i2c: xiic: Make bus names unique power: supply: wm8350-power: Handle error for wm8350_register_irq power: supply: wm8350-power: Add missing free in free_charger_irq IB/hfi1: Allow larger MTU without AIP PCI: Reduce warnings on possible RW1C corruption net: axienet: fix RX ring refill allocation failure handling mips: DEC: honor CONFIG_MIPS_FP_SUPPORT=n powerpc/sysdev: fix incorrect use to determine if list is empty mfd: mc13xxx: Add check for mc13xxx_irq_request libbpf: Unmap rings when umem deleted selftests/bpf: Make test_lwt_ip_encap more stable and faster platform/x86: 
huawei-wmi: check the return value of device_create_file() powerpc: 8xx: fix a return value error in mpc8xx_pic_init vxcan: enable local echo for sent CAN frames ath10k: Fix error handling in ath10k_setup_msa_resources mips: cdmm: Fix refcount leak in mips_cdmm_phys_base MIPS: RB532: fix return value of __setup handler MIPS: pgalloc: fix memory leak caused by pgd_free() mtd: rawnand: atmel: fix refcount issue in atmel_nand_controller_init RDMA/mlx5: Fix memory leak in error flow for subscribe event routine bpf, sockmap: Fix memleak in tcp_bpf_sendmsg while sk msg is full bpf, sockmap: Fix more uncharged while msg has more_data bpf, sockmap: Fix double uncharge the mem of sk_msg samples/bpf, xdpsock: Fix race when running for fix duration of time USB: storage: ums-realtek: fix error code in rts51x_read_mem() can: isotp: return -EADDRNOTAVAIL when reading from unbound socket can: isotp: support MSG_TRUNC flag when reading from socket bareudp: use ipv6_mod_enabled to check if IPv6 enabled selftests/bpf: Fix error reporting from sock_fields programs Bluetooth: call hci_le_conn_failed with hdev lock in hci_le_conn_failed Bluetooth: btmtksdio: Fix kernel oops in btmtksdio_interrupt ipv4: Fix route lookups when handling ICMP redirects and PMTU updates af_netlink: Fix shift out of bounds in group mask calculation i2c: meson: Fix wrong speed use from probe i2c: mux: demux-pinctrl: do not deactivate a master that is not active selftests/bpf/test_lirc_mode2.sh: Exit with proper code PCI: Avoid broken MSI on SB600 USB devices net: bcmgenet: Use stronger register read/writes to assure ordering tcp: ensure PMTU updates are processed during fastopen openvswitch: always update flow key after nat tipc: fix the timer expires after interval 100ms mfd: asic3: Add missing iounmap() on error asic3_mfd_probe mxser: fix xmit_buf leak in activate when LSR == 0xff pwm: lpc18xx-sct: Initialize driver data and hardware before pwmchip_add() fsi: aspeed: convert to 
devm_platform_ioremap_resource fsi: Aspeed: Fix a potential double free misc: alcor_pci: Fix an error handling path cpufreq: qcom-cpufreq-nvmem: fix reading of PVS Valid fuse soundwire: intel: fix wrong register name in intel_shim_wake clk: qcom: ipq8074: fix PCI-E clock oops iio: mma8452: Fix probe failing when an i2c_device_id is used staging:iio:adc:ad7280a: Fix handing of device address bit reversing. pinctrl: renesas: r8a77470: Reduce size for narrow VIN1 channel pinctrl: renesas: checker: Fix miscalculation of number of states clk: qcom: ipq8074: Use floor ops for SDCC1 clock phy: dphy: Correct lpx parameter and its derivatives(ta_{get,go,sure}) serial: 8250_mid: Balance reference count for PCI DMA device serial: 8250_lpss: Balance reference count for PCI DMA device NFS: Use of mapping_set_error() results in spurious errors serial: 8250: Fix race condition in RTS-after-send handling iio: adc: Add check for devm_request_threaded_irq habanalabs: Add check for pci_enable_device NFS: Return valid errors from nfs2/3_decode_dirent() dma-debug: fix return value of __setup handlers clk: imx7d: Remove audio_mclk_root_clk clk: at91: sama7g5: fix parents of PDMCs' GCLK clk: qcom: clk-rcg2: Update logic to calculate D value for RCG clk: qcom: clk-rcg2: Update the frac table for pixel clock dmaengine: hisi_dma: fix MSI allocate fail when reload hisi_dma remoteproc: qcom: Fix missing of_node_put in adsp_alloc_memory_region remoteproc: qcom_wcnss: Add missing of_node_put() in wcnss_alloc_memory_region remoteproc: qcom_q6v5_mss: Fix some leaks in q6v5_alloc_memory_region nvdimm/region: Fix default alignment for small regions clk: actions: Terminate clk_div_table with sentinel element clk: loongson1: Terminate clk_div_table with sentinel element clk: clps711x: Terminate clk_div_table with sentinel element clk: tegra: tegra124-emc: Fix missing put_device() call in emc_ensure_emc_driver NFS: remove unneeded check in decode_devicenotify_args() staging: mt7621-dts: fix LEDs and 
pinctrl on GB-PC1 devicetree staging: mt7621-dts: fix formatting staging: mt7621-dts: fix pinctrl properties for ethernet staging: mt7621-dts: fix GB-PC2 devicetree pinctrl: mediatek: Fix missing of_node_put() in mtk_pctrl_init pinctrl: mediatek: paris: Fix PIN_CONFIG_BIAS_* readback pinctrl: mediatek: paris: Fix "argument" argument type for mtk_pinconf_get() pinctrl: mediatek: paris: Fix pingroup pin config state readback pinctrl: mediatek: paris: Skip custom extra pin config dump for virtual GPIOs pinctrl: nomadik: Add missing of_node_put() in nmk_pinctrl_probe pinctrl/rockchip: Add missing of_node_put() in rockchip_pinctrl_probe tty: hvc: fix return value of __setup handler kgdboc: fix return value of __setup handler serial: 8250: fix XOFF/XON sending when DMA is used kgdbts: fix return value of __setup handler firmware: google: Properly state IOMEM dependency driver core: dd: fix return value of __setup handler jfs: fix divide error in dbNextAG netfilter: nf_conntrack_tcp: preserve liberal flag in tcp options NFSv4.1: don't retry BIND_CONN_TO_SESSION on session error kdb: Fix the putarea helper function clk: qcom: gcc-msm8994: Fix gpll4 width clk: Initialize orphan req_rate xen: fix is_xen_pmu() net: enetc: report software timestamping via SO_TIMESTAMPING net: hns3: fix bug when PF set the duplicate MAC address for VFs net: phy: broadcom: Fix brcm_fet_config_init() selftests: test_vxlan_under_vrf: Fix broken test case qlcnic: dcb: default to returning -EOPNOTSUPP net/x25: Fix null-ptr-deref caused by x25_disconnect NFSv4/pNFS: Fix another issue with a list iterator pointing to the head net: dsa: bcm_sf2_cfp: fix an incorrect NULL check on list iterator fs: fd tables have to be multiples of BITS_PER_LONG lib/test: use after free in register_test_dev_kmod() fs: fix fd table size alignment properly LSM: general protection fault in legacy_parse_param regulator: rpi-panel: Handle I2C errors/timing to the Atmel gcc-plugins/stackleak: Exactly match strings instead of 
prefixes pinctrl: npcm: Fix broken references to chip->parent_device block, bfq: don't move oom_bfqq selinux: use correct type for context length selinux: allow FIOCLEX and FIONCLEX with policy capability loop: use sysfs_emit() in the sysfs xxx show() Fix incorrect type in assignment of ipv6 port for audit irqchip/qcom-pdc: Fix broken locking irqchip/nvic: Release nvic_base upon failure fs/binfmt_elf: Fix AT_PHDR for unusual ELF files bfq: fix use-after-free in bfq_dispatch_request ACPICA: Avoid walking the ACPI Namespace if it is not there lib/raid6/test/Makefile: Use $(pound) instead of \# for Make 4.3 Revert "Revert "block, bfq: honor already-setup queue merges"" ACPI/APEI: Limit printable size of BERT table data PM: core: keep irq flags in device_pm_check_callbacks() parisc: Fix handling off probe non-access faults nvme-tcp: lockdep: annotate in-kernel sockets spi: tegra20: Use of_device_get_match_data() locking/lockdep: Iterate lock_classes directly when reading lockdep files ext4: correct cluster len and clusters changed accounting in ext4_mb_mark_bb ext4: fix ext4_mb_mark_bb() with flex_bg with fast_commit ext4: don't BUG if someone dirty pages without asking ext4 first f2fs: fix to do sanity check on curseg->alloc_type NFSD: Fix nfsd_breaker_owns_lease() return values f2fs: compress: fix to print raw data size in error path of lz4 decompression ntfs: add sanity check on allocation size media: staging: media: zoran: move videodev alloc media: staging: media: zoran: calculate the right buffer number for zoran_reap_stat_com media: staging: media: zoran: fix various V4L2 compliance errors media: ir_toy: free before error exiting video: fbdev: nvidiafb: Use strscpy() to prevent buffer overflow video: fbdev: w100fb: Reset global state video: fbdev: cirrusfb: check pixclock to avoid divide by zero video: fbdev: omapfb: acx565akm: replace snprintf with sysfs_emit ARM: dts: qcom: fix gic_irq_domain_translate warnings for msm8960 ARM: dts: bcm2837: Add the missing 
L1/L2 cache information ASoC: madera: Add dependencies on MFD media: atomisp_gmin_platform: Add DMI quirk to not turn AXP ELDO2 regulator off on some boards media: atomisp: fix dummy_ptr check to avoid duplicate active_bo ARM: ftrace: avoid redundant loads or clobbering IP ARM: dts: imx7: Use audio_mclk_post_div instead audio_mclk_root_clk arm64: defconfig: build imx-sdma as a module video: fbdev: omapfb: panel-dsi-cm: Use sysfs_emit() instead of snprintf() video: fbdev: omapfb: panel-tpo-td043mtea1: Use sysfs_emit() instead of snprintf() video: fbdev: udlfb: replace snprintf in show functions with sysfs_emit ARM: dts: bcm2711: Add the missing L1/L2 cache information ASoC: soc-core: skip zero num_dai component in searching dai name media: cx88-mpeg: clear interrupt status register before streaming video uaccess: fix type mismatch warnings from access_ok() lib/test_lockup: fix kernel pointer check for separate address spaces ARM: tegra: tamonten: Fix I2C3 pad setting ARM: mmp: Fix failure to remove sram device video: fbdev: sm712fb: Fix crash in smtcfb_write() media: Revert "media: em28xx: add missing em28xx_close_extension" media: hdpvr: initialize dev->worker at hdpvr_register_videodev mmc: host: Return an error when ->enable_sdio_irq() ops is missing media: atomisp: fix bad usage at error handling logic ALSA: hda/realtek: Add alc256-samsung-headphone fixup KVM: x86/mmu: Check for present SPTE when clearing dirty bit in TDP MMU powerpc/kasan: Fix early region not updated correctly powerpc/lib/sstep: Fix 'sthcx' instruction powerpc/lib/sstep: Fix build errors with newer binutils powerpc: Fix build errors with newer binutils scsi: qla2xxx: Fix stuck session in gpdb scsi: qla2xxx: Fix scheduling while atomic scsi: qla2xxx: Fix wrong FDMI data for 64G adapter scsi: qla2xxx: Fix warning for missing error code scsi: qla2xxx: Fix device reconnect in loop topology scsi: qla2xxx: Add devids and conditionals for 28xx scsi: qla2xxx: Check for firmware dump already collected 
scsi: qla2xxx: Suppress a kernel complaint in qla_create_qpair() scsi: qla2xxx: Fix disk failure to rediscover scsi: qla2xxx: Fix incorrect reporting of task management failure scsi: qla2xxx: Fix hang due to session stuck scsi: qla2xxx: Fix missed DMA unmap for NVMe ls requests scsi: qla2xxx: Fix N2N inconsistent PLOGI scsi: qla2xxx: Reduce false trigger to login scsi: qla2xxx: Use correct feature type field during RFF_ID processing platform: chrome: Split trace include file KVM: x86: Forbid VMM to set SYNIC/STIMER MSRs when SynIC wasn't activated KVM: Prevent module exit until all VMs are freed KVM: x86: fix sending PV IPI KVM: SVM: fix panic on out-of-bounds guest IRQ ASoC: SOF: Intel: Fix NULL ptr dereference when ENOMEM ubifs: rename_whiteout: Fix double free for whiteout_ui->data ubifs: Fix deadlock in concurrent rename whiteout and inode writeback ubifs: Add missing iput if do_tmpfile() failed in rename whiteout ubifs: setflags: Make dirtied_ino_d 8 bytes aligned ubifs: Fix read out-of-bounds in ubifs_wbuf_write_nolock() ubifs: Fix to add refcount once page is set private ubifs: rename_whiteout: correct old_dir size computing wireguard: queueing: use CFI-safe ptr_ring cleanup function wireguard: socket: free skb in send6 when ipv6 is disabled wireguard: socket: ignore v6 endpoints when ipv6 is disabled XArray: Fix xas_create_range() when multi-order entry present can: mcba_usb: mcba_usb_start_xmit(): fix double dev_kfree_skb in error path can: mcba_usb: properly check endpoint type can: mcp251xfd: mcp251xfd_register_get_dev_id(): fix return of error value XArray: Update the LRU list in xas_split() rtc: check if __rtc_read_time was successful gfs2: Make sure FITRIM minlen is rounded up to fs block size net: hns3: fix software vlan talbe of vlan 0 inconsistent with hardware rxrpc: Fix call timer start racing with call destruction mailbox: imx: fix wakeup failure from freeze mode crypto: arm/aes-neonbs-cbc - Select generic cbc and aes watch_queue: Free the page 
array when watch_queue is dismantled pinctrl: pinconf-generic: Print arguments for bias-pull-* watchdog: rti-wdt: Add missing pm_runtime_disable() in probe function pinctrl: nuvoton: npcm7xx: Rename DS() macro to DSTR() pinctrl: nuvoton: npcm7xx: Use %zu printk format for ARRAY_SIZE() ASoC: mediatek: mt6358: add missing EXPORT_SYMBOLs ubi: Fix race condition between ctrl_cdev_ioctl and ubi_cdev_ioctl ARM: iop32x: offset IRQ numbers by 1 io_uring: fix memory leak of uid in files registration riscv module: remove (NOLOAD) ACPI: CPPC: Avoid out of bounds access when parsing _CPC data platform/chrome: cros_ec_typec: Check for EC device can: isotp: restore accidentally removed MSG_PEEK feature proc: bootconfig: Add null pointer check staging: mt7621-dts: fix pinctrl-0 items to be size-1 items on ethernet ASoC: soc-compress: Change the check for codec_dai batman-adv: Check ptr for NULL before reducing its refcnt mm/mmap: return 1 from stack_guard_gap __setup() handler ARM: 9187/1: JIVE: fix return value of __setup handler mm/memcontrol: return 1 from cgroup.memory __setup() handler mm/usercopy: return 1 from hardened_usercopy __setup() handler bpf: Adjust BPF stack helper functions to accommodate skip > 0 bpf: Fix comment for helper bpf_current_task_under_cgroup() dt-bindings: mtd: nand-controller: Fix the reg property description dt-bindings: mtd: nand-controller: Fix a comment in the examples dt-bindings: spi: mxic: The interrupt property is not mandatory ubi: fastmap: Return error code if memory allocation fails in add_aeb() ASoC: topology: Allow TLV control to be either read or write ARM: dts: spear1340: Update serial node properties ARM: dts: spear13xx: Update SPI dma properties um: Fix uml_mconsole stop/go docs: sysctl/kernel: add missing bit to panic_print openvswitch: Fixed nd target mask field in the flow dump. 
KVM: x86/mmu: do compare-and-exchange of gPTE via the user address can: m_can: m_can_tx_handler(): fix use after free of skb can: usb_8dev: usb_8dev_start_xmit(): fix double dev_kfree_skb() in error path coredump: Snapshot the vmas in do_coredump coredump: Remove the WARN_ON in dump_vma_snapshot coredump/elf: Pass coredump_params into fill_note_info coredump: Use the vma snapshot in fill_files_note arm64: Do not defer reserve_crashkernel() for platforms with no DMA memory zones PCI: xgene: Revert "PCI: xgene: Use inbound resources for setup" Linux 5.10.110 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: I12fbe227793dd40c0582588e1700cf88cafd0ac6
1670 lines
43 KiB
C
1670 lines
43 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Based on arch/arm/mm/mmu.c
|
|
*
|
|
* Copyright (C) 1995-2005 Russell King
|
|
* Copyright (C) 2012 ARM Ltd.
|
|
*/
|
|
|
|
#include <linux/cache.h>
|
|
#include <linux/export.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/init.h>
|
|
#include <linux/ioport.h>
|
|
#include <linux/kexec.h>
|
|
#include <linux/libfdt.h>
|
|
#include <linux/mman.h>
|
|
#include <linux/nodemask.h>
|
|
#include <linux/memblock.h>
|
|
#include <linux/memory.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/io.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/vmalloc.h>
|
|
|
|
#include <asm/barrier.h>
|
|
#include <asm/cputype.h>
|
|
#include <asm/fixmap.h>
|
|
#include <asm/kasan.h>
|
|
#include <asm/kernel-pgtable.h>
|
|
#include <asm/sections.h>
|
|
#include <asm/setup.h>
|
|
#include <linux/sizes.h>
|
|
#include <asm/tlb.h>
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/ptdump.h>
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/pgalloc.h>
|
|
|
|
#define NO_BLOCK_MAPPINGS BIT(0)
|
|
#define NO_CONT_MAPPINGS BIT(1)
|
|
|
|
u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN);
|
|
u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
|
|
|
|
u64 __section(".mmuoff.data.write") vabits_actual;
|
|
EXPORT_SYMBOL(vabits_actual);
|
|
|
|
u64 kimage_voffset __ro_after_init;
|
|
EXPORT_SYMBOL(kimage_voffset);
|
|
|
|
/*
|
|
* Empty_zero_page is a special page that is used for zero-initialized data
|
|
* and COW.
|
|
*/
|
|
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
|
|
EXPORT_SYMBOL(empty_zero_page);
|
|
|
|
static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
|
|
static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
|
|
static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
|
|
|
|
static DEFINE_SPINLOCK(swapper_pgdir_lock);
|
|
static DEFINE_MUTEX(fixmap_lock);
|
|
|
|
/*
 * Write @pgd into the swapper page-table slot @pgdp. The slot is reached
 * through a temporary fixmap alias of its physical address, and the whole
 * sequence is serialised by swapper_pgdir_lock.
 */
void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
{
	pgd_t *alias;

	spin_lock(&swapper_pgdir_lock);

	alias = pgd_set_fixmap(__pa_symbol(pgdp));
	WRITE_ONCE(*alias, pgd);

	/*
	 * The page-table walker has to observe the new entry before
	 * set_p?d() returns, which calls for a dsb(ishst). Tearing the
	 * fixmap slot down runs flush_tlb_kernel_range() via clear_fixmap(),
	 * and that provides the barrier for us.
	 */
	pgd_clear_fixmap();

	spin_unlock(&swapper_pgdir_lock);
}
|
|
|
|
/*
 * Choose the page protection for a mapping of physical memory:
 *  - pfns rejected by pfn_valid() are mapped non-cached;
 *  - valid pfns on a file opened with O_SYNC are mapped write-combine;
 *  - anything else keeps @vma_prot unchanged.
 * @size is not consulted.
 */
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
			      unsigned long size, pgprot_t vma_prot)
{
	if (!pfn_valid(pfn))
		return pgprot_noncached(vma_prot);

	if (file->f_flags & O_SYNC)
		return pgprot_writecombine(vma_prot);

	return vma_prot;
}
EXPORT_SYMBOL(phys_mem_access_prot);
|
|
|
|
/*
 * Boot-time page-table allocator: grab one page from memblock, zero it
 * through the FIX_PTE fixmap slot and hand back its physical address.
 * Panics if memblock cannot satisfy the request. @shift is unused.
 */
static phys_addr_t __init early_pgtable_alloc(int shift)
{
	phys_addr_t table_pa = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
	void *table;

	if (!table_pa)
		panic("Failed to allocate page table page\n");

	/*
	 * FIX_PGD/FIX_PUD/FIX_PMD may be busy at this point, whereas FIX_PTE
	 * is guaranteed to be free, so it is (ab)used here to initialise a
	 * table of any level.
	 */
	table = pte_set_fixmap(table_pa);
	memset(table, 0, PAGE_SIZE);

	/*
	 * Dropping the fixmap carries implicit barriers, which also make
	 * the zeroed page visible to the page table walker.
	 */
	pte_clear_fixmap();

	return table_pa;
}
|
|
|
|
/*
 * Decide whether a live kernel page-table entry may go from @old to @new
 * without a break-before-make sequence.
 *
 * Returns true when the transition is permitted, false otherwise.
 */
static bool pgattr_change_is_safe(u64 old, u64 new)
{
	/* Attribute bits that may differ between @old and @new. */
	pteval_t ignore = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;
	u64 old_attr = old & PTE_ATTRINDX_MASK;
	u64 new_attr = new & PTE_ATTRINDX_MASK;

	/* Creating a mapping or tearing one down is always fine. */
	if (!old || !new)
		return true;

	/* Live contiguous mappings must not be touched at all. */
	if ((old | new) & PTE_CONT)
		return false;

	/* Going from Non-Global to Global is unsafe. */
	if (old & ~new & PTE_NG)
		return false;

	/*
	 * Switching between Normal and Normal-Tagged memory is safe: from
	 * the mismatched attribute aliases perspective, Tagged counts as a
	 * permission attribute.
	 */
	if ((old_attr == PTE_ATTRINDX(MT_NORMAL) ||
	     old_attr == PTE_ATTRINDX(MT_NORMAL_TAGGED)) &&
	    (new_attr == PTE_ATTRINDX(MT_NORMAL) ||
	     new_attr == PTE_ATTRINDX(MT_NORMAL_TAGGED)))
		ignore |= PTE_ATTRINDX_MASK;

	return !((old ^ new) & ~ignore);
}
|
|
|
|
static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
|
|
phys_addr_t phys, pgprot_t prot)
|
|
{
|
|
pte_t *ptep;
|
|
|
|
ptep = pte_set_fixmap_offset(pmdp, addr);
|
|
do {
|
|
pte_t old_pte = READ_ONCE(*ptep);
|
|
|
|
set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
|
|
|
|
/*
|
|
* After the PTE entry has been populated once, we
|
|
* only allow updates to the permission attributes.
|
|
*/
|
|
BUG_ON(!pgattr_change_is_safe(pte_val(old_pte),
|
|
READ_ONCE(pte_val(*ptep))));
|
|
|
|
phys += PAGE_SIZE;
|
|
} while (ptep++, addr += PAGE_SIZE, addr != end);
|
|
|
|
pte_clear_fixmap();
|
|
}
|
|
|
|
static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
|
|
unsigned long end, phys_addr_t phys,
|
|
pgprot_t prot,
|
|
phys_addr_t (*pgtable_alloc)(int),
|
|
int flags)
|
|
{
|
|
unsigned long next;
|
|
pmd_t pmd = READ_ONCE(*pmdp);
|
|
|
|
BUG_ON(pmd_sect(pmd));
|
|
if (pmd_none(pmd)) {
|
|
phys_addr_t pte_phys;
|
|
BUG_ON(!pgtable_alloc);
|
|
pte_phys = pgtable_alloc(PAGE_SHIFT);
|
|
__pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
|
|
pmd = READ_ONCE(*pmdp);
|
|
}
|
|
BUG_ON(pmd_bad(pmd));
|
|
|
|
do {
|
|
pgprot_t __prot = prot;
|
|
|
|
next = pte_cont_addr_end(addr, end);
|
|
|
|
/* use a contiguous mapping if the range is suitably aligned */
|
|
if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) &&
|
|
(flags & NO_CONT_MAPPINGS) == 0)
|
|
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
|
|
|
|
init_pte(pmdp, addr, next, phys, __prot);
|
|
|
|
phys += next - addr;
|
|
} while (addr = next, addr != end);
|
|
}
|
|
|
|
static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
|
|
phys_addr_t phys, pgprot_t prot,
|
|
phys_addr_t (*pgtable_alloc)(int), int flags)
|
|
{
|
|
unsigned long next;
|
|
pmd_t *pmdp;
|
|
|
|
pmdp = pmd_set_fixmap_offset(pudp, addr);
|
|
do {
|
|
pmd_t old_pmd = READ_ONCE(*pmdp);
|
|
|
|
next = pmd_addr_end(addr, end);
|
|
|
|
/* try section mapping first */
|
|
if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
|
|
(flags & NO_BLOCK_MAPPINGS) == 0) {
|
|
pmd_set_huge(pmdp, phys, prot);
|
|
|
|
/*
|
|
* After the PMD entry has been populated once, we
|
|
* only allow updates to the permission attributes.
|
|
*/
|
|
BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
|
|
READ_ONCE(pmd_val(*pmdp))));
|
|
} else {
|
|
alloc_init_cont_pte(pmdp, addr, next, phys, prot,
|
|
pgtable_alloc, flags);
|
|
|
|
BUG_ON(pmd_val(old_pmd) != 0 &&
|
|
pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));
|
|
}
|
|
phys += next - addr;
|
|
} while (pmdp++, addr = next, addr != end);
|
|
|
|
pmd_clear_fixmap();
|
|
}
|
|
|
|
static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
|
|
unsigned long end, phys_addr_t phys,
|
|
pgprot_t prot,
|
|
phys_addr_t (*pgtable_alloc)(int), int flags)
|
|
{
|
|
unsigned long next;
|
|
pud_t pud = READ_ONCE(*pudp);
|
|
|
|
/*
|
|
* Check for initial section mappings in the pgd/pud.
|
|
*/
|
|
BUG_ON(pud_sect(pud));
|
|
if (pud_none(pud)) {
|
|
phys_addr_t pmd_phys;
|
|
BUG_ON(!pgtable_alloc);
|
|
pmd_phys = pgtable_alloc(PMD_SHIFT);
|
|
__pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
|
|
pud = READ_ONCE(*pudp);
|
|
}
|
|
BUG_ON(pud_bad(pud));
|
|
|
|
do {
|
|
pgprot_t __prot = prot;
|
|
|
|
next = pmd_cont_addr_end(addr, end);
|
|
|
|
/* use a contiguous mapping if the range is suitably aligned */
|
|
if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) &&
|
|
(flags & NO_CONT_MAPPINGS) == 0)
|
|
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
|
|
|
|
init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);
|
|
|
|
phys += next - addr;
|
|
} while (addr = next, addr != end);
|
|
}
|
|
|
|
static inline bool use_1G_block(unsigned long addr, unsigned long next,
|
|
unsigned long phys)
|
|
{
|
|
if (PAGE_SHIFT != 12)
|
|
return false;
|
|
|
|
if (((addr | next | phys) & ~PUD_MASK) != 0)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
|
|
phys_addr_t phys, pgprot_t prot,
|
|
phys_addr_t (*pgtable_alloc)(int),
|
|
int flags)
|
|
{
|
|
unsigned long next;
|
|
pud_t *pudp;
|
|
p4d_t *p4dp = p4d_offset(pgdp, addr);
|
|
p4d_t p4d = READ_ONCE(*p4dp);
|
|
|
|
if (p4d_none(p4d)) {
|
|
phys_addr_t pud_phys;
|
|
BUG_ON(!pgtable_alloc);
|
|
pud_phys = pgtable_alloc(PUD_SHIFT);
|
|
__p4d_populate(p4dp, pud_phys, PUD_TYPE_TABLE);
|
|
p4d = READ_ONCE(*p4dp);
|
|
}
|
|
BUG_ON(p4d_bad(p4d));
|
|
|
|
/*
|
|
* No need for locking during early boot. And it doesn't work as
|
|
* expected with KASLR enabled.
|
|
*/
|
|
if (system_state != SYSTEM_BOOTING)
|
|
mutex_lock(&fixmap_lock);
|
|
pudp = pud_set_fixmap_offset(p4dp, addr);
|
|
do {
|
|
pud_t old_pud = READ_ONCE(*pudp);
|
|
|
|
next = pud_addr_end(addr, end);
|
|
|
|
/*
|
|
* For 4K granule only, attempt to put down a 1GB block
|
|
*/
|
|
if (use_1G_block(addr, next, phys) &&
|
|
(flags & NO_BLOCK_MAPPINGS) == 0) {
|
|
pud_set_huge(pudp, phys, prot);
|
|
|
|
/*
|
|
* After the PUD entry has been populated once, we
|
|
* only allow updates to the permission attributes.
|
|
*/
|
|
BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
|
|
READ_ONCE(pud_val(*pudp))));
|
|
} else {
|
|
alloc_init_cont_pmd(pudp, addr, next, phys, prot,
|
|
pgtable_alloc, flags);
|
|
|
|
BUG_ON(pud_val(old_pud) != 0 &&
|
|
pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
|
|
}
|
|
phys += next - addr;
|
|
} while (pudp++, addr = next, addr != end);
|
|
|
|
pud_clear_fixmap();
|
|
if (system_state != SYSTEM_BOOTING)
|
|
mutex_unlock(&fixmap_lock);
|
|
}
|
|
|
|
static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
|
|
unsigned long virt, phys_addr_t size,
|
|
pgprot_t prot,
|
|
phys_addr_t (*pgtable_alloc)(int),
|
|
int flags)
|
|
{
|
|
unsigned long addr, end, next;
|
|
pgd_t *pgdp = pgd_offset_pgd(pgdir, virt);
|
|
|
|
/*
|
|
* If the virtual and physical address don't have the same offset
|
|
* within a page, we cannot map the region as the caller expects.
|
|
*/
|
|
if (WARN_ON((phys ^ virt) & ~PAGE_MASK))
|
|
return;
|
|
|
|
phys &= PAGE_MASK;
|
|
addr = virt & PAGE_MASK;
|
|
end = PAGE_ALIGN(virt + size);
|
|
|
|
do {
|
|
next = pgd_addr_end(addr, end);
|
|
alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
|
|
flags);
|
|
phys += next - addr;
|
|
} while (pgdp++, addr = next, addr != end);
|
|
}
|
|
|
|
static phys_addr_t __pgd_pgtable_alloc(int shift)
|
|
{
|
|
void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL);
|
|
BUG_ON(!ptr);
|
|
|
|
/* Ensure the zeroed page is visible to the page table walker */
|
|
dsb(ishst);
|
|
return __pa(ptr);
|
|
}
|
|
|
|
static phys_addr_t pgd_pgtable_alloc(int shift)
|
|
{
|
|
phys_addr_t pa = __pgd_pgtable_alloc(shift);
|
|
|
|
/*
|
|
* Call proper page table ctor in case later we need to
|
|
* call core mm functions like apply_to_page_range() on
|
|
* this pre-allocated page table.
|
|
*
|
|
* We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is
|
|
* folded, and if so pgtable_pmd_page_ctor() becomes nop.
|
|
*/
|
|
if (shift == PAGE_SHIFT)
|
|
BUG_ON(!pgtable_pte_page_ctor(phys_to_page(pa)));
|
|
else if (shift == PMD_SHIFT)
|
|
BUG_ON(!pgtable_pmd_page_ctor(phys_to_page(pa)));
|
|
|
|
return pa;
|
|
}
|
|
|
|
/*
|
|
* This function can only be used to modify existing table entries,
|
|
* without allocating new levels of table. Note that this permits the
|
|
* creation of new section or page entries.
|
|
*/
|
|
static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
|
|
phys_addr_t size, pgprot_t prot)
|
|
{
|
|
if ((virt >= PAGE_END) && (virt < VMALLOC_START)) {
|
|
pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
|
|
&phys, virt);
|
|
return;
|
|
}
|
|
__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
|
|
NO_CONT_MAPPINGS);
|
|
}
|
|
|
|
/*
 * Create a mapping in the page tables of @mm, which must not be init_mm.
 * With @page_mappings_only set, neither block nor contiguous entries are
 * used. New table pages come from pgd_pgtable_alloc().
 */
void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
			       unsigned long virt, phys_addr_t size,
			       pgprot_t prot, bool page_mappings_only)
{
	int flags = page_mappings_only ?
		    (NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS) : 0;

	BUG_ON(mm == &init_mm);

	__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
			     pgd_pgtable_alloc, flags);
}
|
|
|
|
/*
 * Rewrite the protection of an existing init_mm mapping and flush the TLB
 * for the affected virtual range. No table pages are allocated.
 */
static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
				phys_addr_t size, pgprot_t prot)
{
	bool in_kernel_range = virt < PAGE_END || virt >= VMALLOC_START;

	if (!in_kernel_range) {
		pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n",
			&phys, virt);
		return;
	}

	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
			     NO_CONT_MAPPINGS);

	/* Live kernel mappings changed, so stale TLB entries must go. */
	flush_tlb_kernel_range(virt, virt + size);
}
|
|
|
|
/*
 * Map the physical range [@start, @end) at its linear-map virtual address
 * in the tables rooted at @pgdp, allocating tables from memblock.
 */
static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,
				  phys_addr_t end, pgprot_t prot, int flags)
{
	unsigned long virt = __phys_to_virt(start);
	phys_addr_t size = end - start;

	__create_pgd_mapping(pgdp, start, virt, size, prot,
			     early_pgtable_alloc, flags);
}
|
|
|
|
/*
 * Make the linear-map alias of [_text, __init_begin), i.e. .text/.rodata,
 * read-only by dropping its write permission.
 */
void __init mark_linear_text_alias_ro(void)
{
	unsigned long alias = (unsigned long)lm_alias(_text);
	unsigned long len = (unsigned long)__init_begin - (unsigned long)_text;

	update_mapping_prot(__pa_symbol(_text), alias, len, PAGE_KERNEL_RO);
}
|
|
|
|
static bool crash_mem_map __initdata;
|
|
|
|
static int __init enable_crash_mem_map(char *arg)
|
|
{
|
|
/*
|
|
* Proper parameter parsing is done by reserve_crashkernel(). We only
|
|
* need to know if the linear map has to avoid block mappings so that
|
|
* the crashkernel reservations can be unmapped later.
|
|
*/
|
|
crash_mem_map = true;
|
|
|
|
return 0;
|
|
}
|
|
early_param("crashkernel", enable_crash_mem_map);
|
|
|
|
/*
 * Build the linear mapping of all memblock memory in the tables rooted at
 * @pgdp. The kernel image alias (and, in some configurations, the
 * crashkernel region) is temporarily marked NOMAP so that the generic loop
 * skips it, and is then mapped separately with its own constraints.
 */
static void __init map_mem(pgd_t *pgdp)
{
	phys_addr_t kernel_start = __pa_symbol(_text);
	phys_addr_t kernel_end = __pa_symbol(__init_begin);
	phys_addr_t start, end;
	int flags = 0;
	u64 i;

	if (rodata_full || debug_pagealloc_enabled() ||
	    IS_ENABLED(CONFIG_KFENCE))
		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;

	/*
	 * The read-only text and rodata sections of the kernel image must
	 * not gain a writable alias. Mark them NOMAP for now so the loop
	 * below leaves them out.
	 */
	memblock_mark_nomap(kernel_start, kernel_end - kernel_start);

#ifdef CONFIG_KEXEC_CORE
	if (crash_mem_map) {
		if (!IS_ENABLED(CONFIG_ZONE_DMA) &&
		    !IS_ENABLED(CONFIG_ZONE_DMA32)) {
			/* Map the crashkernel region separately, later. */
			if (crashk_res.end)
				memblock_mark_nomap(crashk_res.start,
						    resource_size(&crashk_res));
		} else {
			flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
		}
	}
#endif

	/* Map every memory bank. */
	for_each_mem_range(i, &start, &end) {
		if (start >= end)
			break;
		/*
		 * When MTE is present the linear map has to permit reading
		 * and writing allocation tags; apart from that it carries
		 * the same attributes as PAGE_KERNEL.
		 */
		__map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL),
			       flags);
	}

	/*
	 * Map the linear alias of [_text, __init_begin) non-executable
	 * now; mark_linear_text_alias_ro() removes the write permission
	 * once alternative patching has completed. Subsystems such as
	 * hibernate can thus access the region, while it stays protected
	 * from inadvertent modification or execution. Contiguous mappings
	 * cannot be remapped like that, so they are avoided here.
	 */
	__map_memblock(pgdp, kernel_start, kernel_end,
		       PAGE_KERNEL, NO_CONT_MAPPINGS);
	memblock_clear_nomap(kernel_start, kernel_end - kernel_start);

	/*
	 * Page-level mappings are used for the crashkernel region so that
	 * it can be shrunk in page granularity, with unused memory going
	 * back to the buddy system, via /sys/kernel/kexec_crash_size.
	 */
#ifdef CONFIG_KEXEC_CORE
	if (crash_mem_map &&
	    !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32) &&
	    crashk_res.end) {
		__map_memblock(pgdp, crashk_res.start,
			       crashk_res.end + 1,
			       PAGE_KERNEL,
			       NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
		memblock_clear_nomap(crashk_res.start,
				     resource_size(&crashk_res));
	}
#endif
}
|
|
|
|
void mark_rodata_ro(void)
|
|
{
|
|
unsigned long section_size;
|
|
|
|
/*
|
|
* mark .rodata as read only. Use __init_begin rather than __end_rodata
|
|
* to cover NOTES and EXCEPTION_TABLE.
|
|
*/
|
|
section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
|
|
update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
|
|
section_size, PAGE_KERNEL_RO);
|
|
|
|
debug_checkwx();
|
|
}
|
|
|
|
/*
 * Map the kernel image segment [@va_start, @va_end) into the tables rooted
 * at @pgdp with protection @prot, then describe it in @vma and register
 * that early vm area. Both the physical start and the size have to be page
 * aligned. Unless @vm_flags contains VM_NO_GUARD, the registered area is
 * one page larger than the mapping.
 */
static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
				      pgprot_t prot, struct vm_struct *vma,
				      int flags, unsigned long vm_flags)
{
	phys_addr_t pa_start = __pa_symbol(va_start);
	unsigned long size = va_end - va_start;
	unsigned long area_size = size;

	BUG_ON(!PAGE_ALIGNED(pa_start));
	BUG_ON(!PAGE_ALIGNED(size));

	__create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
			     early_pgtable_alloc, flags);

	/* Leave room for a guard page unless the caller opted out. */
	if (!(vm_flags & VM_NO_GUARD))
		area_size += PAGE_SIZE;

	vma->addr	= va_start;
	vma->phys_addr	= pa_start;
	vma->size	= area_size;
	vma->flags	= VM_MAP | vm_flags;
	vma->caller	= __builtin_return_address(0);

	vm_area_add_early(vma);
}
|
|
|
|
static int __init parse_rodata(char *arg)
|
|
{
|
|
int ret = strtobool(arg, &rodata_enabled);
|
|
if (!ret) {
|
|
rodata_full = false;
|
|
return 0;
|
|
}
|
|
|
|
/* permit 'full' in addition to boolean options */
|
|
if (strcmp(arg, "full"))
|
|
return -EINVAL;
|
|
|
|
rodata_enabled = true;
|
|
rodata_full = true;
|
|
return 0;
|
|
}
|
|
early_param("rodata", parse_rodata);
|
|
|
|
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
|
|
/*
 * Set up the entry trampoline mappings: the trampoline text is mapped at
 * TRAMP_VALIAS in tramp_pg_dir, and also through fixmap slots in the
 * kernel page table. With CONFIG_RANDOMIZE_BASE the trampoline data page
 * gets a read-only fixmap slot too. Always returns 0.
 */
static int __init map_entry_trampoline(void)
{
	pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
	phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);
	int i;

	/* The trampoline is mapped at all times, so it can be global. */
	prot = __pgprot(pgprot_val(prot) & ~PTE_NG);

	/* The trampoline page table maps the text and nothing else. */
	memset(tramp_pg_dir, 0, PGD_SIZE);
	__create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS,
			     entry_tramp_text_size(), prot,
			     __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS);

	/* The kernel page table maps both the text and the data. */
	for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++)
		__set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i,
			     pa_start + i * PAGE_SIZE, prot);

	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
		extern char __entry_tramp_data_start[];

		__set_fixmap(FIX_ENTRY_TRAMP_DATA,
			     __pa_symbol(__entry_tramp_data_start),
			     PAGE_KERNEL_RO);
	}

	return 0;
}
core_initcall(map_entry_trampoline);
|
|
#endif
|
|
|
|
/*
|
|
* Open coded check for BTI, only for use to determine configuration
|
|
* for early mappings for before the cpufeature code has run.
|
|
*/
|
|
static bool arm64_early_this_cpu_has_bti(void)
|
|
{
|
|
u64 pfr1;
|
|
|
|
if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
|
|
return false;
|
|
|
|
pfr1 = __read_sysreg_by_encoding(SYS_ID_AA64PFR1_EL1);
|
|
return cpuid_feature_extract_unsigned_field(pfr1,
|
|
ID_AA64PFR1_BT_SHIFT);
|
|
}
|
|
|
|
/*
|
|
* Create fine-grained mappings for the kernel.
|
|
*/
|
|
static void __init map_kernel(pgd_t *pgdp)
|
|
{
|
|
static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
|
|
vmlinux_initdata, vmlinux_data;
|
|
|
|
/*
|
|
* External debuggers may need to write directly to the text
|
|
* mapping to install SW breakpoints. Allow this (only) when
|
|
* explicitly requested with rodata=off.
|
|
*/
|
|
pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
|
|
|
|
/*
|
|
* If we have a CPU that supports BTI and a kernel built for
|
|
* BTI then mark the kernel executable text as guarded pages
|
|
* now so we don't have to rewrite the page tables later.
|
|
*/
|
|
if (arm64_early_this_cpu_has_bti())
|
|
text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
|
|
|
|
/*
|
|
* Only rodata will be remapped with different permissions later on,
|
|
* all other segments are allowed to use contiguous mappings.
|
|
*/
|
|
map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
|
|
VM_NO_GUARD);
|
|
map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
|
|
&vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
|
|
map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,
|
|
&vmlinux_inittext, 0, VM_NO_GUARD);
|
|
map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,
|
|
&vmlinux_initdata, 0, VM_NO_GUARD);
|
|
map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
|
|
|
|
if (!READ_ONCE(pgd_val(*pgd_offset_pgd(pgdp, FIXADDR_START)))) {
|
|
/*
|
|
* The fixmap falls in a separate pgd to the kernel, and doesn't
|
|
* live in the carveout for the swapper_pg_dir. We can simply
|
|
* re-use the existing dir for the fixmap.
|
|
*/
|
|
set_pgd(pgd_offset_pgd(pgdp, FIXADDR_START),
|
|
READ_ONCE(*pgd_offset_k(FIXADDR_START)));
|
|
} else if (CONFIG_PGTABLE_LEVELS > 3) {
|
|
pgd_t *bm_pgdp;
|
|
p4d_t *bm_p4dp;
|
|
pud_t *bm_pudp;
|
|
/*
|
|
* The fixmap shares its top level pgd entry with the kernel
|
|
* mapping. This can really only occur when we are running
|
|
* with 16k/4 levels, so we can simply reuse the pud level
|
|
* entry instead.
|
|
*/
|
|
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
|
|
bm_pgdp = pgd_offset_pgd(pgdp, FIXADDR_START);
|
|
bm_p4dp = p4d_offset(bm_pgdp, FIXADDR_START);
|
|
bm_pudp = pud_set_fixmap_offset(bm_p4dp, FIXADDR_START);
|
|
pud_populate(&init_mm, bm_pudp, lm_alias(bm_pmd));
|
|
pud_clear_fixmap();
|
|
} else {
|
|
BUG();
|
|
}
|
|
|
|
kasan_copy_shadow(pgdp);
|
|
}
|
|
|
|
/*
 * Build the final kernel page tables in swapper_pg_dir (kernel image plus
 * linear map), switch TTBR1 over to them, hand the memory holding the
 * initial tables back to memblock and allow memblock to resize its arrays.
 */
void __init paging_init(void)
{
	/* swapper_pg_dir is populated through the pgd fixmap slot. */
	pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));
	phys_addr_t init_pg_start, init_pg_size;

	map_kernel(pgdp);
	map_mem(pgdp);

	pgd_clear_fixmap();

	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
	init_mm.pgd = swapper_pg_dir;

	/* The initial page tables are no longer in use. */
	init_pg_start = __pa_symbol(init_pg_dir);
	init_pg_size = __pa_symbol(init_pg_end) - init_pg_start;
	memblock_free(init_pg_start, init_pg_size);

	memblock_allow_resize();
}
|
|
|
|
/*
|
|
* Check whether a kernel address is valid (derived from arch/x86/).
|
|
*/
|
|
int kern_addr_valid(unsigned long addr)
|
|
{
|
|
pgd_t *pgdp;
|
|
p4d_t *p4dp;
|
|
pud_t *pudp, pud;
|
|
pmd_t *pmdp, pmd;
|
|
pte_t *ptep, pte;
|
|
|
|
addr = arch_kasan_reset_tag(addr);
|
|
if ((((long)addr) >> VA_BITS) != -1UL)
|
|
return 0;
|
|
|
|
pgdp = pgd_offset_k(addr);
|
|
if (pgd_none(READ_ONCE(*pgdp)))
|
|
return 0;
|
|
|
|
p4dp = p4d_offset(pgdp, addr);
|
|
if (p4d_none(READ_ONCE(*p4dp)))
|
|
return 0;
|
|
|
|
pudp = pud_offset(p4dp, addr);
|
|
pud = READ_ONCE(*pudp);
|
|
if (pud_none(pud))
|
|
return 0;
|
|
|
|
if (pud_sect(pud))
|
|
return pfn_valid(pud_pfn(pud));
|
|
|
|
pmdp = pmd_offset(pudp, addr);
|
|
pmd = READ_ONCE(*pmdp);
|
|
if (pmd_none(pmd))
|
|
return 0;
|
|
|
|
if (pmd_sect(pmd))
|
|
return pfn_valid(pmd_pfn(pmd));
|
|
|
|
ptep = pte_offset_kernel(pmdp, addr);
|
|
pte = READ_ONCE(*ptep);
|
|
if (pte_none(pte))
|
|
return 0;
|
|
|
|
return pfn_valid(pte_pfn(pte));
|
|
}
|
|
|
|
#ifdef CONFIG_MEMORY_HOTPLUG
|
|
static void free_hotplug_page_range(struct page *page, size_t size,
|
|
struct vmem_altmap *altmap)
|
|
{
|
|
if (altmap) {
|
|
vmem_altmap_free(altmap, size >> PAGE_SHIFT);
|
|
} else {
|
|
WARN_ON(PageReserved(page));
|
|
free_pages((unsigned long)page_address(page), get_order(size));
|
|
}
|
|
}
|
|
|
|
static void free_hotplug_pgtable_page(struct page *page)
|
|
{
|
|
free_hotplug_page_range(page, PAGE_SIZE, NULL);
|
|
}
|
|
|
|
/*
 * Check that the table-sized region selected by @mask around
 * [@start, @end) stays within [@floor, @ceiling).
 *
 * @start is rounded down with @mask and must not fall below @floor. A
 * non-zero @ceiling is rounded down with @mask too and must stay non-zero;
 * @end may not exceed the resulting ceiling. A @ceiling of 0 imposes no
 * upper limit (the "- 1" comparison wraps around on purpose).
 */
static bool pgtable_range_aligned(unsigned long start, unsigned long end,
				  unsigned long floor, unsigned long ceiling,
				  unsigned long mask)
{
	unsigned long aligned_start = start & mask;

	if (aligned_start < floor)
		return false;

	if (ceiling != 0) {
		ceiling &= mask;
		if (ceiling == 0)
			return false;
	}

	return end - 1 <= ceiling - 1;
}
|
|
|
|
/*
 * Clear every populated pte covering [@addr, @end) under @pmdp, flushing
 * the TLB for each page. With @free_mapped set, the page each pte pointed
 * to is released through free_hotplug_page_range() as well.
 */
static void unmap_hotplug_pte_range(pmd_t *pmdp, unsigned long addr,
				    unsigned long end, bool free_mapped,
				    struct vmem_altmap *altmap)
{
	pte_t *ptep, pte;

	do {
		ptep = pte_offset_kernel(pmdp, addr);
		pte = READ_ONCE(*ptep);

		if (!pte_none(pte)) {
			WARN_ON(!pte_present(pte));
			pte_clear(&init_mm, addr, ptep);
			flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
			if (free_mapped)
				free_hotplug_page_range(pte_page(pte),
							PAGE_SIZE, altmap);
		}

		addr += PAGE_SIZE;
	} while (addr < end);
}
|
|
|
|
/*
 * Unmap [@addr, @end) at pmd level under @pudp. Section entries are
 * cleared right here (freeing the mapped memory when @free_mapped is set);
 * table entries are descended into via unmap_hotplug_pte_range().
 */
static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr,
				    unsigned long end, bool free_mapped,
				    struct vmem_altmap *altmap)
{
	unsigned long next;
	pmd_t *pmdp, pmd;

	do {
		next = pmd_addr_end(addr, end);
		pmdp = pmd_offset(pudp, addr);
		pmd = READ_ONCE(*pmdp);
		if (pmd_none(pmd))
			continue;

		WARN_ON(!pmd_present(pmd));
		if (!pmd_sect(pmd)) {
			WARN_ON(!pmd_table(pmd));
			unmap_hotplug_pte_range(pmdp, addr, next, free_mapped,
						altmap);
			continue;
		}

		pmd_clear(pmdp);

		/*
		 * A single block entry maps the whole PMD_SIZE range, so
		 * one TLBI should be enough.
		 */
		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
		if (free_mapped)
			free_hotplug_page_range(pmd_page(pmd),
						PMD_SIZE, altmap);
	} while (addr = next, addr < end);
}
|
|
|
|
/*
 * Unmap [@addr, @end) at pud level under @p4dp. Section entries are
 * cleared right here (freeing the mapped memory when @free_mapped is set);
 * table entries are descended into via unmap_hotplug_pmd_range().
 */
static void unmap_hotplug_pud_range(p4d_t *p4dp, unsigned long addr,
				    unsigned long end, bool free_mapped,
				    struct vmem_altmap *altmap)
{
	unsigned long next;
	pud_t *pudp, pud;

	do {
		next = pud_addr_end(addr, end);
		pudp = pud_offset(p4dp, addr);
		pud = READ_ONCE(*pudp);
		if (pud_none(pud))
			continue;

		WARN_ON(!pud_present(pud));
		if (!pud_sect(pud)) {
			WARN_ON(!pud_table(pud));
			unmap_hotplug_pmd_range(pudp, addr, next, free_mapped,
						altmap);
			continue;
		}

		pud_clear(pudp);

		/*
		 * A single block entry maps the whole PUD_SIZE range, so
		 * one TLBI should be enough.
		 */
		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
		if (free_mapped)
			free_hotplug_page_range(pud_page(pud),
						PUD_SIZE, altmap);
	} while (addr = next, addr < end);
}
|
|
|
|
/*
 * Unmap [@addr, @end) at p4d level under @pgdp by descending into every
 * populated p4d entry.
 */
static void unmap_hotplug_p4d_range(pgd_t *pgdp, unsigned long addr,
				    unsigned long end, bool free_mapped,
				    struct vmem_altmap *altmap)
{
	unsigned long next;
	p4d_t *p4dp, p4d;

	do {
		next = p4d_addr_end(addr, end);
		p4dp = p4d_offset(pgdp, addr);
		p4d = READ_ONCE(*p4dp);

		if (!p4d_none(p4d)) {
			WARN_ON(!p4d_present(p4d));
			unmap_hotplug_pud_range(p4dp, addr, next, free_mapped,
						altmap);
		}
	} while (addr = next, addr < end);
}
|
|
|
|
static void unmap_hotplug_range(unsigned long addr, unsigned long end,
|
|
bool free_mapped, struct vmem_altmap *altmap)
|
|
{
|
|
unsigned long next;
|
|
pgd_t *pgdp, pgd;
|
|
|
|
/*
|
|
* altmap can only be used as vmemmap mapping backing memory.
|
|
* In case the backing memory itself is not being freed, then
|
|
* altmap is irrelevant. Warn about this inconsistency when
|
|
* encountered.
|
|
*/
|
|
WARN_ON(!free_mapped && altmap);
|
|
|
|
do {
|
|
next = pgd_addr_end(addr, end);
|
|
pgdp = pgd_offset_k(addr);
|
|
pgd = READ_ONCE(*pgdp);
|
|
if (pgd_none(pgd))
|
|
continue;
|
|
|
|
WARN_ON(!pgd_present(pgd));
|
|
unmap_hotplug_p4d_range(pgdp, addr, next, free_mapped, altmap);
|
|
} while (addr = next, addr < end);
|
|
}
|
|
|
|
/*
 * Free the pte table referenced by @pmdp if it is completely empty and the
 * range [@addr, @end) passes the @floor / @ceiling alignment check.
 */
static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr,
				 unsigned long end, unsigned long floor,
				 unsigned long ceiling)
{
	unsigned long i, start = addr;
	pte_t *ptep;

	do {
		ptep = pte_offset_kernel(pmdp, addr);

		/*
		 * Sanity check only: the earlier unmap loops are expected
		 * to have cleared these ptes already.
		 */
		WARN_ON(!pte_none(READ_ONCE(*ptep)));

		addr += PAGE_SIZE;
	} while (addr < end);

	if (!pgtable_range_aligned(start, end, floor, ceiling, PMD_MASK))
		return;

	/*
	 * The pte page can go only if all of its remaining entries are
	 * empty too. Overlap with other regions has been dealt with by
	 * the floor/ceiling check above.
	 */
	ptep = pte_offset_kernel(pmdp, 0UL);
	for (i = 0; i < PTRS_PER_PTE; i++)
		if (!pte_none(READ_ONCE(ptep[i])))
			return;

	pmd_clear(pmdp);
	__flush_tlb_kernel_pgtable(start);
	free_hotplug_pgtable_page(virt_to_page(ptep));
}
|
|
|
|
/*
 * Try to free the pte tables below every populated pmd in [@addr, @end),
 * then free the pmd table referenced by @pudp itself if it has become
 * completely empty and the range passes the @floor / @ceiling check.
 */
static void free_empty_pmd_table(pud_t *pudp, unsigned long addr,
				 unsigned long end, unsigned long floor,
				 unsigned long ceiling)
{
	unsigned long i, next, start = addr;
	pmd_t *pmdp, pmd;

	do {
		next = pmd_addr_end(addr, end);
		pmdp = pmd_offset(pudp, addr);
		pmd = READ_ONCE(*pmdp);

		if (!pmd_none(pmd)) {
			WARN_ON(!pmd_present(pmd) || !pmd_table(pmd) ||
				pmd_sect(pmd));
			free_empty_pte_table(pmdp, addr, next, floor, ceiling);
		}
	} while (addr = next, addr < end);

	/* With two levels or fewer there is no separate pmd page to free. */
	if (CONFIG_PGTABLE_LEVELS <= 2 ||
	    !pgtable_range_aligned(start, end, floor, ceiling, PUD_MASK))
		return;

	/*
	 * The pmd page can go only if all of its remaining entries are
	 * empty too. Overlap with other regions has been dealt with by
	 * the floor/ceiling check above.
	 */
	pmdp = pmd_offset(pudp, 0UL);
	for (i = 0; i < PTRS_PER_PMD; i++)
		if (!pmd_none(READ_ONCE(pmdp[i])))
			return;

	pud_clear(pudp);
	__flush_tlb_kernel_pgtable(start);
	free_hotplug_pgtable_page(virt_to_page(pmdp));
}
|
|
|
|
/*
 * Try to free the pmd tables below every populated pud in [@addr, @end),
 * then free the pud table referenced by @p4dp itself if it has become
 * completely empty and the range passes the @floor / @ceiling check.
 */
static void free_empty_pud_table(p4d_t *p4dp, unsigned long addr,
				 unsigned long end, unsigned long floor,
				 unsigned long ceiling)
{
	unsigned long i, next, start = addr;
	pud_t *pudp, pud;

	do {
		next = pud_addr_end(addr, end);
		pudp = pud_offset(p4dp, addr);
		pud = READ_ONCE(*pudp);

		if (!pud_none(pud)) {
			WARN_ON(!pud_present(pud) || !pud_table(pud) ||
				pud_sect(pud));
			free_empty_pmd_table(pudp, addr, next, floor, ceiling);
		}
	} while (addr = next, addr < end);

	/* With three levels or fewer there is no separate pud page to free. */
	if (CONFIG_PGTABLE_LEVELS <= 3 ||
	    !pgtable_range_aligned(start, end, floor, ceiling, PGDIR_MASK))
		return;

	/*
	 * The pud page can go only if all of its remaining entries are
	 * empty too. Overlap with other regions has been dealt with by
	 * the floor/ceiling check above.
	 */
	pudp = pud_offset(p4dp, 0UL);
	for (i = 0; i < PTRS_PER_PUD; i++)
		if (!pud_none(READ_ONCE(pudp[i])))
			return;

	p4d_clear(p4dp);
	__flush_tlb_kernel_pgtable(start);
	free_hotplug_pgtable_page(virt_to_page(pudp));
}
|
|
|
|
/*
 * Walk the p4d entries covering [@addr, @end) under @pgdp and try to free
 * the empty tables below every populated one.
 */
static void free_empty_p4d_table(pgd_t *pgdp, unsigned long addr,
				 unsigned long end, unsigned long floor,
				 unsigned long ceiling)
{
	unsigned long next;
	p4d_t *p4dp, p4d;

	do {
		next = p4d_addr_end(addr, end);
		p4dp = p4d_offset(pgdp, addr);
		p4d = READ_ONCE(*p4dp);

		if (!p4d_none(p4d)) {
			WARN_ON(!p4d_present(p4d));
			free_empty_pud_table(p4dp, addr, next, floor, ceiling);
		}
	} while (addr = next, addr < end);
}
|
|
|
|
static void free_empty_tables(unsigned long addr, unsigned long end,
|
|
unsigned long floor, unsigned long ceiling)
|
|
{
|
|
unsigned long next;
|
|
pgd_t *pgdp, pgd;
|
|
|
|
do {
|
|
next = pgd_addr_end(addr, end);
|
|
pgdp = pgd_offset_k(addr);
|
|
pgd = READ_ONCE(*pgdp);
|
|
if (pgd_none(pgd))
|
|
continue;
|
|
|
|
WARN_ON(!pgd_present(pgd));
|
|
free_empty_p4d_table(pgdp, addr, next, floor, ceiling);
|
|
} while (addr = next, addr < end);
|
|
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
|
#if !ARM64_SWAPPER_USES_SECTION_MAPS
|
|
/*
 * Variant used when ARM64_SWAPPER_USES_SECTION_MAPS is false: the vmemmap
 * for [start, end) is populated purely through
 * vmemmap_populate_basepages(), whose result is returned as-is.
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	return vmemmap_populate_basepages(start, end, node, altmap);
}
|
|
#else /* !ARM64_SWAPPER_USES_SECTION_MAPS */
|
|
/*
 * Variant used when ARM64_SWAPPER_USES_SECTION_MAPS is true: populate the
 * vmemmap for [start, end) one pmd-sized step at a time.
 *
 * For each step the pgd/p4d/pud levels are populated first. An empty pmd
 * slot then gets a PMD_SIZE block mapped as a section; if that block
 * cannot be allocated, the step falls back to
 * vmemmap_populate_basepages(). An already populated slot is only
 * verified.
 *
 * Returns 0 on success, -ENOMEM if any allocation (including the base page
 * fallback) fails.
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	unsigned long addr = start;
	unsigned long next;

	do {
		pgd_t *pgdp;
		p4d_t *p4dp;
		pud_t *pudp;
		pmd_t *pmdp;

		next = pmd_addr_end(addr, end);

		pgdp = vmemmap_pgd_populate(addr, node);
		if (!pgdp)
			return -ENOMEM;

		p4dp = vmemmap_p4d_populate(pgdp, addr, node);
		if (!p4dp)
			return -ENOMEM;

		pudp = vmemmap_pud_populate(p4dp, addr, node);
		if (!pudp)
			return -ENOMEM;

		pmdp = pmd_offset(pudp, addr);
		if (!pmd_none(READ_ONCE(*pmdp))) {
			vmemmap_verify((pte_t *)pmdp, node, addr, next);
		} else {
			void *block = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);

			if (block)
				pmd_set_huge(pmdp, __pa(block), __pgprot(PROT_SECT_NORMAL));
			else if (vmemmap_populate_basepages(addr, next, node, altmap))
				return -ENOMEM;
		}

		addr = next;
	} while (addr != end);

	return 0;
}
|
|
#endif /* !ARM64_SWAPPER_USES_SECTION_MAPS */
|
|
/*
 * Tear down the vmemmap mapping for [start, end): unmap the range (passing
 * @altmap along) and then reclaim page tables that became empty, bounded by
 * the vmemmap region. The body is compiled out entirely unless
 * CONFIG_MEMORY_HOTPLUG is set.
 */
void vmemmap_free(unsigned long start, unsigned long end,
		  struct vmem_altmap *altmap)
{
#ifdef CONFIG_MEMORY_HOTPLUG
	/* The range is expected to lie inside [VMEMMAP_START, VMEMMAP_END]. */
	WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));

	unmap_hotplug_range(start, end, true, altmap);
	free_empty_tables(start, end, VMEMMAP_START, VMEMMAP_END);
#endif
}
|
|
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
|
|
|
|
static inline pud_t * fixmap_pud(unsigned long addr)
|
|
{
|
|
pgd_t *pgdp = pgd_offset_k(addr);
|
|
p4d_t *p4dp = p4d_offset(pgdp, addr);
|
|
p4d_t p4d = READ_ONCE(*p4dp);
|
|
|
|
BUG_ON(p4d_none(p4d) || p4d_bad(p4d));
|
|
|
|
return pud_offset_kimg(p4dp, addr);
|
|
}
|
|
|
|
static inline pmd_t * fixmap_pmd(unsigned long addr)
|
|
{
|
|
pud_t *pudp = fixmap_pud(addr);
|
|
pud_t pud = READ_ONCE(*pudp);
|
|
|
|
BUG_ON(pud_none(pud) || pud_bad(pud));
|
|
|
|
return pmd_offset_kimg(pudp, addr);
|
|
}
|
|
|
|
static inline pte_t * fixmap_pte(unsigned long addr)
|
|
{
|
|
return &bm_pte[pte_index(addr)];
|
|
}
|
|
|
|
/*
|
|
* The p*d_populate functions call virt_to_phys implicitly so they can't be used
|
|
* directly on kernel symbols (bm_p*d). This function is called too early to use
|
|
* lm_alias so __p*d_populate functions must be used to populate with the
|
|
* physical address from __pa_symbol.
|
|
*/
|
|
void __init early_fixmap_init(void)
|
|
{
|
|
pgd_t *pgdp;
|
|
p4d_t *p4dp, p4d;
|
|
pud_t *pudp;
|
|
pmd_t *pmdp;
|
|
unsigned long addr = FIXADDR_START;
|
|
|
|
pgdp = pgd_offset_k(addr);
|
|
p4dp = p4d_offset(pgdp, addr);
|
|
p4d = READ_ONCE(*p4dp);
|
|
if (CONFIG_PGTABLE_LEVELS > 3 &&
|
|
!(p4d_none(p4d) || p4d_page_paddr(p4d) == __pa_symbol(bm_pud))) {
|
|
/*
|
|
* We only end up here if the kernel mapping and the fixmap
|
|
* share the top level pgd entry, which should only happen on
|
|
* 16k/4 levels configurations.
|
|
*/
|
|
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
|
|
pudp = pud_offset_kimg(p4dp, addr);
|
|
} else {
|
|
if (p4d_none(p4d))
|
|
__p4d_populate(p4dp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
|
|
pudp = fixmap_pud(addr);
|
|
}
|
|
if (pud_none(READ_ONCE(*pudp)))
|
|
__pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
|
|
pmdp = fixmap_pmd(addr);
|
|
__pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
|
|
|
|
/*
|
|
* The boot-ioremap range spans multiple pmds, for which
|
|
* we are not prepared:
|
|
*/
|
|
BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
|
|
!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
|
|
|
|
if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
|
|
|| pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
|
|
WARN_ON(1);
|
|
pr_warn("pmdp %p != %p, %p\n",
|
|
pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
|
|
fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
|
|
pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
|
|
fix_to_virt(FIX_BTMAP_BEGIN));
|
|
pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
|
|
fix_to_virt(FIX_BTMAP_END));
|
|
|
|
pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
|
|
pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Unusually, this is also called in IRQ context (ghes_iounmap_irq) so if we
|
|
* ever need to use IPIs for TLB broadcasting, then we're in trouble here.
|
|
*/
|
|
void __set_fixmap(enum fixed_addresses idx,
|
|
phys_addr_t phys, pgprot_t flags)
|
|
{
|
|
unsigned long addr = __fix_to_virt(idx);
|
|
pte_t *ptep;
|
|
|
|
BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
|
|
|
|
ptep = fixmap_pte(addr);
|
|
|
|
if (pgprot_val(flags)) {
|
|
set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
|
|
} else {
|
|
pte_clear(&init_mm, addr, ptep);
|
|
flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
|
|
}
|
|
}
|
|
|
|
/*
 * Map the flattened device tree at physical address @dt_phys into the
 * FIX_FDT fixmap slot with protection @prot.
 *
 * On success returns the virtual address of the FDT and stores its total
 * size in *@size. Returns NULL when @dt_phys is zero or not MIN_FDT_ALIGN
 * aligned, when the header magic does not match, or when the reported size
 * exceeds MAX_FDT_SIZE (in the last case *@size has already been written).
 */
void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
{
	const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
	void *dt_virt;
	int offset;

	/*
	 * The physical FDT address must be set and satisfy the minimum
	 * alignment. MIN_FDT_ALIGN is relied upon to be at least 8 bytes so
	 * that the magic and size fields of the FDT header are always
	 * accessible once the first chunk is mapped; double check that this
	 * really holds.
	 */
	BUILD_BUG_ON(MIN_FDT_ALIGN < 8);
	if (!dt_phys || dt_phys % MIN_FDT_ALIGN)
		return NULL;

	/*
	 * The FDT region has to be mappable without allocating further
	 * translation table pages, otherwise create_mapping_noalloc() would
	 * not be safe this early.
	 *
	 * With 64k pages the FDT is mapped using PTEs, so it must share a
	 * PMD with the rest of the fixmap. With 4k pages section mappings
	 * are used for the FDT, so sharing a PUD is sufficient.
	 */
	BUILD_BUG_ON(dt_virt_base % SZ_2M);

	BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT !=
		     __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT);

	offset = dt_phys % SWAPPER_BLOCK_SIZE;
	dt_virt = (void *)dt_virt_base + offset;

	/* Map the first chunk; that is enough to read the size from the header. */
	create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
			       dt_virt_base, SWAPPER_BLOCK_SIZE, prot);

	if (fdt_magic(dt_virt) != FDT_MAGIC)
		return NULL;

	*size = fdt_totalsize(dt_virt);
	if (*size > MAX_FDT_SIZE)
		return NULL;

	/* Everything already covered by the first chunk? Then we are done. */
	if (offset + *size <= SWAPPER_BLOCK_SIZE)
		return dt_virt;

	create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
			       round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot);

	return dt_virt;
}
|
|
|
|
/* Always reports that p4d-level ioremap mappings are not supported. */
int __init arch_ioremap_p4d_supported(void)
{
	return 0;
}
|
|
|
|
/*
 * Report whether pud-level ioremap mappings may be used.
 *
 * Level 1 block mappings exist only with the 4k granule, and software
 * table walks cannot cope with intermediate entries being removed, hence
 * the answer is also "no" whenever CONFIG_PTDUMP_DEBUGFS is enabled.
 */
int __init arch_ioremap_pud_supported(void)
{
	if (IS_ENABLED(CONFIG_PTDUMP_DEBUGFS))
		return 0;

	return IS_ENABLED(CONFIG_ARM64_4K_PAGES);
}
|
|
|
|
/*
 * Report whether pmd-level ioremap mappings may be used: yes, unless
 * CONFIG_PTDUMP_DEBUGFS is enabled (same reasoning as in
 * arch_ioremap_pud_supported()).
 */
int __init arch_ioremap_pmd_supported(void)
{
	return IS_ENABLED(CONFIG_PTDUMP_DEBUGFS) ? 0 : 1;
}
|
|
|
|
/*
 * Install a section mapping of @phys with protection @prot at @pudp.
 *
 * Returns 1 when the entry was written, 0 when replacing the current entry
 * with the new one is not considered safe by pgattr_change_is_safe().
 */
int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
{
	pud_t sect = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot));

	/* For now only permission changes are accepted. */
	if (pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)),
				  pud_val(sect))) {
		VM_BUG_ON(phys & ~PUD_MASK);
		set_pud(pudp, sect);
		return 1;
	}

	return 0;
}
|
|
|
|
/*
 * Install a section mapping of @phys with protection @prot at @pmdp.
 *
 * Returns 1 when the entry was written, 0 when replacing the current entry
 * with the new one is not considered safe by pgattr_change_is_safe().
 */
int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
{
	pmd_t sect = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot));

	/* For now only permission changes are accepted. */
	if (pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)),
				  pmd_val(sect))) {
		VM_BUG_ON(phys & ~PMD_MASK);
		set_pmd(pmdp, sect);
		return 1;
	}

	return 0;
}
|
|
|
|
/*
 * Clear @pudp if it currently holds a section mapping.
 * Returns 1 when the entry was cleared, 0 when it was not a section.
 */
int pud_clear_huge(pud_t *pudp)
{
	if (pud_sect(READ_ONCE(*pudp))) {
		pud_clear(pudp);
		return 1;
	}

	return 0;
}
|
|
|
|
/*
 * Clear @pmdp if it currently holds a section mapping.
 * Returns 1 when the entry was cleared, 0 when it was not a section.
 */
int pmd_clear_huge(pmd_t *pmdp)
{
	if (pmd_sect(READ_ONCE(*pmdp))) {
		pmd_clear(pmdp);
		return 1;
	}

	return 0;
}
|
|
|
|
/*
 * Detach and free the pte table referenced by @pmdp.
 *
 * If the entry is not a table, nothing is freed (VM_WARN_ON fires on
 * debug builds). Otherwise the entry is cleared and the TLB flushed for
 * @addr before the table page is released. Returns 1 in every case.
 */
int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr)
{
	pmd_t entry = READ_ONCE(*pmdp);
	pte_t *ptes;

	if (!pmd_table(entry)) {
		VM_WARN_ON(1);
		return 1;
	}

	/* Look the table up before the entry pointing at it is cleared. */
	ptes = pte_offset_kernel(pmdp, addr);
	pmd_clear(pmdp);
	__flush_tlb_kernel_pgtable(addr);
	pte_free_kernel(NULL, ptes);

	return 1;
}
|
|
|
|
/*
 * Detach and free the pmd table referenced by @pudp, together with the pte
 * tables hanging off it.
 *
 * If the entry is not a table, nothing is freed (VM_WARN_ON fires on
 * debug builds). Otherwise pmd_free_pte_page() is applied to each pmd
 * slot across PUD_SIZE bytes starting at @addr, then the pud entry is
 * cleared, the TLB flushed and the pmd table released. Returns 1 in every
 * case.
 */
int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
{
	pud_t entry = READ_ONCE(*pudp);
	unsigned long cur, limit;
	pmd_t *pmds;
	pmd_t *slot;

	if (!pud_table(entry)) {
		VM_WARN_ON(1);
		return 1;
	}

	pmds = pmd_offset(pudp, addr);
	slot = pmds;
	cur = addr;
	limit = addr + PUD_SIZE;
	do {
		pmd_free_pte_page(slot, cur);
		slot++;
		cur += PMD_SIZE;
	} while (cur != limit);

	pud_clear(pudp);
	__flush_tlb_kernel_pgtable(addr);
	pmd_free(NULL, pmds);

	return 1;
}
|
|
|
|
/*
 * Never frees anything: returning 0 tells the caller not to attempt a
 * block mapping at this level.
 */
int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
{
	return 0;
}
|
|
|
|
#ifdef CONFIG_MEMORY_HOTPLUG
|
|
/*
 * Remove the linear mapping of @size bytes at virtual address @start:
 * unmap the range, then reclaim page tables that became empty, bounded by
 * [PAGE_OFFSET, PAGE_END]. Warns if @pgdir is not init_mm's pgd or if the
 * range leaves the linear region.
 */
static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size)
{
	const unsigned long end = start + size;

	WARN_ON(pgdir != init_mm.pgd);
	WARN_ON((start < PAGE_OFFSET) || (end > PAGE_END));

	unmap_hotplug_range(start, end, false, NULL);
	free_empty_tables(start, end, PAGE_OFFSET, PAGE_END);
}
|
|
|
|
/*
 * Tell whether the physical range [start, start + size) can be covered by
 * the linear mapping.
 */
static bool inside_linear_region(u64 start, u64 size)
{
	u64 start_linear_pa = __pa(_PAGE_OFFSET(vabits_actual));
	u64 end_linear_pa = __pa(PAGE_END - 1);

	/*
	 * With a randomized linear mapping the start physical address can
	 * come out bigger than the end physical address, i.e. the range
	 * wraps. In that case start from zero, because [0, end_linear_pa]
	 * must still be able to cover all addressable physical addresses.
	 */
	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) &&
	    start_linear_pa > end_linear_pa)
		start_linear_pa = 0;

	WARN_ON(start_linear_pa > end_linear_pa);

	/*
	 * The linear mapping region is [PAGE_OFFSET..(PAGE_END - 1)], both
	 * ends included and PAGE_END itself excluded. The largest physical
	 * range that fits inside it is therefore derived from those same
	 * end points.
	 */
	if (start < start_linear_pa)
		return false;

	return (start + size - 1) <= end_linear_pa;
}
|
|
|
|
/*
 * Hot-add the physical range [start, start + size) on node @nid.
 *
 * The range is mapped into the linear region of swapper_pg_dir with
 * @params->pgprot, its memblock "nomap" marking is cleared, and the pages
 * are then registered through __add_pages(). If that registration fails,
 * the linear mapping created here is removed again.
 *
 * Returns 0 on success, -EINVAL if the range does not fit into the linear
 * mapping region, or the error reported by __add_pages().
 */
int arch_add_memory(int nid, u64 start, u64 size,
		    struct mhp_params *params)
{
	int ret, flags = 0;

	if (!inside_linear_region(start, size)) {
		pr_err("[%llx %llx] is outside linear mapping region\n", start, start + size);
		return -EINVAL;
	}

	/*
	 * KFENCE requires linear map to be mapped at page granularity, so that
	 * it is possible to protect/unprotect single pages in the KFENCE pool.
	 */
	if (rodata_full || debug_pagealloc_enabled() ||
	    IS_ENABLED(CONFIG_KFENCE))
		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;

	__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
			     size, params->pgprot, __pgd_pgtable_alloc,
			     flags);

	memblock_clear_nomap(start, size);

	ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT,
			   params);
	if (ret) {
		__remove_pgd_mapping(swapper_pg_dir,
				     __phys_to_virt(start), size);
	} else {
		unsigned long end_pfn = PFN_UP(start + size);

		/*
		 * Only ever raise the end-of-memory markers. The added range
		 * may lie below memory that is already present, and
		 * overwriting max_pfn unconditionally would then make the
		 * higher, still valid PFNs look out of range to its users.
		 */
		if (end_pfn > max_pfn) {
			max_pfn = end_pfn;
			max_low_pfn = max_pfn;
		}
	}

	return ret;
}
|
|
|
|
/*
 * Hot-remove the physical range [start, start + size): drop the pages via
 * __remove_pages() first, then tear down the corresponding linear mapping
 * in swapper_pg_dir.
 */
void arch_remove_memory(int nid, u64 start, u64 size,
			struct vmem_altmap *altmap)
{
	const unsigned long first_pfn = start >> PAGE_SHIFT;
	const unsigned long pfn_count = size >> PAGE_SHIFT;

	__remove_pages(first_pfn, pfn_count, altmap);
	__remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size);
}
|
|
|
|
/*
 * Validate that the physical range [start, start + size) is driver-managed
 * system RAM.
 *
 * Returns 0 when all of the following hold, -EINVAL otherwise:
 *  - a resource starting at @start is found below iomem_resource,
 *  - that resource carries IORESOURCE_SYSRAM_DRIVER_MANAGED,
 *  - @resource_name begins with "System RAM (",
 *  - none of the memory sections visited across the range is an early
 *    section.
 */
int check_range_driver_managed(u64 start, u64 size, const char *resource_name)
{
	const unsigned long end_pfn = __phys_to_pfn(start + size);
	struct resource *res;
	unsigned long pfn;

	res = lookup_resource(&iomem_resource, start);
	if (!res) {
		pr_err("%s: couldn't find memory resource for start 0x%llx\n",
		       __func__, start);
		return -EINVAL;
	}

	if (!(res->flags & IORESOURCE_SYSRAM_DRIVER_MANAGED))
		return -EINVAL;

	/* strstr() returning its first argument means "starts with". */
	if (strstr(resource_name, "System RAM (") != resource_name)
		return -EINVAL;

	for (pfn = __phys_to_pfn(start); pfn < end_pfn; pfn += PAGES_PER_SECTION) {
		if (early_section(__pfn_to_section(pfn)))
			return -EINVAL;
	}

	return 0;
}
|
|
|
|
/*
 * Create the linear mapping for the driver-managed physical range
 * [start, start + size) in init_mm's page tables with PAGE_KERNEL
 * protection. No page table allocator is supplied to
 * __create_pgd_mapping().
 *
 * Returns 0 on success, -EINVAL if check_range_driver_managed() rejects
 * the range.
 */
int populate_range_driver_managed(u64 start, u64 size,
			const char *resource_name)
{
	int map_flags = 0;
	unsigned long va;

	/* Pure address arithmetic; done up front as in the declaration order. */
	va = (unsigned long)phys_to_virt(start);

	if (check_range_driver_managed(start, size, resource_name))
		return -EINVAL;

	/*
	 * With rodata_full (or debug pagealloc) enabled, memory is mapped
	 * at page size granule rather than with block mappings.
	 */
	if (rodata_full || debug_pagealloc_enabled())
		map_flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;

	__create_pgd_mapping(init_mm.pgd, start, va, size,
			     PAGE_KERNEL, NULL, map_flags);

	return 0;
}
|
|
EXPORT_SYMBOL_GPL(populate_range_driver_managed);
|
|
|
|
/*
 * Remove the linear mapping of the driver-managed physical range
 * [start, start + size); the counterpart of
 * populate_range_driver_managed().
 *
 * @start is a physical address, whereas unmap_hotplug_range() walks the
 * kernel page tables by virtual address. The range is therefore
 * translated to its linear-map address first, exactly as the populate
 * side does when it creates the mapping.
 *
 * Returns 0 on success, -EINVAL if check_range_driver_managed() rejects
 * the range.
 */
int depopulate_range_driver_managed(u64 start, u64 size,
			const char *resource_name)
{
	unsigned long virt = (unsigned long)phys_to_virt(start);

	if (check_range_driver_managed(start, size, resource_name))
		return -EINVAL;

	unmap_hotplug_range(virt, virt + size, false, NULL);

	return 0;
}
|
|
EXPORT_SYMBOL_GPL(depopulate_range_driver_managed);
|
|
|
|
/*
|
|
* This memory hotplug notifier helps prevent boot memory from being
|
|
* inadvertently removed as it blocks pfn range offlining process in
|
|
* __offline_pages(). Hence this prevents both offlining as well as
|
|
* removal process for boot memory which is initially always online.
|
|
* In future if and when boot memory could be removed, this notifier
|
|
* should be dropped and free_hotplug_page_range() should handle any
|
|
* reserved pages allocated during boot.
|
|
*/
|
|
static int prevent_bootmem_remove_notifier(struct notifier_block *nb,
|
|
unsigned long action, void *data)
|
|
{
|
|
struct mem_section *ms;
|
|
struct memory_notify *arg = data;
|
|
unsigned long end_pfn = arg->start_pfn + arg->nr_pages;
|
|
unsigned long pfn = arg->start_pfn;
|
|
|
|
if (action != MEM_GOING_OFFLINE)
|
|
return NOTIFY_OK;
|
|
|
|
for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
|
|
ms = __pfn_to_section(pfn);
|
|
if (early_section(ms))
|
|
return NOTIFY_BAD;
|
|
}
|
|
return NOTIFY_OK;
|
|
}
|
|
|
|
static struct notifier_block prevent_bootmem_remove_nb = {
|
|
.notifier_call = prevent_bootmem_remove_notifier,
|
|
};
|
|
|
|
/*
 * Register prevent_bootmem_remove_nb as a memory notifier and return the
 * registration result.
 */
static int __init prevent_bootmem_remove_init(void)
{
	return register_memory_notifier(&prevent_bootmem_remove_nb);
}
|
|
device_initcall(prevent_bootmem_remove_init);
|
|
#endif
|