diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index fcba4b61f9e8..f32878041ead 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -254,6 +254,7 @@ + @@ -301,6 +302,8 @@ + + @@ -320,6 +323,7 @@ + @@ -332,7 +336,10 @@ + + + @@ -1475,6 +1482,7 @@ + @@ -2540,6 +2548,8 @@ + + @@ -3156,6 +3166,7 @@ + @@ -3680,7 +3691,9 @@ + + @@ -4068,6 +4081,7 @@ + @@ -4093,12 +4107,14 @@ + + @@ -4108,6 +4124,7 @@ + @@ -5017,6 +5034,7 @@ + @@ -5466,6 +5484,7 @@ + @@ -5657,11 +5676,14 @@ + + + @@ -5746,6 +5768,7 @@ + @@ -5789,6 +5812,7 @@ + @@ -6214,6 +6238,8 @@ + + @@ -6239,6 +6265,7 @@ + @@ -6251,7 +6278,10 @@ + + + @@ -15148,89 +15178,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -16372,18 +16319,18 @@ - + - + - + - + - + @@ -20183,75 +20130,75 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -21469,7 +21416,7 @@ - + @@ -22192,7 +22139,7 @@ - + @@ -24682,6 +24629,23 @@ + + + + + + + + + + + + + + + + + @@ -25150,66 +25114,66 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -30810,12 +30774,12 @@ - + - + - + @@ -44337,81 +44301,81 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -46450,21 +46414,21 @@ - + - + - + - + - + - + @@ -48107,12 +48071,12 @@ - + - + - + @@ -49715,24 +49679,24 @@ - + - + - + - + - + - + - + @@ -51470,24 +51434,24 @@ - + - + - + - + - + - + - + @@ -56885,114 +56849,114 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -61014,96 +60978,96 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -63451,12 +63415,12 @@ - + - + - + @@ -67752,36 +67716,36 @@ - + - 
+ - + - + - + - + - + - + - + - + - + @@ -67951,14 +67915,7 @@ - - - - - - - - + @@ -80138,24 +80095,24 @@ - + - + - + - + - + - + - + @@ -80844,81 +80801,81 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -81192,6 +81149,7 @@ + @@ -84032,213 +83990,213 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -84879,7 +84837,7 @@ - + @@ -92270,12 +92228,12 @@ - + - + - + @@ -93122,7 +93080,6 @@ - @@ -94595,21 +94552,21 @@ - + - + - + - + - + - + @@ -96083,12 +96040,12 @@ - + - + - + @@ -98413,27 +98370,27 @@ - + - + - + - + - + - + - + - + @@ -108459,15 +108416,15 @@ - + - + - + - + @@ -113496,18 +113453,18 @@ - + - + - + - + - + @@ -113565,15 +113522,15 @@ - + - + - + - + @@ -114534,174 +114491,174 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -115211,15 +115168,15 @@ - + - + - + - + @@ -115593,9 +115550,9 @@ - + - + @@ -115918,49 +115875,49 @@ - - - - + + + + - - - + + + - - - - - - - + + + + + + + - - - - - - - + + + + + + + - - - - + + + + - - - - + + + + @@ -116808,7 +116765,7 @@ - + @@ -116818,12 +116775,12 @@ - - + + - - + + @@ -117072,6 +117029,10 @@ + + + + @@ -117333,6 +117294,19 @@ + + + + + + + + + + + + + @@ -117466,6 +117440,14 @@ + + + + + + + + @@ -117544,6 +117526,21 @@ + + + + + + + + + + + + + + + @@ -117553,6 +117550,12 @@ + + + + + + @@ -117931,10 +117934,10 @@ - - - - + + + + @@ -117989,17 +117992,17 @@ - - - - + + + + - - - - - + + + + + @@ -118292,27 +118295,27 @@ - - - - - - + - + - - - - - - + + + + + + + + + + + @@ -118422,12 +118425,12 @@ - - - - - - + + + + + + @@ 
-118502,17 +118505,17 @@ - - - - + + + + - - - - - + + + + + @@ -118620,12 +118623,12 @@ - - - - - - + + + + + + @@ -118644,10 +118647,10 @@ - - - - + + + + @@ -118665,10 +118668,10 @@ - - - - + + + + @@ -118759,18 +118762,18 @@ - - - - - - + + + + + + - - - - + + + + @@ -118827,10 +118830,10 @@ - - - - + + + + @@ -118907,10 +118910,10 @@ - - - - + + + + @@ -118921,11 +118924,11 @@ - - - - - + + + + + @@ -119018,9 +119021,9 @@ - - - + + + @@ -119211,9 +119214,9 @@ - - - + + + @@ -119286,10 +119289,10 @@ - - - - + + + + @@ -119315,15 +119318,15 @@ - - - + + + - - - - + + + + @@ -119338,9 +119341,9 @@ - - - + + + @@ -119488,10 +119491,10 @@ - - - - + + + + @@ -119545,9 +119548,9 @@ - - - + + + @@ -119951,6 +119954,8 @@ + + @@ -119976,6 +119981,7 @@ + @@ -119988,7 +119994,10 @@ + + + @@ -120054,7 +120063,7 @@ - + @@ -120062,8 +120071,8 @@ - - + + @@ -120107,10 +120116,10 @@ - - - - + + + + @@ -120128,7 +120137,7 @@ - + @@ -120141,8 +120150,8 @@ - - + + @@ -120160,15 +120169,15 @@ - + - + - + @@ -120184,9 +120193,9 @@ - - - + + + @@ -120196,7 +120205,7 @@ - + @@ -120210,9 +120219,9 @@ - + - + @@ -120230,7 +120239,7 @@ - + @@ -120271,7 +120280,7 @@ - + @@ -120284,17 +120293,17 @@ - + - - + + - + @@ -120319,7 +120328,7 @@ - + @@ -120328,7 +120337,7 @@ - + @@ -120809,12 +120818,12 @@ - - - - - - + + + + + + @@ -123214,8 +123223,8 @@ - - + + @@ -124474,6 +124483,11 @@ + + + + + @@ -126204,105 +126218,105 @@ - - - + + + - - - + + + - - - - - + + + + + - - - + + + - - - - - + + + + + - - - + + + - - - - - + + + + + - - + + - - - + + + - - + + - - - + + + - - - + + + - - - - + + + + - - + + - - + + - - + + - - - - + + + + - - + + - - + + - - - + + + @@ -130088,12 +130102,22 @@ - - - - + + + + + + + + + + + + + + @@ -130125,10 +130149,10 @@ - - - - + + + + @@ -133257,6 +133281,14 @@ + + + + + + + + @@ -135890,12 +135922,22 @@ + + + + + + + + + + @@ -136971,8 +137013,8 @@ - - + + @@ -137928,6 +137970,11 @@ + + + + + @@ -138060,6 +138107,12 @@ + + + + + + @@ -138087,6 +138140,10 
@@ + + + + @@ -138130,6 +138187,11 @@ + + + + + @@ -139422,8 +139484,8 @@ - - + + @@ -140023,8 +140085,8 @@ - - + + @@ -140090,34 +140152,34 @@ - - - - + + + + - - - - + + + + - - - - - - + + + + + + - - - - - - - - + + + + + + + + @@ -141922,8 +141984,8 @@ - - + + @@ -142803,6 +142865,10 @@ + + + + @@ -144093,8 +144159,8 @@ - - + + @@ -144875,11 +144941,11 @@ - - - - - + + + + + @@ -145146,6 +145212,12 @@ + + + + + + @@ -146064,6 +146136,10 @@ + + + + @@ -146072,6 +146148,10 @@ + + + + @@ -146092,6 +146172,11 @@ + + + + + @@ -146500,6 +146585,15 @@ + + + + + + + + + @@ -146699,6 +146793,10 @@ + + + + diff --git a/android/abi_gki_aarch64_galaxy b/android/abi_gki_aarch64_galaxy index d5358ae912e2..b9540bf8b079 100644 --- a/android/abi_gki_aarch64_galaxy +++ b/android/abi_gki_aarch64_galaxy @@ -1901,6 +1901,8 @@ find_vpid finish_wait firmware_request_nowarn + fixed_phy_register + fixed_phy_unregister fixed_size_llseek flow_keys_basic_dissector flush_dcache_page @@ -2327,6 +2329,7 @@ irq_create_mapping_affinity irq_create_of_mapping irq_dispose_mapping + irq_domain_add_simple irq_domain_alloc_irqs_parent irq_domain_create_hierarchy irq_domain_free_irqs_common @@ -3023,6 +3026,7 @@ phy_ethtool_get_link_ksettings phy_ethtool_nway_reset phy_ethtool_set_link_ksettings + phy_ethtool_set_wol phy_exit phy_find_first phy_get_pause @@ -3034,9 +3038,12 @@ phy_power_off phy_power_on phy_print_status + phy_register_fixup_for_uid + phy_save_page phy_set_mode_ext phy_start phy_stop + phy_unregister_fixup_for_uid pick_highest_pushable_task pid_nr_ns pid_task @@ -4066,6 +4073,7 @@ ttm_tt_populate ttm_tt_set_placement_caching ttm_unmap_and_unpopulate_pages + tty_encode_baud_rate tty_flip_buffer_push tty_insert_flip_string_fixed_flag tty_kref_put @@ -4214,8 +4222,10 @@ usb_asmedia_modifyflowcontrol usb_assign_descriptors usb_autopm_get_interface + usb_autopm_get_interface_async usb_autopm_get_interface_no_resume usb_autopm_put_interface + usb_autopm_put_interface_async usb_bulk_msg 
usb_calc_bus_time usb_choose_configuration @@ -4293,6 +4303,7 @@ usb_ifnum_to_if usb_initialize_gadget usb_interface_id + usb_interrupt_msg usb_kill_urb usb_match_id usb_match_one_id diff --git a/android/abi_gki_aarch64_mtk b/android/abi_gki_aarch64_mtk index ee8da67b460e..1a3cc8ff7d30 100644 --- a/android/abi_gki_aarch64_mtk +++ b/android/abi_gki_aarch64_mtk @@ -3216,6 +3216,7 @@ update_devfreq usb_add_phy_dev usb_assign_descriptors + usb_clear_halt usb_copy_descriptors usb_ep_alloc_request usb_ep_autoconfig @@ -3239,6 +3240,7 @@ usb_phy_set_charger_current usb_remove_phy usb_role_switch_set_role + usb_unlink_urb v4l2_async_notifier_add_subdev v4l2_async_notifier_cleanup v4l2_async_subdev_notifier_register diff --git a/android/abi_gki_aarch64_oplus b/android/abi_gki_aarch64_oplus index 384bdbfa612b..4479f29e3b31 100644 --- a/android/abi_gki_aarch64_oplus +++ b/android/abi_gki_aarch64_oplus @@ -408,6 +408,7 @@ dev_fwnode __dev_get_by_index dev_get_by_index + dev_get_by_index_rcu dev_get_by_name dev_get_regmap dev_get_stats @@ -1658,7 +1659,9 @@ net_ratelimit nf_ct_attach nf_ct_delete + nf_register_net_hook nf_register_net_hooks + nf_unregister_net_hook nf_unregister_net_hooks nla_find nla_memcpy @@ -2286,6 +2289,7 @@ rtc_update_irq rtc_valid_tm rtnl_is_locked + __rtnl_link_unregister rtnl_lock rtnl_unlock runqueues diff --git a/android/abi_gki_aarch64_rockchip b/android/abi_gki_aarch64_rockchip index 5f59918af934..c1df5fc9548f 100644 --- a/android/abi_gki_aarch64_rockchip +++ b/android/abi_gki_aarch64_rockchip @@ -2626,6 +2626,7 @@ snd_pcm_fill_iec958_consumer snd_pcm_fill_iec958_consumer_hw_params snd_pcm_hw_constraint_eld + snd_pcm_stop_xrun # required by snd-soc-rk817.ko snd_soc_component_exit_regmap diff --git a/android/abi_gki_aarch64_transsion b/android/abi_gki_aarch64_transsion index c38d29451ee4..83e28c1e1746 100644 --- a/android/abi_gki_aarch64_transsion +++ b/android/abi_gki_aarch64_transsion @@ -10,6 +10,7 @@ nr_swap_pages plist_requeue plist_del + 
__traceiter_android_rvh_handle_pte_fault_end __traceiter_android_vh_handle_pte_fault_end __traceiter_android_vh_cow_user_page __traceiter_android_vh_swapin_add_anon_rmap @@ -20,9 +21,13 @@ __traceiter_android_vh_count_pswpout __traceiter_android_vh_count_swpout_vm_event __traceiter_android_vh_swap_slot_cache_active + __traceiter_android_rvh_drain_slots_cache_cpu __traceiter_android_vh_drain_slots_cache_cpu + __traceiter_android_rvh_alloc_swap_slot_cache __traceiter_android_vh_alloc_swap_slot_cache + __traceiter_android_rvh_free_swap_slot __traceiter_android_vh_free_swap_slot + __traceiter_android_rvh_get_swap_page __traceiter_android_vh_get_swap_page __traceiter_android_vh_page_isolated_for_reclaim __traceiter_android_vh_inactive_is_low @@ -31,10 +36,12 @@ __traceiter_android_vh_unuse_swap_page __traceiter_android_vh_init_swap_info_struct __traceiter_android_vh_si_swapinfo + __traceiter_android_rvh_alloc_si __traceiter_android_vh_alloc_si __traceiter_android_vh_free_pages __traceiter_android_vh_set_shmem_page_flag __traceiter_android_vh_ra_tuning_max_page + __tracepoint_android_rvh_handle_pte_fault_end __tracepoint_android_vh_handle_pte_fault_end __tracepoint_android_vh_cow_user_page __tracepoint_android_vh_swapin_add_anon_rmap @@ -45,9 +52,13 @@ __tracepoint_android_vh_count_pswpout __tracepoint_android_vh_count_swpout_vm_event __tracepoint_android_vh_swap_slot_cache_active + __tracepoint_android_rvh_drain_slots_cache_cpu __tracepoint_android_vh_drain_slots_cache_cpu + __tracepoint_android_rvh_alloc_swap_slot_cache __tracepoint_android_vh_alloc_swap_slot_cache + __tracepoint_android_rvh_free_swap_slot __tracepoint_android_vh_free_swap_slot + __tracepoint_android_rvh_get_swap_page __tracepoint_android_vh_get_swap_page __tracepoint_android_vh_page_isolated_for_reclaim __tracepoint_android_vh_inactive_is_low @@ -56,6 +67,7 @@ __tracepoint_android_vh_unuse_swap_page __tracepoint_android_vh_init_swap_info_struct __tracepoint_android_vh_si_swapinfo + 
__tracepoint_android_rvh_alloc_si __tracepoint_android_vh_alloc_si __tracepoint_android_vh_free_pages __tracepoint_android_vh_set_shmem_page_flag diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c index 37c4c308339e..423c55d0e165 100644 --- a/crypto/jitterentropy.c +++ b/crypto/jitterentropy.c @@ -117,6 +117,22 @@ struct rand_data { #define JENT_EHEALTH 9 /* Health test failed during initialization */ #define JENT_ERCT 10 /* RCT failed during initialization */ +/* + * The output n bits can receive more than n bits of min entropy, of course, + * but the fixed output of the conditioning function can only asymptotically + * approach the output size bits of min entropy, not attain that bound. Random + * maps will tend to have output collisions, which reduces the creditable + * output entropy (that is what SP 800-90B Section 3.1.5.1.2 attempts to bound). + * + * The value "64" is justified in Appendix A.4 of the current 90C draft, + * and aligns with NIST's "epsilon" definition in this document, which is + * that a string can be considered "full entropy" if you can bound the min + * entropy in each bit of output to at least 1-epsilon, where epsilon is + * required to be <= 2^(-32). 
+ */ +#define JENT_ENTROPY_SAFETY_FACTOR 64 + +#include <linux/fips.h> #include "jitterentropy.h" /*************************************************************************** @@ -546,7 +562,10 @@ static int jent_measure_jitter(struct rand_data *ec) */ static void jent_gen_entropy(struct rand_data *ec) { - unsigned int k = 0; + unsigned int k = 0, safety_factor = 0; + + if (fips_enabled) + safety_factor = JENT_ENTROPY_SAFETY_FACTOR; /* priming of the ->prev_time value */ jent_measure_jitter(ec); @@ -560,7 +579,7 @@ static void jent_gen_entropy(struct rand_data *ec) * We multiply the loop value with ->osr to obtain the * oversampling rate requested by the caller */ - if (++k >= (DATA_SIZE_BITS * ec->osr)) + if (++k >= ((DATA_SIZE_BITS + safety_factor) * ec->osr)) break; } } diff --git a/drivers/android/vendor_hooks.c b/drivers/android/vendor_hooks.c index d4352dfeb34d..7ac6fd6d69c8 100644 --- a/drivers/android/vendor_hooks.c +++ b/drivers/android/vendor_hooks.c @@ -431,6 +431,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_binder_read_done); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_handle_tlb_conf); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_shrink_node_memcgs); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ra_tuning_max_page); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_handle_pte_fault_end); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_handle_pte_fault_end); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_cow_user_page); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_swapin_add_anon_rmap); @@ -441,9 +442,13 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_count_pswpin); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_count_pswpout); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_count_swpout_vm_event); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_swap_slot_cache_active); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_drain_slots_cache_cpu); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_drain_slots_cache_cpu); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_alloc_swap_slot_cache); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_alloc_swap_slot_cache); 
+EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_free_swap_slot); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_free_swap_slot); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_get_swap_page); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_get_swap_page); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_madvise_cold_or_pageout); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_page_isolated_for_reclaim); @@ -453,6 +458,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_account_swap_pages); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_unuse_swap_page); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_init_swap_info_struct); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_si_swapinfo); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_alloc_si); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_alloc_si); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_free_pages); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_set_shmem_page_flag); diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index a4dd500bc141..1372f40d0371 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -793,8 +793,6 @@ _request_firmware(const struct firmware **firmware_p, const char *name, size_t offset, u32 opt_flags) { struct firmware *fw = NULL; - struct cred *kern_cred = NULL; - const struct cred *old_cred; bool nondirect = false; int ret; @@ -811,18 +809,6 @@ _request_firmware(const struct firmware **firmware_p, const char *name, if (ret <= 0) /* error or already assigned */ goto out; - /* - * We are about to try to access the firmware file. Because we may have been - * called by a driver when serving an unrelated request from userland, we use - * the kernel credentials to read the file. - */ - kern_cred = prepare_kernel_cred(NULL); - if (!kern_cred) { - ret = -ENOMEM; - goto out; - } - old_cred = override_creds(kern_cred); - ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL); /* Only full reads can support decompression, platform, and sysfs. 
*/ @@ -848,9 +834,6 @@ _request_firmware(const struct firmware **firmware_p, const char *name, } else ret = assign_fw(fw, device); - revert_creds(old_cred); - put_cred(kern_cred); - out: if (ret < 0) { fw_abort_batch_reqs(fw); diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.c b/drivers/dma-buf/dma-buf-sysfs-stats.c index 2389a363bd3a..3c7bb2baf8da 100644 --- a/drivers/dma-buf/dma-buf-sysfs-stats.c +++ b/drivers/dma-buf/dma-buf-sysfs-stats.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "dma-buf-sysfs-stats.h" @@ -135,10 +136,51 @@ void dma_buf_uninit_sysfs_statistics(void) kset_unregister(dma_buf_stats_kset); } +static void sysfs_add_workfn(struct work_struct *work) +{ + /* The ABI would have to change for this to be false, but let's be paranoid. */ + _Static_assert(sizeof(struct kobject) >= sizeof(struct work_struct), + "kobject is smaller than work_struct"); + + struct dma_buf_sysfs_entry *sysfs_entry = + container_of((struct kobject *)work, struct dma_buf_sysfs_entry, kobj); + struct dma_buf *dmabuf = sysfs_entry->dmabuf; + + /* + * A dmabuf is ref-counted via its file member. If this handler holds the only + * reference to the dmabuf, there is no need for sysfs kobject creation. This is an + * optimization and a race; when the reference count drops to 1 immediately after + * this check it is not harmful as the sysfs entry will still get cleaned up in + * dma_buf_stats_teardown, which won't get called until the final dmabuf reference + * is released, and that can't happen until the end of this function. + */ + if (file_count(dmabuf->file) > 1) { + /* + * kobject_init_and_add expects kobject to be zero-filled, but we have populated it + * to trigger this work function. 
+ */ + memset(&dmabuf->sysfs_entry->kobj, 0, sizeof(dmabuf->sysfs_entry->kobj)); + dmabuf->sysfs_entry->kobj.kset = dma_buf_per_buffer_stats_kset; + if (kobject_init_and_add(&dmabuf->sysfs_entry->kobj, &dma_buf_ktype, NULL, + "%lu", file_inode(dmabuf->file)->i_ino)) { + kobject_put(&dmabuf->sysfs_entry->kobj); + dmabuf->sysfs_entry = NULL; + } + } else { + /* + * Free the sysfs_entry and reset the pointer so dma_buf_stats_teardown doesn't + * attempt to operate on it. + */ + kfree(dmabuf->sysfs_entry); + dmabuf->sysfs_entry = NULL; + } + dma_buf_put(dmabuf); +} + int dma_buf_stats_setup(struct dma_buf *dmabuf) { struct dma_buf_sysfs_entry *sysfs_entry; - int ret; + struct work_struct *work; if (!dmabuf || !dmabuf->file) return -EINVAL; @@ -148,25 +190,21 @@ int dma_buf_stats_setup(struct dma_buf *dmabuf) return -EINVAL; } - sysfs_entry = kzalloc(sizeof(struct dma_buf_sysfs_entry), GFP_KERNEL); + sysfs_entry = kmalloc(sizeof(struct dma_buf_sysfs_entry), GFP_KERNEL); if (!sysfs_entry) return -ENOMEM; - sysfs_entry->kobj.kset = dma_buf_per_buffer_stats_kset; sysfs_entry->dmabuf = dmabuf; - dmabuf->sysfs_entry = sysfs_entry; - /* create the directory for buffer stats */ - ret = kobject_init_and_add(&sysfs_entry->kobj, &dma_buf_ktype, NULL, - "%lu", file_inode(dmabuf->file)->i_ino); - if (ret) - goto err_sysfs_dmabuf; + /* + * The use of kobj as a work_struct is an ugly hack + * to avoid an ABI break in this frozen kernel. + */ + work = (struct work_struct *)&dmabuf->sysfs_entry->kobj; + INIT_WORK(work, sysfs_add_workfn); + get_dma_buf(dmabuf); /* This reference will be dropped in sysfs_add_workfn. 
*/ + schedule_work(work); return 0; - -err_sysfs_dmabuf: - kobject_put(&sysfs_entry->kobj); - dmabuf->sysfs_entry = NULL; - return ret; } diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index ea35dfb22d9e..da4f934f9a44 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -486,6 +486,7 @@ EXPORT_SYMBOL_GPL(is_dma_buf_file); static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) { + static atomic64_t dmabuf_inode = ATOMIC64_INIT(0); struct file *file; struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb); @@ -495,6 +496,13 @@ static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) inode->i_size = dmabuf->size; inode_set_bytes(inode, dmabuf->size); + /* + * The ->i_ino acquired from get_next_ino() is not unique thus + * not suitable for using it as dentry name by dmabuf stats. + * Override ->i_ino with the unique and dmabuffs specific + * value. + */ + inode->i_ino = atomic64_add_return(1, &dmabuf_inode); file = alloc_file_pseudo(inode, dma_buf_mnt, "dmabuf", flags, &dma_buf_fops); if (IS_ERR(file)) @@ -621,10 +629,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) file->f_mode |= FMODE_LSEEK; dmabuf->file = file; - ret = dma_buf_stats_setup(dmabuf); - if (ret) - goto err_sysfs; - mutex_init(&dmabuf->lock); INIT_LIST_HEAD(&dmabuf->attachments); @@ -632,6 +636,10 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) list_add(&dmabuf->list_node, &db_list.head); mutex_unlock(&db_list.lock); + ret = dma_buf_stats_setup(dmabuf); + if (ret) + goto err_sysfs; + return dmabuf; err_sysfs: diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6054f13e5c89..de2c454d6940 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -25,12 +25,16 @@ static struct kmem_cache *ino_entry_slab; struct kmem_cache *f2fs_inode_entry_slab; -void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) +void f2fs_stop_checkpoint(struct f2fs_sb_info 
*sbi, bool end_io, + unsigned char reason) { f2fs_build_fault_attr(sbi, 0, 0); set_ckpt_flags(sbi, CP_ERROR_FLAG); - if (!end_io) + if (!end_io) { f2fs_flush_merged_writes(sbi); + + f2fs_handle_stop(sbi, reason); + } } /* @@ -120,7 +124,7 @@ retry: if (PTR_ERR(page) == -EIO && ++count <= DEFAULT_RETRY_IO_COUNT) goto retry; - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_META_PAGE); } return page; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3bac45d7a94c..867b2b72ec67 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -311,7 +311,8 @@ static void f2fs_write_end_io(struct bio *bio) mempool_free(page, sbi->write_io_dummy); if (unlikely(bio->bi_status)) - f2fs_stop_checkpoint(sbi, true); + f2fs_stop_checkpoint(sbi, true, + STOP_CP_REASON_WRITE_FAIL); continue; } @@ -327,7 +328,8 @@ static void f2fs_write_end_io(struct bio *bio) if (unlikely(bio->bi_status)) { mapping_set_error(page->mapping, -EIO); if (type == F2FS_WB_CP_DATA) - f2fs_stop_checkpoint(sbi, true); + f2fs_stop_checkpoint(sbi, true, + STOP_CP_REASON_WRITE_FAIL); } f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) && diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 56b67a0d18d5..19d7d1f4c4fa 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3482,6 +3482,7 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly); int f2fs_quota_sync(struct super_block *sb, int type); loff_t max_file_blocks(struct inode *inode); void f2fs_quota_off_umount(struct super_block *sb); +void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); @@ -3631,7 +3632,8 @@ unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, /* * checkpoint.c */ -void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io); +void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io, + unsigned 
char reason); void f2fs_flush_ckpt_thread(struct f2fs_sb_info *sbi); struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b204c6b134c6..36d8f0376f76 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2249,7 +2249,8 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) if (ret) { if (ret == -EROFS) { ret = 0; - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, + STOP_CP_REASON_SHUTDOWN); set_sbi_flag(sbi, SBI_IS_SHUTDOWN); trace_f2fs_shutdown(sbi, in, ret); } @@ -2262,7 +2263,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) ret = freeze_bdev(sb->s_bdev); if (ret) goto out; - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); set_sbi_flag(sbi, SBI_IS_SHUTDOWN); thaw_bdev(sb->s_bdev); break; @@ -2271,16 +2272,16 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) ret = f2fs_sync_fs(sb, 1); if (ret) goto out; - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; case F2FS_GOING_DOWN_NOSYNC: - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; case F2FS_GOING_DOWN_METAFLUSH: f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; case F2FS_GOING_DOWN_NEED_FSCK: diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 52ae6a06782f..30949eac81c3 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -68,7 +68,8 @@ static int gc_thread_func(void *data) if (time_to_inject(sbi, FAULT_CHECKPOINT)) { f2fs_show_injection_info(sbi, FAULT_CHECKPOINT); - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, + 
STOP_CP_REASON_FAULT_INJECT); } if (!sb_start_write_trylock(sbi->sb)) { @@ -1634,7 +1635,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT", segno, type, GET_SUM_TYPE((&sum->footer))); set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, + STOP_CP_REASON_CORRUPTED_SUMMARY); goto skip; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index d25853c98d4a..29bf3e215f52 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -685,7 +685,8 @@ retry: cond_resched(); goto retry; } else if (err != -ENOENT) { - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, + STOP_CP_REASON_UPDATE_INODE); } return; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 026d04156fbe..fdb41cb3fd68 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -499,7 +499,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { if (time_to_inject(sbi, FAULT_CHECKPOINT)) { f2fs_show_injection_info(sbi, FAULT_CHECKPOINT); - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT); } /* balance_fs_bg is able to be pending */ @@ -782,8 +782,11 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi) if (!f2fs_test_bit(i, (char *)&sbi->dirty_device)) continue; ret = __submit_flush_wait(sbi, FDEV(i).bdev); - if (ret) + if (ret) { + f2fs_stop_checkpoint(sbi, false, + STOP_CP_REASON_FLUSH_FAIL); break; + } spin_lock(&sbi->dev_lock); f2fs_clear_bit(i, (char *)&sbi->dirty_device); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cb049996e53b..89d90d366754 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3642,6 +3642,26 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) return err; } +void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason) +{ + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + int err; + + f2fs_bug_on(sbi, reason >= MAX_STOP_REASON); + + 
f2fs_down_write(&sbi->sb_lock); + + if (raw_super->s_stop_reason[reason] < ((1 << BITS_PER_BYTE) - 1)) + raw_super->s_stop_reason[reason]++; + + err = f2fs_commit_super(sbi, false); + if (err) + f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d", + reason, err); + + f2fs_up_write(&sbi->sb_lock); +} + static int f2fs_scan_devices(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index d445150c5350..5dd1e52b8997 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -73,6 +73,20 @@ struct f2fs_device { __le32 total_segments; } __packed; +/* reason of stop_checkpoint */ +enum stop_cp_reason { + STOP_CP_REASON_SHUTDOWN, + STOP_CP_REASON_FAULT_INJECT, + STOP_CP_REASON_META_PAGE, + STOP_CP_REASON_WRITE_FAIL, + STOP_CP_REASON_CORRUPTED_SUMMARY, + STOP_CP_REASON_UPDATE_INODE, + STOP_CP_REASON_FLUSH_FAIL, + STOP_CP_REASON_MAX, +}; + +#define MAX_STOP_REASON 32 + struct f2fs_super_block { __le32 magic; /* Magic Number */ __le16 major_ver; /* Major Version */ @@ -116,7 +130,8 @@ struct f2fs_super_block { __u8 hot_ext_count; /* # of hot file extension */ __le16 s_encoding; /* Filename charset encoding */ __le16 s_encoding_flags; /* Filename charset encoding flags */ - __u8 reserved[306]; /* valid reserved region */ + __u8 s_stop_reason[MAX_STOP_REASON]; /* stop checkpoint reason */ + __u8 reserved[274]; /* valid reserved region */ __le32 crc; /* checksum of superblock */ } __packed; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4019e6fa3b95..8cf258fc9162 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -515,6 +515,11 @@ static inline void i_mmap_unlock_write(struct address_space *mapping) up_write(&mapping->i_mmap_rwsem); } +static inline int i_mmap_trylock_read(struct address_space *mapping) +{ + return down_read_trylock(&mapping->i_mmap_rwsem); +} + static inline void i_mmap_lock_read(struct address_space *mapping) { 
down_read(&mapping->i_mmap_rwsem); diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 7dee138fbf0f..0a4d49ca8ccf 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -134,6 +134,11 @@ static inline void anon_vma_lock_read(struct anon_vma *anon_vma) down_read(&anon_vma->root->rwsem); } +static inline int anon_vma_trylock_read(struct anon_vma *anon_vma) +{ + return down_read_trylock(&anon_vma->root->rwsem); +} + static inline void anon_vma_unlock_read(struct anon_vma *anon_vma) { up_read(&anon_vma->root->rwsem); @@ -261,17 +266,14 @@ void try_to_munlock(struct page *); void remove_migration_ptes(struct page *old, struct page *new, bool locked); -/* - * Called by memory-failure.c to kill processes. - */ -struct anon_vma *page_lock_anon_vma_read(struct page *page); -void page_unlock_anon_vma_read(struct anon_vma *anon_vma); int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); /* * rmap_walk_control: To control rmap traversing for specific needs * * arg: passed to rmap_one() and invalid_vma() + * try_lock: bail out if the rmap lock is contended + * contended: indicate the rmap traversal bailed out due to lock contention * rmap_one: executed on each vma where page is mapped * done: for checking traversing termination condition * anon_lock: for getting anon_lock by optimized way rather than default @@ -279,6 +281,8 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); */ struct rmap_walk_control { void *arg; + bool try_lock; + bool contended; /* * Return false if page table scanning in rmap_walk should be stopped. * Otherwise, return true. 
@@ -286,13 +290,21 @@ struct rmap_walk_control { bool (*rmap_one)(struct page *page, struct vm_area_struct *vma, unsigned long addr, void *arg); int (*done)(struct page *page); - struct anon_vma *(*anon_lock)(struct page *page); + struct anon_vma *(*anon_lock)(struct page *page, + struct rmap_walk_control *rwc); bool (*invalid_vma)(struct vm_area_struct *vma, void *arg); }; void rmap_walk(struct page *page, struct rmap_walk_control *rwc); void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc); +/* + * Called by memory-failure.c to kill processes. + */ +struct anon_vma *page_lock_anon_vma_read(struct page *page, + struct rmap_walk_control *rwc); +void page_unlock_anon_vma_read(struct anon_vma *anon_vma); + #else /* !CONFIG_MMU */ #define anon_vma_init() do {} while (0) diff --git a/include/trace/hooks/mm.h b/include/trace/hooks/mm.h index 358a89380982..3f32c876441f 100644 --- a/include/trace/hooks/mm.h +++ b/include/trace/hooks/mm.h @@ -193,6 +193,9 @@ DECLARE_HOOK(android_vh_subpage_dma_contig_alloc, DECLARE_HOOK(android_vh_ra_tuning_max_page, TP_PROTO(struct readahead_control *ractl, unsigned long *max_page), TP_ARGS(ractl, max_page)); +DECLARE_RESTRICTED_HOOK(android_rvh_handle_pte_fault_end, + TP_PROTO(struct vm_fault *vmf, unsigned long highest_memmap_pfn), + TP_ARGS(vmf, highest_memmap_pfn), 1); DECLARE_HOOK(android_vh_handle_pte_fault_end, TP_PROTO(struct vm_fault *vmf, unsigned long highest_memmap_pfn), TP_ARGS(vmf, highest_memmap_pfn)); @@ -223,16 +226,30 @@ DECLARE_HOOK(android_vh_count_swpout_vm_event, DECLARE_HOOK(android_vh_swap_slot_cache_active, TP_PROTO(bool swap_slot_cache_active), TP_ARGS(swap_slot_cache_active)); +DECLARE_RESTRICTED_HOOK(android_rvh_drain_slots_cache_cpu, + TP_PROTO(struct swap_slots_cache *cache, unsigned int type, + bool free_slots, bool *skip), + TP_ARGS(cache, type, free_slots, skip), 1); DECLARE_HOOK(android_vh_drain_slots_cache_cpu, TP_PROTO(struct swap_slots_cache *cache, unsigned int type, bool free_slots, 
bool *skip), TP_ARGS(cache, type, free_slots, skip)); +DECLARE_RESTRICTED_HOOK(android_rvh_alloc_swap_slot_cache, + TP_PROTO(struct swap_slots_cache *cache, int *ret, bool *skip), + TP_ARGS(cache, ret, skip), 1); DECLARE_HOOK(android_vh_alloc_swap_slot_cache, TP_PROTO(struct swap_slots_cache *cache, int *ret, bool *skip), TP_ARGS(cache, ret, skip)); +DECLARE_RESTRICTED_HOOK(android_rvh_free_swap_slot, + TP_PROTO(swp_entry_t entry, struct swap_slots_cache *cache, bool *skip), + TP_ARGS(entry, cache, skip), 1); DECLARE_HOOK(android_vh_free_swap_slot, TP_PROTO(swp_entry_t entry, struct swap_slots_cache *cache, bool *skip), TP_ARGS(entry, cache, skip)); +DECLARE_RESTRICTED_HOOK(android_rvh_get_swap_page, + TP_PROTO(struct page *page, swp_entry_t *entry, + struct swap_slots_cache *cache, bool *found), + TP_ARGS(page, entry, cache, found), 1); DECLARE_HOOK(android_vh_get_swap_page, TP_PROTO(struct page *page, swp_entry_t *entry, struct swap_slots_cache *cache, bool *found), @@ -255,6 +272,9 @@ DECLARE_HOOK(android_vh_init_swap_info_struct, DECLARE_HOOK(android_vh_si_swapinfo, TP_PROTO(struct swap_info_struct *si, bool *skip), TP_ARGS(si, skip)); +DECLARE_RESTRICTED_HOOK(android_rvh_alloc_si, + TP_PROTO(struct swap_info_struct **p, bool *skip), + TP_ARGS(p, skip), 1); DECLARE_HOOK(android_vh_alloc_si, TP_PROTO(struct swap_info_struct **p, bool *skip), TP_ARGS(p, skip)); diff --git a/kernel/cpu.c b/kernel/cpu.c index 90d09bafecf6..fc15c01d61b8 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1865,7 +1865,9 @@ int __boot_cpu_id; /* Horrific hacks because we can't add more to cpuhp_hp_states. 
*/ static int random_and_perf_prepare_fusion(unsigned int cpu) { +#ifdef CONFIG_PERF_EVENTS perf_event_init_cpu(cpu); +#endif random_prepare_cpu(cpu); return 0; } diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index f46ac0f39777..e14917cd2a1d 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1020,7 +1020,7 @@ void psi_cgroup_free(struct cgroup *cgroup) */ void cgroup_move_task(struct task_struct *task, struct css_set *to) { - unsigned int task_flags = 0; + unsigned int task_flags; struct rq_flags rf; struct rq *rq; @@ -1035,15 +1035,31 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to) rq = task_rq_lock(task, &rf); - if (task_on_rq_queued(task)) { - task_flags = TSK_RUNNING; - if (task_current(rq, task)) - task_flags |= TSK_ONCPU; - } else if (task->in_iowait) - task_flags = TSK_IOWAIT; - - if (task->in_memstall) - task_flags |= TSK_MEMSTALL; + /* + * We may race with schedule() dropping the rq lock between + * deactivating prev and switching to next. Because the psi + * updates from the deactivation are deferred to the switch + * callback to save cgroup tree updates, the task's scheduling + * state here is not coherent with its psi state: + * + * schedule() cgroup_move_task() + * rq_lock() + * deactivate_task() + * p->on_rq = 0 + * psi_dequeue() // defers TSK_RUNNING & TSK_IOWAIT updates + * pick_next_task() + * rq_unlock() + * rq_lock() + * psi_task_change() // old cgroup + * task->cgroups = to + * psi_task_change() // new cgroup + * rq_unlock() + * rq_lock() + * psi_sched_switch() // does deferred updates in new cgroup + * + * Don't rely on the scheduling state. Use psi_flags instead. 
+ */ + task_flags = task->psi_flags; if (task_flags) psi_task_change(task, task_flags, 0); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 35eb1f2bee0d..993816e3fc09 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1481,7 +1481,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd) */ get_page(page); spin_unlock(vmf->ptl); - anon_vma = page_lock_anon_vma_read(page); + anon_vma = page_lock_anon_vma_read(page, NULL); /* Confirm the PMD did not change while page_table_lock was released */ spin_lock(vmf->ptl); diff --git a/mm/ksm.c b/mm/ksm.c index e2464c04ede2..2695ddbeb47e 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2626,7 +2626,13 @@ again: struct vm_area_struct *vma; cond_resched(); - anon_vma_lock_read(anon_vma); + if (!anon_vma_trylock_read(anon_vma)) { + if (rwc->try_lock) { + rwc->contended = true; + return; + } + anon_vma_lock_read(anon_vma); + } anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, 0, ULONG_MAX) { unsigned long addr; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index aef267c6a724..4bd73d6dc18a 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -477,7 +477,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, struct anon_vma *av; pgoff_t pgoff; - av = page_lock_anon_vma_read(page); + av = page_lock_anon_vma_read(page, NULL); if (av == NULL) /* Not actually mapped anymore */ return; diff --git a/mm/memory.c b/mm/memory.c index 85554eca3da9..70384a99bfcf 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4777,6 +4777,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) if (vmf->flags & FAULT_FLAG_WRITE) flush_tlb_fix_spurious_fault(vmf->vma, vmf->address); } + trace_android_rvh_handle_pte_fault_end(vmf, highest_memmap_pfn); trace_android_vh_handle_pte_fault_end(vmf, highest_memmap_pfn); unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); diff --git a/mm/page_idle.c b/mm/page_idle.c index 144fb4ed961d..b5613232e881 100644 --- a/mm/page_idle.c +++ b/mm/page_idle.c @@ 
-92,10 +92,10 @@ static bool page_idle_clear_pte_refs_one(struct page *page, static void page_idle_clear_pte_refs(struct page *page) { /* - * Since rwc.arg is unused, rwc is effectively immutable, so we - * can make it static const to save some cycles and stack. + * Since rwc.try_lock is unused, rwc is effectively immutable, so we + * can make it static to save some cycles and stack. */ - static const struct rmap_walk_control rwc = { + static struct rmap_walk_control rwc = { .rmap_one = page_idle_clear_pte_refs_one, .anon_lock = page_lock_anon_vma_read, }; diff --git a/mm/rmap.c b/mm/rmap.c index d48141f90360..033b04704b59 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -518,9 +518,11 @@ out: * * Its a little more complex as it tries to keep the fast path to a single * atomic op -- the trylock. If we fail the trylock, we fall back to getting a - * reference like with page_get_anon_vma() and then block on the mutex. + * reference like with page_get_anon_vma() and then block on the mutex + * on !rwc->try_lock case. */ -struct anon_vma *page_lock_anon_vma_read(struct page *page) +struct anon_vma *page_lock_anon_vma_read(struct page *page, + struct rmap_walk_control *rwc) { struct anon_vma *anon_vma = NULL; struct anon_vma *root_anon_vma; @@ -553,6 +555,13 @@ struct anon_vma *page_lock_anon_vma_read(struct page *page) anon_vma = NULL; goto out; } + + if (rwc && rwc->try_lock) { + anon_vma = NULL; + rwc->contended = true; + goto out; + } + /* trylock failed, we got to sleep */ if (!atomic_inc_not_zero(&anon_vma->refcount)) { anon_vma = NULL; @@ -850,8 +859,10 @@ static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg) * @memcg: target memory cgroup * @vm_flags: collect encountered vma->vm_flags who actually referenced the page * - * Quick test_and_clear_referenced for all mappings to a page, - * returns the number of ptes which referenced the page. 
+ * Quick test_and_clear_referenced for all mappings of a page, + * + * Return: The number of mappings which referenced the page. Return -1 if + * the function bailed out due to rmap lock contention. */ int page_referenced(struct page *page, int is_locked, @@ -867,6 +878,7 @@ int page_referenced(struct page *page, .rmap_one = page_referenced_one, .arg = (void *)&pra, .anon_lock = page_lock_anon_vma_read, + .try_lock = true, }; *vm_flags = 0; @@ -897,7 +909,7 @@ int page_referenced(struct page *page, if (we_locked) unlock_page(page); - return pra.referenced; + return rwc.contended ? -1 : pra.referenced; } static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, @@ -1898,7 +1910,7 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page, struct anon_vma *anon_vma; if (rwc->anon_lock) - return rwc->anon_lock(page); + return rwc->anon_lock(page, rwc); /* * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read() @@ -1910,7 +1922,17 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page, if (!anon_vma) return NULL; + if (anon_vma_trylock_read(anon_vma)) + goto out; + + if (rwc->try_lock) { + anon_vma = NULL; + rwc->contended = true; + goto out; + } + anon_vma_lock_read(anon_vma); +out: return anon_vma; } @@ -2009,9 +2031,18 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc, if (!got_lock) return; } else { + if (i_mmap_trylock_read(mapping)) + goto lookup; + + if (rwc->try_lock) { + rwc->contended = true; + return; + } + i_mmap_lock_read(mapping); } } +lookup: vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff_start, pgoff_end) { unsigned long address = vma_address(page, vma); diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 1392649a4d9a..43231ae6c3fd 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -133,6 +133,8 @@ static int alloc_swap_slot_cache(unsigned int cpu) * as kvzalloc could trigger reclaim and get_swap_page, * which can lock swap_slots_cache_mutex. 
*/ + trace_android_rvh_alloc_swap_slot_cache(&per_cpu(swp_slots, cpu), + &ret, &skip); trace_android_vh_alloc_swap_slot_cache(&per_cpu(swp_slots, cpu), &ret, &skip); if (skip) @@ -190,6 +192,8 @@ static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type, bool skip = false; cache = &per_cpu(swp_slots, cpu); + trace_android_rvh_drain_slots_cache_cpu(cache, type, + free_slots, &skip); trace_android_vh_drain_slots_cache_cpu(cache, type, free_slots, &skip); if (skip) @@ -298,6 +302,7 @@ int free_swap_slot(swp_entry_t entry) bool skip = false; cache = raw_cpu_ptr(&swp_slots); + trace_android_rvh_free_swap_slot(entry, cache, &skip); trace_android_vh_free_swap_slot(entry, cache, &skip); if (skip) return 0; @@ -335,6 +340,7 @@ swp_entry_t get_swap_page(struct page *page) bool found = false; entry.val = 0; + trace_android_rvh_get_swap_page(page, &entry, raw_cpu_ptr(&swp_slots), &found); trace_android_vh_get_swap_page(page, &entry, raw_cpu_ptr(&swp_slots), &found); if (found) goto out; diff --git a/mm/swapfile.c b/mm/swapfile.c index 677f235806c2..b3cc17423d38 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2908,6 +2908,7 @@ static struct swap_info_struct *alloc_swap_info(void) int i; bool skip = false; + trace_android_rvh_alloc_si(&p, &skip); trace_android_vh_alloc_si(&p, &skip); if (!skip) p = kvzalloc(struct_size(p, avail_lists, nr_node_ids), GFP_KERNEL); diff --git a/mm/vmscan.c b/mm/vmscan.c index 701595410992..5f63e7d30bca 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1045,6 +1045,10 @@ static enum page_references page_check_references(struct page *page, if (vm_flags & VM_LOCKED) return PAGEREF_RECLAIM; + /* rmap lock contention: rotate */ + if (referenced_ptes == -1) + return PAGEREF_KEEP; + if (referenced_ptes) { /* * All mapped pages start out with page table @@ -1348,8 +1352,8 @@ static unsigned int shrink_page_list(struct list_head *page_list, if (unlikely(PageTransHuge(page))) flags |= TTU_SPLIT_HUGE_PMD; - - 
trace_android_vh_page_trylock_set(page); + if (!ignore_references) + trace_android_vh_page_trylock_set(page); if (!try_to_unmap(page, flags)) { stat->nr_unmap_fail += nr_pages; if (!was_swapbacked && PageSwapBacked(page)) @@ -2119,8 +2123,9 @@ static void shrink_active_list(unsigned long nr_to_scan, if (bypass) goto skip_page_referenced; trace_android_vh_page_trylock_set(page); + /* Referenced or rmap lock contention: rotate */ if (page_referenced(page, 0, sc->target_mem_cgroup, - &vm_flags)) { + &vm_flags) != 0) { /* * Identify referenced, file-backed active pages and * give them one more trip around the active list. So