diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uvc b/Documentation/ABI/testing/configfs-usb-gadget-uvc
index ac5e11af79a8..a87724ecbf65 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-uvc
+++ b/Documentation/ABI/testing/configfs-usb-gadget-uvc
@@ -7,6 +7,7 @@ Description:	UVC function directory
 		streaming_maxburst	0..15 (ss only)
 		streaming_maxpacket	1..1023 (fs), 1..3072 (hs/ss)
 		streaming_interval	1..16
+		function_name		string [32]
 		===================	=============================
 
 What:		/config/usb-gadget/gadget/functions/uvc.name/control
diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst
index 2d1fc03d346e..ef19b9c13523 100644
--- a/Documentation/kbuild/kbuild.rst
+++ b/Documentation/kbuild/kbuild.rst
@@ -77,6 +77,17 @@ HOSTLDLIBS
 ----------
 Additional libraries to link against when building host programs.
 
+.. _userkbuildflags:
+
+USERCFLAGS
+----------
+Additional options used for $(CC) when compiling userprogs.
+
+USERLDFLAGS
+-----------
+Additional options used for $(LD) when linking userprogs. userprogs are linked
+with $(CC), so $(USERLDFLAGS) should include the "-Wl," prefix as applicable.
+
 KBUILD_KCONFIG
 --------------
 Set the top-level Kconfig file to the value of this environment
diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst
index 0d5dd5413af0..1db205dc1d0f 100644
--- a/Documentation/kbuild/makefiles.rst
+++ b/Documentation/kbuild/makefiles.rst
@@ -852,6 +852,8 @@ The syntax is quite similar. The difference is to use "userprogs" instead of
 
 	When linking bpfilter_umh, it will be passed the extra option -static.
 
+	From command line, :ref:`USERCFLAGS and USERLDFLAGS <userkbuildflags>` will also be used.
+
 5.4 When userspace programs are actually built
 ----------------------------------------------
diff --git a/Documentation/usb/gadget-testing.rst b/Documentation/usb/gadget-testing.rst
index 9d6276f82774..be1b085a0901 100644
--- a/Documentation/usb/gadget-testing.rst
+++ b/Documentation/usb/gadget-testing.rst
@@ -774,6 +774,7 @@ The uvc function provides these attributes in its function directory:
 	streaming_maxpacket	maximum packet size this endpoint is capable of
 				sending or receiving when this configuration is
 				selected
+	function_name		name of the interface
 	===================	================================================
 
 There are also "control" and "streaming" subdirectories, each of which contain
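As a quick, hedged illustration of the two user-facing interfaces documented above (the gadget name "g1", the instance name "uvc.0", and the flag values below are made-up examples, not taken from this patch), a session might look like:

    # Extra flags for userspace programs built by kbuild. userprogs are
    # compiled and linked with $(CC), so linker options take the -Wl, prefix.
    $ make USERCFLAGS=-Werror USERLDFLAGS=-Wl,--as-needed

    # Label a uvc function instance via the new 32-byte function_name
    # attribute; per the uvc_configfs.c hunk below, the write is rejected
    # with -EBUSY once the instance is in use (opts->refcnt != 0).
    $ echo "UVC Camera 0" > /config/usb-gadget/g1/functions/uvc.0/function_name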
diff --git a/Makefile b/Makefile
index cfebae21df4b..3f3743799a06 100644
--- a/Makefile
+++ b/Makefile
@@ -429,11 +429,12 @@ HOSTCC	= gcc
 HOSTCXX	= g++
 endif
 
-export KBUILD_USERCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \
-			      -O2 -fomit-frame-pointer -std=gnu89
-export KBUILD_USERLDFLAGS :=
+KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \
+			 -O2 -fomit-frame-pointer -std=gnu89
+KBUILD_USERCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(USERCFLAGS)
+KBUILD_USERLDFLAGS := $(USERLDFLAGS)
 
-KBUILD_HOSTCFLAGS   := $(KBUILD_USERCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS)
+KBUILD_HOSTCFLAGS   := $(KBUILD_USERHOSTCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS)
 KBUILD_HOSTCXXFLAGS := -Wall -O2 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS)
 KBUILD_HOSTLDFLAGS  := $(HOST_LFS_LDFLAGS) $(HOSTLDFLAGS)
 KBUILD_HOSTLDLIBS   := $(HOST_LFS_LIBS) $(HOSTLDLIBS)
@@ -526,6 +527,7 @@ export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AW
 export PERL PYTHON PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
 export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD
 export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE
+export KBUILD_USERCFLAGS KBUILD_USERLDFLAGS
 export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS KBUILD_LDFLAGS
 export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE
diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml
index 7ddd3f9a87f8..25c9c4a63a26 100755
--- a/android/abi_gki_aarch64.xml
+++ b/android/abi_gki_aarch64.xml
[abi_gki_aarch64.xml hunks omitted: generated ABI symbol-list churn whose XML element bodies did not survive extraction, leaving only bare @@ hunk headers and empty +/- markers.]
diff --git a/android/abi_gki_aarch64_oplus b/android/abi_gki_aarch64_oplus index
bf898882c7b3..5f50e47ef58a 100644 --- a/android/abi_gki_aarch64_oplus +++ b/android/abi_gki_aarch64_oplus @@ -63,6 +63,7 @@ bdput bio_add_pc_page bio_alloc_bioset + bio_associate_blkg_from_css bio_endio bio_put bio_reset @@ -83,6 +84,7 @@ bitmap_zalloc blk_alloc_queue blk_cleanup_queue + blkdev_issue_zeroout blk_execute_rq blk_execute_rq_nowait blk_get_request @@ -90,6 +92,7 @@ blk_mq_rq_cpu blk_mq_sched_mark_restart_hctx blk_mq_start_request + blk_op_str blk_put_request blk_queue_flag_clear blk_queue_flag_set @@ -152,6 +155,7 @@ class_unregister __cleancache_get_page cleanup_srcu_struct + clear_nlink clear_page __ClearPageMovable clk_bulk_disable @@ -372,6 +376,7 @@ del_gendisk del_timer del_timer_sync + dentry_path_raw desc_to_gpio destroy_workqueue dev_alloc_name @@ -590,6 +595,9 @@ devres_release dev_set_name _dev_warn + d_find_alias + d_instantiate_new + d_invalidate disable_irq disable_irq_nosync disable_percpu_irq @@ -681,6 +689,39 @@ down_write d_path dput + dqget + dqput + dquot_acquire + dquot_alloc + dquot_alloc_inode + __dquot_alloc_space + dquot_claim_space_nodirty + dquot_commit + dquot_commit_info + dquot_destroy + dquot_disable + dquot_drop + dquot_file_open + dquot_free_inode + __dquot_free_space + dquot_get_dqblk + dquot_get_next_dqblk + dquot_get_next_id + dquot_get_state + dquot_initialize + dquot_initialize_needed + dquot_load_quota_inode + dquot_mark_dquot_dirty + dquot_quota_off + dquot_quota_on + dquot_quota_on_mount + dquot_release + dquot_resume + dquot_set_dqblk + dquot_set_dqinfo + __dquot_transfer + dquot_transfer + dquot_writeback_dquots drain_workqueue driver_create_file driver_find_device @@ -820,6 +861,7 @@ drm_writeback_connector_init drm_writeback_queue_job drm_writeback_signal_completion + d_tmpfile dump_stack __dynamic_pr_debug edac_device_add_device @@ -850,6 +892,7 @@ eventfd_ctx_remove_wait_queue eventfd_signal event_triggers_call + evict_inodes extcon_get_edev_by_phandle extcon_get_edev_name extcon_get_property @@ -867,6 +910,7 @@ filp_close filp_open_block find_get_pid + find_inode_nowait find_last_bit find_next_bit find_next_zero_bit @@ -897,6 +941,8 @@ free_pages_exact free_percpu free_percpu_irq + freeze_bdev + freeze_super freezing_slow_path freq_qos_add_notifier freq_qos_add_request @@ -905,11 +951,60 @@ freq_qos_update_request freq_scale fs_bio_set + fscrypt_decrypt_bio + fscrypt_dio_supported + fscrypt_drop_inode + fscrypt_encrypt_pagecache_blocks + __fscrypt_encrypt_symlink + fscrypt_file_open + fscrypt_fname_alloc_buffer + fscrypt_fname_disk_to_usr + fscrypt_fname_free_buffer + fscrypt_fname_siphash + fscrypt_free_bounce_page + fscrypt_free_inode + fscrypt_get_symlink + fscrypt_has_permitted_context + __fscrypt_inode_uses_inline_crypto + fscrypt_ioctl_add_key + fscrypt_ioctl_get_key_status + fscrypt_ioctl_get_nonce + fscrypt_ioctl_get_policy + fscrypt_ioctl_get_policy_ex + fscrypt_ioctl_remove_key + fscrypt_ioctl_remove_key_all_users + fscrypt_ioctl_set_policy + fscrypt_match_name + fscrypt_mergeable_bio + __fscrypt_prepare_link + __fscrypt_prepare_lookup + fscrypt_prepare_new_inode + __fscrypt_prepare_readdir + __fscrypt_prepare_rename + __fscrypt_prepare_setattr + fscrypt_prepare_symlink + fscrypt_put_encryption_info + fscrypt_set_bio_crypt_ctx + fscrypt_set_context + fscrypt_set_test_dummy_encryption + fscrypt_setup_filename + fscrypt_show_test_dummy_encryption + fscrypt_symlink_getattr + fscrypt_zeroout_range fsg_common_create_luns fsg_common_set_cdev fsg_common_set_inquiry_string fsg_common_set_sysfs fsg_config_from_params + 
fsverity_cleanup_inode + fsverity_enqueue_verify_work + fsverity_file_open + fsverity_ioctl_enable + fsverity_ioctl_measure + fsverity_ioctl_read_metadata + fsverity_prepare_setattr + fsverity_verify_bio + fsverity_verify_page fsync_bdev fwnode_device_is_available fwnode_find_reference @@ -930,10 +1025,15 @@ gcd generate_random_uuid generic_device_group + generic_fh_to_dentry + generic_fh_to_parent generic_file_llseek + generic_file_llseek_size + __generic_file_write_iter generic_handle_irq generic_iommu_put_resv_regions generic_mii_ioctl + generic_set_encrypted_ci_d_ops genlmsg_multicast_allns genlmsg_put genl_register_family @@ -1089,6 +1189,8 @@ idr_preload idr_remove idr_replace + iget_failed + iget_locked iio_alloc_pollfunc iio_buffer_init iio_buffer_put @@ -1106,11 +1208,14 @@ iio_read_channel_processed iio_read_channel_raw iio_trigger_notify_done + ilookup import_iovec inc_node_page_state inc_zone_page_state in_egroup_p + __inet6_lookup_established inet_csk_get_port + __inet_lookup_established init_iova_domain init_net init_pseudo @@ -1122,6 +1227,7 @@ init_uts_ns init_wait_entry __init_waitqueue_head + inode_nohighmem inode_owner_or_capable inode_permission input_alloc_absinfo @@ -1143,6 +1249,7 @@ input_unregister_device input_unregister_handle input_unregister_handler + insert_inode_locked interval_tree_insert interval_tree_iter_first interval_tree_iter_next @@ -1874,6 +1981,9 @@ pm_wakeup_dev_event pm_wakeup_ws_event pm_wq + posix_acl_alloc + posix_acl_chmod + posix_acl_equiv_mode power_supply_changed power_supply_get_by_name power_supply_get_drvdata @@ -2020,6 +2130,7 @@ register_shrinker register_syscore_ops register_sysctl_table + register_tcf_proto_ops register_virtio_device register_virtio_driver regmap_bulk_read @@ -2235,6 +2346,7 @@ send_sig send_sig_info seq_buf_printf + seq_escape seq_hex_dump seq_lseek seq_open @@ -2257,11 +2369,13 @@ serial8250_rpm_put serial8250_suspend_port serial8250_unregister_port + set_cached_acl set_cpus_allowed_ptr set_normalized_timespec64 set_page_dirty_lock __SetPageMovable set_task_cpu + set_task_ioprio set_user_nice sg_alloc_table sg_alloc_table_from_pages @@ -2277,6 +2391,7 @@ __sg_page_iter_start sg_scsi_ioctl show_regs + shrink_dcache_sb shrink_slab sigprocmask si_mem_available @@ -2469,6 +2584,7 @@ synchronize_net synchronize_rcu synchronize_srcu + sync_inodes_sb syscon_node_to_regmap syscon_regmap_lookup_by_compatible syscon_regmap_lookup_by_phandle @@ -2514,7 +2630,16 @@ __task_pid_nr_ns __task_rq_lock task_rq_lock + tcf_action_exec + tcf_exts_destroy + tcf_exts_dump + tcf_exts_dump_stats + tcf_exts_validate + tcf_queue_work + tcp_hashinfo tcp_parse_options + thaw_bdev + thaw_super thermal_cooling_device_register thermal_cooling_device_unregister thermal_of_cooling_device_register @@ -2630,10 +2755,10 @@ __traceiter_android_vh_binder_reply __traceiter_android_vh_binder_restore_priority __traceiter_android_vh_binder_set_priority - __traceiter_android_vh_binder_trans - __traceiter_android_vh_binder_transaction_init __traceiter_android_vh_binder_thread_read __traceiter_android_vh_binder_thread_release + __traceiter_android_vh_binder_trans + __traceiter_android_vh_binder_transaction_init __traceiter_android_vh_binder_wait_for_work __traceiter_android_vh_binder_wakeup_ilocked __traceiter_android_vh_build_sched_domains @@ -2645,6 +2770,7 @@ __traceiter_android_vh_check_uninterruptible_tasks_dn __traceiter_android_vh_clear_mask_adjust __traceiter_android_vh_clear_reserved_fmt_fields + __traceiter_android_vh_cma_drain_all_pages_bypass 
__traceiter_android_vh_commit_creds __traceiter_android_vh_cpufreq_acct_update_power __traceiter_android_vh_cpufreq_fast_switch @@ -2654,6 +2780,7 @@ __traceiter_android_vh_cpu_idle_exit __traceiter_android_vh_cpu_up __traceiter_android_vh_do_send_sig_info + __traceiter_android_vh_drain_all_pages_bypass __traceiter_android_vh_em_cpu_energy __traceiter_android_vh_exclude_reserved_zone __traceiter_android_vh_exit_creds @@ -2687,9 +2814,12 @@ __traceiter_android_vh_mutex_wait_finish __traceiter_android_vh_mutex_wait_start __traceiter_android_vh_override_creds + __traceiter_android_vh_page_referenced_check_bypass + __traceiter_android_vh_pcplist_add_cma_pages_bypass __traceiter_android_vh_prepare_update_load_avg_se __traceiter_android_vh_printk_hotplug __traceiter_android_vh_process_killed + __traceiter_android_vh_killed_process __traceiter_android_vh_revert_creds __traceiter_android_vh_rmqueue __traceiter_android_vh_rwsem_init @@ -2724,10 +2854,6 @@ __traceiter_android_vh_tune_inactive_ratio __traceiter_android_vh_tune_scan_type __traceiter_android_vh_tune_swappiness - __traceiter_android_vh_page_referenced_check_bypass - __traceiter_android_vh_drain_all_pages_bypass - __traceiter_android_vh_cma_drain_all_pages_bypass - __traceiter_android_vh_pcplist_add_cma_pages_bypass __traceiter_android_vh_ufs_compl_command __traceiter_android_vh_ufs_send_command __traceiter_android_vh_ufs_send_tm_command @@ -2852,6 +2978,7 @@ __tracepoint_android_vh_check_uninterruptible_tasks_dn __tracepoint_android_vh_clear_mask_adjust __tracepoint_android_vh_clear_reserved_fmt_fields + __tracepoint_android_vh_cma_drain_all_pages_bypass __tracepoint_android_vh_commit_creds __tracepoint_android_vh_cpufreq_acct_update_power __tracepoint_android_vh_cpufreq_fast_switch @@ -2861,6 +2988,7 @@ __tracepoint_android_vh_cpu_idle_exit __tracepoint_android_vh_cpu_up __tracepoint_android_vh_do_send_sig_info + __tracepoint_android_vh_drain_all_pages_bypass __tracepoint_android_vh_em_cpu_energy __tracepoint_android_vh_exclude_reserved_zone __tracepoint_android_vh_exit_creds @@ -2894,9 +3022,12 @@ __tracepoint_android_vh_mutex_wait_finish __tracepoint_android_vh_mutex_wait_start __tracepoint_android_vh_override_creds + __tracepoint_android_vh_page_referenced_check_bypass + __tracepoint_android_vh_pcplist_add_cma_pages_bypass __tracepoint_android_vh_prepare_update_load_avg_se __tracepoint_android_vh_printk_hotplug __tracepoint_android_vh_process_killed + __tracepoint_android_vh_killed_process __tracepoint_android_vh_revert_creds __tracepoint_android_vh_rmqueue __tracepoint_android_vh_rwsem_init @@ -2931,10 +3062,6 @@ __tracepoint_android_vh_tune_inactive_ratio __tracepoint_android_vh_tune_scan_type __tracepoint_android_vh_tune_swappiness - __tracepoint_android_vh_page_referenced_check_bypass - __tracepoint_android_vh_drain_all_pages_bypass - __tracepoint_android_vh_cma_drain_all_pages_bypass - __tracepoint_android_vh_pcplist_add_cma_pages_bypass __tracepoint_android_vh_ufs_compl_command __tracepoint_android_vh_ufs_send_command __tracepoint_android_vh_ufs_send_tm_command @@ -3091,6 +3218,7 @@ unregister_shrinker unregister_syscore_ops unregister_sysctl_table + unregister_tcf_proto_ops unregister_virtio_device unregister_virtio_driver up @@ -3325,6 +3453,9 @@ verify_pkcs7_signature vfree vfs_fsync + vfs_ioc_fssetxattr_check + vfs_ioc_setflags_prepare + vfs_setpos video_devdata video_device_alloc video_device_release @@ -3391,6 +3522,7 @@ __warn_printk watchdog_init_timeout watchdog_set_restart_priority + wbc_account_cgroup_owner 
wireless_nlevent_flush woken_wake_function work_busy diff --git a/android/abi_gki_aarch64_qcom b/android/abi_gki_aarch64_qcom index bcfc4c9ef782..fe1688823b24 100644 --- a/android/abi_gki_aarch64_qcom +++ b/android/abi_gki_aarch64_qcom @@ -2603,12 +2603,14 @@ __traceiter_ipi_entry __traceiter_ipi_raise __traceiter_irq_handler_entry + __traceiter_map __traceiter_rwmmio_post_read __traceiter_rwmmio_read __traceiter_rwmmio_write __traceiter_sched_overutilized_tp __traceiter_sched_switch __traceiter_suspend_resume + __traceiter_unmap __tracepoint_android_rvh_account_irq __tracepoint_android_rvh_after_dequeue_task __tracepoint_android_rvh_after_enqueue_task @@ -2727,6 +2729,7 @@ __tracepoint_ipi_entry __tracepoint_ipi_raise __tracepoint_irq_handler_entry + __tracepoint_map tracepoint_probe_register tracepoint_probe_register_prio tracepoint_probe_unregister @@ -2736,6 +2739,7 @@ __tracepoint_sched_overutilized_tp __tracepoint_sched_switch __tracepoint_suspend_resume + __tracepoint_unmap trace_print_array_seq trace_print_flags_seq trace_print_hex_seq diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index 9703932d9262..7d7c4086de6c 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -118,6 +118,9 @@ CONFIG_CMA_DEBUGFS=y CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=16 CONFIG_READ_ONLY_THP_FOR_FS=y +CONFIG_DAMON=y +CONFIG_DAMON_PADDR=y +CONFIG_DAMON_RECLAIM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index 8525a0e365c5..14ea69c0d417 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -94,6 +94,9 @@ CONFIG_CMA_DEBUGFS=y CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=16 CONFIG_READ_ONLY_THP_FOR_FS=y +CONFIG_DAMON=y +CONFIG_DAMON_PADDR=y +CONFIG_DAMON_RECLAIM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h index 97bbb4a9083a..05b48b33baf0 100644 --- a/arch/x86/include/asm/kfence.h +++ b/arch/x86/include/asm/kfence.h @@ -56,8 +56,13 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) else set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); - /* Flush this CPU's TLB. */ + /* + * Flush this CPU's TLB, assuming whoever did the allocation/free is + * likely to continue running on this CPU. 
+ */ + preempt_disable(); flush_tlb_one_kernel(addr); + preempt_enable(); return true; } diff --git a/build.config.common b/build.config.common index b321ae20ecea..9ec1c637a79f 100644 --- a/build.config.common +++ b/build.config.common @@ -7,6 +7,7 @@ DTC=dtc CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r416183b/bin BUILDTOOLS_PREBUILT_BIN=build/build-tools/path/linux-x86 +KCFLAGS="${KCFLAGS} -D__ANDROID_COMMON_KERNEL__" EXTRA_CMDS='' STOP_SHIP_TRACEPRINTK=1 IN_KERNEL_MODULES=1 diff --git a/drivers/android/vendor_hooks.c b/drivers/android/vendor_hooks.c index 2ae0b6eecc37..1ca42b0dfd4d 100644 --- a/drivers/android/vendor_hooks.c +++ b/drivers/android/vendor_hooks.c @@ -99,6 +99,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_binder_restore_priority); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_binder_wakeup_ilocked); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_do_send_sig_info); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_process_killed); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_killed_process); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_init); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_wake); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_write_finished); @@ -113,6 +114,11 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_read_wait_start); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_read_wait_finish); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_write_wait_start); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_write_wait_finish); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_set_owner); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_set_reader_owned); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_mark_wake_readers); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_up_read_end); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_up_write_end); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_sched_show_task); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_shmem_alloc_page); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_cpu_idle_enter); @@ -236,6 +242,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_sync_txn_recvd); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_build_sched_domains); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_alter_mutex_list_add); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mutex_unlock_slowpath); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mutex_unlock_slowpath_end); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_wake_finish); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_do_undefinstr); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_do_ptrauth_fault); @@ -397,3 +404,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_binder_has_work_ilocked); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_binder_read_done); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_handle_tlb_conf); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_shrink_node_memcgs); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ra_tuning_max_page); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_tune_memcg_scan_type); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 018fb172a098..132b019695f3 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -827,9 +827,17 @@ static int virtblk_probe(struct virtio_device *vdev) err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE, struct virtio_blk_config, blk_size, &blk_size); - if (!err) + if (!err) { + err = blk_validate_block_size(blk_size); + if (err) { + dev_err(&vdev->dev, + "virtio_blk: invalid block size: 0x%x\n", + blk_size); + goto out_cleanup_disk; + } + blk_queue_logical_block_size(q, blk_size); - else + } else blk_size = queue_logical_block_size(q); /* Use topology information if available */ @@ -890,6 
+898,8 @@ static int virtblk_probe(struct virtio_device *vdev) device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); return 0; +out_cleanup_disk: + blk_cleanup_queue(vblk->disk->queue); out_free_tags: blk_mq_free_tag_set(&vblk->tag_set); out_put_disk: diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 1d320eec94aa..ed6902eb8d91 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -268,11 +268,13 @@ int iommu_probe_device(struct device *dev) * support default domains, so the return value is not yet * checked. */ + mutex_lock(&group->mutex); iommu_alloc_default_domain(group, dev); if (group->default_domain) { ret = __iommu_attach_device(group->default_domain, dev); if (ret) { + mutex_unlock(&group->mutex); iommu_group_put(group); goto err_release; } @@ -280,6 +282,7 @@ int iommu_probe_device(struct device *dev) iommu_create_device_direct_mappings(group, dev); + mutex_unlock(&group->mutex); iommu_group_put(group); if (ops->probe_finalize) diff --git a/drivers/md/dm-bow.c b/drivers/md/dm-bow.c index cfd1fa63ff97..2e95d6149692 100644 --- a/drivers/md/dm-bow.c +++ b/drivers/md/dm-bow.c @@ -599,6 +599,7 @@ static void dm_bow_dtr(struct dm_target *ti) struct bow_context *bc = (struct bow_context *) ti->private; struct kobject *kobj; + mutex_lock(&bc->ranges_lock); while (rb_first(&bc->ranges)) { struct bow_range *br = container_of(rb_first(&bc->ranges), struct bow_range, node); @@ -606,6 +607,8 @@ static void dm_bow_dtr(struct dm_target *ti) rb_erase(&br->node, &bc->ranges); kfree(br); } + mutex_unlock(&bc->ranges_lock); + if (bc->workqueue) destroy_workqueue(bc->workqueue); if (bc->bufio) @@ -1181,6 +1184,7 @@ static void dm_bow_tablestatus(struct dm_target *ti, char *result, return; } + mutex_lock(&bc->ranges_lock); for (i = rb_first(&bc->ranges); i; i = rb_next(i)) { struct bow_range *br = container_of(i, struct bow_range, node); @@ -1188,11 +1192,11 @@ static void dm_bow_tablestatus(struct dm_target *ti, char *result, readable_type[br->type], (unsigned long long)br->sector); if (result >= end) - return; + goto unlock; result += scnprintf(result, end - result, "\n"); if (result >= end) - return; + goto unlock; if (br->type == TRIMMED) ++trimmed_range_count; @@ -1214,19 +1218,22 @@ static void dm_bow_tablestatus(struct dm_target *ti, char *result, if (!rb_next(i)) { scnprintf(result, end - result, "\nERROR: Last range not of type TOP"); - return; + goto unlock; } if (br->sector > range_top(br)) { scnprintf(result, end - result, "\nERROR: sectors out of order"); - return; + goto unlock; } } if (trimmed_range_count != trimmed_list_length) scnprintf(result, end - result, "\nERROR: not all trimmed ranges in trimmed list"); + +unlock: + mutex_unlock(&bc->ranges_lock); } static void dm_bow_status(struct dm_target *ti, status_type_t type, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 10eb0544f1cb..9fd176e225af 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -608,18 +608,17 @@ static void start_io_acct(struct dm_io *io) false, 0, &io->stats_aux); } -static void end_io_acct(struct dm_io *io) +static void end_io_acct(struct mapped_device *md, struct bio *bio, + unsigned long start_time, struct dm_stats_aux *stats_aux) { - struct mapped_device *md = io->md; - struct bio *bio = io->orig_bio; - unsigned long duration = jiffies - io->start_time; + unsigned long duration = jiffies - start_time; - bio_end_io_acct(bio, io->start_time); + bio_end_io_acct(bio, start_time); if (unlikely(dm_stats_used(&md->stats))) dm_stats_account_io(&md->stats, bio_data_dir(bio), 
bio->bi_iter.bi_sector, bio_sectors(bio), - true, duration, &io->stats_aux); + true, duration, stats_aux); /* nudge anyone waiting on suspend queue */ if (unlikely(wq_has_sleeper(&md->wait))) @@ -904,6 +903,8 @@ static void dec_pending(struct dm_io *io, blk_status_t error) blk_status_t io_error; struct bio *bio; struct mapped_device *md = io->md; + unsigned long start_time = 0; + struct dm_stats_aux stats_aux; /* Push-back supersedes any I/O errors */ if (unlikely(error)) { @@ -930,8 +931,10 @@ static void dec_pending(struct dm_io *io, blk_status_t error) io_error = io->status; bio = io->orig_bio; - end_io_acct(io); + start_time = io->start_time; + stats_aux = io->stats_aux; free_io(md, io); + end_io_acct(md, bio, start_time, &stats_aux); if (io_error == BLK_STS_DM_REQUEUE) return; diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index ca7c55d6a41d..985e00aee4ee 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -670,9 +670,20 @@ static netdev_tx_t usb_8dev_start_xmit(struct sk_buff *skb, atomic_inc(&priv->active_tx_urbs); err = usb_submit_urb(urb, GFP_ATOMIC); - if (unlikely(err)) - goto failed; - else if (atomic_read(&priv->active_tx_urbs) >= MAX_TX_URBS) + if (unlikely(err)) { + can_free_echo_skb(netdev, context->echo_index); + + usb_unanchor_urb(urb); + usb_free_coherent(priv->udev, size, buf, urb->transfer_dma); + + atomic_dec(&priv->active_tx_urbs); + + if (err == -ENODEV) + netif_device_detach(netdev); + else + netdev_warn(netdev, "failed tx_urb %d\n", err); + stats->tx_dropped++; + } else if (atomic_read(&priv->active_tx_urbs) >= MAX_TX_URBS) /* Slow down tx path */ netif_stop_queue(netdev); @@ -691,19 +702,6 @@ nofreecontext: return NETDEV_TX_BUSY; -failed: - can_free_echo_skb(netdev, context->echo_index); - - usb_unanchor_urb(urb); - usb_free_coherent(priv->udev, size, buf, urb->transfer_dma); - - atomic_dec(&priv->active_tx_urbs); - - if (err == -ENODEV) - netif_device_detach(netdev); - else - netdev_warn(netdev, "failed tx_urb %d\n", err); - nomembuf: usb_free_urb(urb); diff --git a/drivers/remoteproc/remoteproc_debugfs.c b/drivers/remoteproc/remoteproc_debugfs.c index b5a1e3b697d9..581930483ef8 100644 --- a/drivers/remoteproc/remoteproc_debugfs.c +++ b/drivers/remoteproc/remoteproc_debugfs.c @@ -76,7 +76,7 @@ static ssize_t rproc_coredump_write(struct file *filp, int ret, err = 0; char buf[20]; - if (count > sizeof(buf)) + if (count < 1 || count > sizeof(buf)) return -EINVAL; ret = copy_from_user(buf, user_buf, count); diff --git a/drivers/staging/android/ion/ion_buffer.c b/drivers/staging/android/ion/ion_buffer.c index 9baca1a472b6..9c7a353a4220 100644 --- a/drivers/staging/android/ion/ion_buffer.c +++ b/drivers/staging/android/ion/ion_buffer.c @@ -249,6 +249,9 @@ void *ion_buffer_kmap_get(struct ion_buffer *buffer) void *vaddr; if (buffer->kmap_cnt) { + if (buffer->kmap_cnt == INT_MAX) + return ERR_PTR(-EOVERFLOW); + buffer->kmap_cnt++; return buffer->vaddr; } diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index cb514a3bd449..ca0966badf42 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -114,8 +114,6 @@ void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode) dwc->current_dr_role = mode; } -static int dwc3_core_soft_reset(struct dwc3 *dwc); - static void __dwc3_set_mode(struct work_struct *work) { struct dwc3 *dwc = work_to_dwc(work); @@ -260,7 +258,7 @@ u32 dwc3_core_fifo_space(struct dwc3_ep *dep, u8 type) * dwc3_core_soft_reset - Issues core soft reset and PHY reset * @dwc: pointer 
to our context structure */ -static int dwc3_core_soft_reset(struct dwc3 *dwc) +int dwc3_core_soft_reset(struct dwc3 *dwc) { u32 reg; int retries = 1000; diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 44f1364d1682..28df4e8b9359 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -729,6 +729,7 @@ struct dwc3_ep { #define DWC3_EP_FIRST_STREAM_PRIMED BIT(10) #define DWC3_EP_PENDING_CLEAR_STALL BIT(11) #define DWC3_EP_TXFIFO_RESIZED BIT(12) +#define DWC3_EP_DELAY_STOP BIT(13) /* This last one is specific to EP0 */ #define DWC3_EP0_DIR_IN BIT(31) @@ -1539,6 +1540,8 @@ bool dwc3_has_imod(struct dwc3 *dwc); int dwc3_event_buffers_setup(struct dwc3 *dwc); void dwc3_event_buffers_cleanup(struct dwc3 *dwc); +int dwc3_core_soft_reset(struct dwc3 *dwc); + #if IS_ENABLED(CONFIG_USB_DWC3_HOST) || IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE) int dwc3_host_init(struct dwc3 *dwc); void dwc3_host_exit(struct dwc3 *dwc); diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 658739410992..1064be5518f6 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -271,6 +271,7 @@ void dwc3_ep0_out_start(struct dwc3 *dwc) { struct dwc3_ep *dep; int ret; + int i; complete(&dwc->ep0_in_setup); @@ -279,6 +280,19 @@ void dwc3_ep0_out_start(struct dwc3 *dwc) DWC3_TRBCTL_CONTROL_SETUP, false); ret = dwc3_ep0_start_trans(dep); WARN_ON(ret < 0); + for (i = 2; i < DWC3_ENDPOINTS_NUM; i++) { + struct dwc3_ep *dwc3_ep; + + dwc3_ep = dwc->eps[i]; + if (!dwc3_ep) + continue; + + if (!(dwc3_ep->flags & DWC3_EP_DELAY_STOP)) + continue; + + dwc3_ep->flags &= ~DWC3_EP_DELAY_STOP; + dwc3_stop_active_transfer(dwc3_ep, true, true); + } } static struct dwc3_ep *dwc3_wIndex_to_dep(struct dwc3 *dwc, __le16 wIndex_le) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 93759e839267..74d6c30c3b54 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -769,7 +769,8 @@ static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep) num_fifos = 3; if (dep->endpoint.maxburst > 6 && - usb_endpoint_xfer_bulk(dep->endpoint.desc) && DWC3_IP_IS(DWC31)) + (usb_endpoint_xfer_bulk(dep->endpoint.desc) || + usb_endpoint_xfer_isoc(dep->endpoint.desc)) && DWC3_IP_IS(DWC31)) num_fifos = dwc->tx_fifo_resize_max_num; /* FIFO size for a single buffer */ @@ -1805,7 +1806,13 @@ static int __dwc3_gadget_start_isoc(struct dwc3_ep *dep) } for (i = 0; i < DWC3_ISOC_MAX_RETRIES; i++) { - dep->frame_number = DWC3_ALIGN_FRAME(dep, i + 1); + int future_interval = i + 1; + + /* Give the controller at least 500us to schedule transfers */ + if (desc->bInterval < 3) + future_interval += 3 - desc->bInterval; + + dep->frame_number = DWC3_ALIGN_FRAME(dep, future_interval); ret = __dwc3_gadget_kick_transfer(dep); if (ret != -EAGAIN) @@ -1874,6 +1881,7 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) */ if ((dep->flags & DWC3_EP_END_TRANSFER_PENDING) || (dep->flags & DWC3_EP_WEDGE) || + (dep->flags & DWC3_EP_DELAY_STOP) || (dep->flags & DWC3_EP_STALL)) { dep->flags |= DWC3_EP_DELAY_START; return 0; @@ -1953,10 +1961,10 @@ static void dwc3_gadget_ep_skip_trbs(struct dwc3_ep *dep, struct dwc3_request *r static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep) { struct dwc3_request *req; - struct dwc3_request *tmp; struct dwc3 *dwc = dep->dwc; - list_for_each_entry_safe(req, tmp, &dep->cancelled_list, list) { + while (!list_empty(&dep->cancelled_list)) { + req = next_request(&dep->cancelled_list); dwc3_gadget_ep_skip_trbs(dep, req); 
switch (req->status) { case DWC3_REQUEST_STATUS_DISCONNECTED: @@ -1973,6 +1981,12 @@ static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep) dwc3_gadget_giveback(dep, req, -ECONNRESET); break; } + /* + * The endpoint is disabled; let dwc3_remove_requests() + * handle the cleanup. + */ + if (!dep->endpoint.desc) + break; } } @@ -2008,6 +2022,16 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, if (r == req) { struct dwc3_request *t; + /* + * If a Setup packet is received but has yet to DMA out, the controller will + * not process the End Transfer command of any endpoint. Polling its + * DEPCMD.CmdAct may block setting up the TRB for the Setup packet, causing a + * timeout. Delay issuing the End Transfer command until the Setup TRB is + * prepared. + */ + if (dwc->ep0state != EP0_SETUP_PHASE && !dwc->delayed_status) + dep->flags |= DWC3_EP_DELAY_STOP; + /* wait until it is processed */ dwc3_stop_active_transfer(dep, true, true); @@ -2089,7 +2113,8 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) if (!list_empty(&dep->started_list)) dep->flags |= DWC3_EP_DELAY_START; - if (dep->flags & DWC3_EP_END_TRANSFER_PENDING) { + if (dep->flags & DWC3_EP_END_TRANSFER_PENDING || + (dep->flags & DWC3_EP_DELAY_STOP)) { dep->flags |= DWC3_EP_PENDING_CLEAR_STALL; return 0; } @@ -2520,6 +2545,17 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) dwc->ev_buf->length; } } else { + /* + * In the Synopsys DWC_usb31 1.90a programming guide section + * 4.1.9, it specifies that a reconnect after a + * device-initiated disconnect requires a core soft reset + * (DCTL.CSftRst) before enabling the run/stop bit. + */ + spin_unlock_irqrestore(&dwc->lock, flags); + dwc3_core_soft_reset(dwc); + spin_lock_irqsave(&dwc->lock, flags); + + dwc3_event_buffers_setup(dwc); __dwc3_gadget_start(dwc); } @@ -3241,15 +3277,21 @@ static void dwc3_gadget_ep_cleanup_completed_requests(struct dwc3_ep *dep, const struct dwc3_event_depevt *event, int status) { struct dwc3_request *req; - struct dwc3_request *tmp; - list_for_each_entry_safe(req, tmp, &dep->started_list, list) { + while (!list_empty(&dep->started_list)) { int ret; + req = next_request(&dep->started_list); ret = dwc3_gadget_ep_cleanup_completed_request(dep, event, req, status); if (ret) break; + /* + * The endpoint is disabled; let dwc3_remove_requests() + * handle the cleanup.
+ */ + if (!dep->endpoint.desc) + break; } } @@ -3585,6 +3627,7 @@ void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, int ret; if (!(dep->flags & DWC3_EP_TRANSFER_STARTED) || + (dep->flags & DWC3_EP_DELAY_STOP) || (dep->flags & DWC3_EP_END_TRANSFER_PENDING)) return; diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h index 77df4b6d6c13..f763380e672e 100644 --- a/drivers/usb/dwc3/gadget.h +++ b/drivers/usb/dwc3/gadget.h @@ -116,6 +116,7 @@ int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request, gfp_t gfp_flags); int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol); void dwc3_ep0_send_delayed_status(struct dwc3 *dwc); +void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool interrupt); /** * dwc3_gadget_ep_get_transfer_index - Gets transfer index from HW diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 553382ce3837..9315313108c9 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -159,6 +159,8 @@ int config_ep_by_speed_and_alt(struct usb_gadget *g, int want_comp_desc = 0; struct usb_descriptor_header **d_spd; /* cursor for speed desc */ + struct usb_composite_dev *cdev; + bool incomplete_desc = false; if (!g || !f || !_ep) return -EIO; @@ -167,28 +169,43 @@ int config_ep_by_speed_and_alt(struct usb_gadget *g, switch (g->speed) { case USB_SPEED_SUPER_PLUS: if (gadget_is_superspeed_plus(g)) { - speed_desc = f->ssp_descriptors; - want_comp_desc = 1; - break; + if (f->ssp_descriptors) { + speed_desc = f->ssp_descriptors; + want_comp_desc = 1; + break; + } + incomplete_desc = true; } fallthrough; case USB_SPEED_SUPER: if (gadget_is_superspeed(g)) { - speed_desc = f->ss_descriptors; - want_comp_desc = 1; - break; + if (f->ss_descriptors) { + speed_desc = f->ss_descriptors; + want_comp_desc = 1; + break; + } + incomplete_desc = true; } fallthrough; case USB_SPEED_HIGH: if (gadget_is_dualspeed(g)) { - speed_desc = f->hs_descriptors; - break; + if (f->hs_descriptors) { + speed_desc = f->hs_descriptors; + break; + } + incomplete_desc = true; } fallthrough; default: speed_desc = f->fs_descriptors; } + cdev = get_gadget_data(g); + if (incomplete_desc) + WARNING(cdev, + "%s doesn't hold the descriptors for current speed\n", + f->name); + /* find correct alternate setting descriptor */ for_each_desc(speed_desc, d_spd, USB_DT_INTERFACE) { int_desc = (struct usb_interface_descriptor *)*d_spd; @@ -244,12 +261,8 @@ ep_found: _ep->maxburst = comp_desc->bMaxBurst + 1; break; default: - if (comp_desc->bMaxBurst != 0) { - struct usb_composite_dev *cdev; - - cdev = get_gadget_data(g); + if (comp_desc->bMaxBurst != 0) ERROR(cdev, "ep0 bMaxBurst must be 0\n"); - } _ep->maxburst = 1; break; } diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 2d7b7cd6d3a6..21dd268175c6 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1530,6 +1530,8 @@ static void configfs_composite_unbind(struct usb_gadget *gadget) usb_ep_autoconfig_reset(cdev->gadget); spin_lock_irqsave(&gi->spinlock, flags); cdev->gadget = NULL; + cdev->deactivations = 0; + gadget->deactivated = false; set_gadget_data(gadget, NULL); spin_unlock_irqrestore(&gi->spinlock, flags); } diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index f48a00e49794..875df9ef8887 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -44,7 +44,7 @@ MODULE_PARM_DESC(trace, "Trace level bitmask"); 
#define UVC_STRING_STREAMING_IDX 1 static struct usb_string uvc_en_us_strings[] = { - [UVC_STRING_CONTROL_IDX].s = "UVC Camera", + /* [UVC_STRING_CONTROL_IDX].s = DYNAMIC, */ [UVC_STRING_STREAMING_IDX].s = "Video Streaming", { } }; @@ -674,6 +674,7 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) uvc_hs_streaming_ep.bEndpointAddress = uvc->video.ep->address; uvc_ss_streaming_ep.bEndpointAddress = uvc->video.ep->address; + uvc_en_us_strings[UVC_STRING_CONTROL_IDX].s = opts->function_name; us = usb_gstrings_attach(cdev, uvc_function_strings, ARRAY_SIZE(uvc_en_us_strings)); if (IS_ERR(us)) { @@ -864,6 +865,7 @@ static struct usb_function_instance *uvc_alloc_inst(void) opts->streaming_interval = 1; opts->streaming_maxpacket = 1024; + snprintf(opts->function_name, sizeof(opts->function_name), "UVC Camera"); ret = uvcg_attach_configfs(opts); if (ret < 0) { @@ -887,13 +889,37 @@ static void uvc_unbind(struct usb_configuration *c, struct usb_function *f) { struct usb_composite_dev *cdev = c->cdev; struct uvc_device *uvc = to_uvc(f); + long wait_ret = 1; uvcg_info(f, "%s\n", __func__); + /* If we know we're connected via v4l2, then there should be a cleanup + * of the device from userspace either via UVC_EVENT_DISCONNECT or + * through the video device removal uevent. Allow some time for the + * application to close out before things get deleted. + */ + if (uvc->func_connected) { + uvcg_dbg(f, "waiting for clean disconnect\n"); + wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue, + uvc->func_connected == false, msecs_to_jiffies(500)); + uvcg_dbg(f, "done waiting with ret: %ld\n", wait_ret); + } + device_remove_file(&uvc->vdev.dev, &dev_attr_function_name); video_unregister_device(&uvc->vdev); v4l2_device_unregister(&uvc->v4l2_dev); + if (uvc->func_connected) { + /* Wait for the release to occur to ensure there are no longer any + * pending operations that may cause panics when resources are cleaned + * up. 
+ */ + uvcg_warn(f, "%s no clean disconnect, wait for release\n", __func__); + wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue, + uvc->func_connected == false, msecs_to_jiffies(1000)); + uvcg_dbg(f, "done waiting for release with ret: %ld\n", wait_ret); + } + usb_ep_free_request(cdev->gadget->ep0, uvc->control_req); kfree(uvc->control_buf); @@ -912,6 +938,7 @@ static struct usb_function *uvc_alloc(struct usb_function_instance *fi) mutex_init(&uvc->video.mutex); uvc->state = UVC_STATE_DISCONNECTED; + init_waitqueue_head(&uvc->func_connected_queue); opts = fi_to_f_uvc_opts(fi); mutex_lock(&opts->lock); diff --git a/drivers/usb/gadget/function/u_uvc.h b/drivers/usb/gadget/function/u_uvc.h index 9a01a7d4f17f..24b8681b0d6f 100644 --- a/drivers/usb/gadget/function/u_uvc.h +++ b/drivers/usb/gadget/function/u_uvc.h @@ -27,6 +27,7 @@ struct f_uvc_opts { unsigned int control_interface; unsigned int streaming_interface; + char function_name[32]; /* * Control descriptors array pointers for full-/high-speed and diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h index 893aaa70f81a..59c5a67b5ff3 100644 --- a/drivers/usb/gadget/function/uvc.h +++ b/drivers/usb/gadget/function/uvc.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -65,13 +66,17 @@ extern unsigned int uvc_gadget_trace_param; * Driver specific constants */ -#define UVC_NUM_REQUESTS 4 #define UVC_MAX_REQUEST_SIZE 64 #define UVC_MAX_EVENTS 4 /* ------------------------------------------------------------------------ * Structures */ +struct uvc_request { + struct usb_request *req; + u8 *req_buffer; + struct uvc_video *video; +}; struct uvc_video { struct uvc_device *uvc; @@ -87,10 +92,11 @@ struct uvc_video { unsigned int imagesize; struct mutex mutex; /* protects frame parameters */ + unsigned int uvc_num_requests; + /* Requests */ unsigned int req_size; - struct usb_request *req[UVC_NUM_REQUESTS]; - __u8 *req_buffer[UVC_NUM_REQUESTS]; + struct uvc_request *ureq; struct list_head req_free; spinlock_t req_lock; @@ -118,6 +124,7 @@ struct uvc_device { struct usb_function func; struct uvc_video video; bool func_connected; + wait_queue_head_t func_connected_queue; /* Descriptors */ struct { diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index 00fb58e50a15..9fafa8001d33 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -2430,10 +2430,51 @@ UVCG_OPTS_ATTR(streaming_maxburst, streaming_maxburst, 15); #undef UVCG_OPTS_ATTR +#define UVCG_OPTS_STRING_ATTR(cname, aname) \ +static ssize_t f_uvc_opts_string_##cname##_show(struct config_item *item,\ + char *page) \ +{ \ + struct f_uvc_opts *opts = to_f_uvc_opts(item); \ + int result; \ + \ + mutex_lock(&opts->lock); \ + result = snprintf(page, sizeof(opts->aname), "%s", opts->aname);\ + mutex_unlock(&opts->lock); \ + \ + return result; \ +} \ + \ +static ssize_t f_uvc_opts_string_##cname##_store(struct config_item *item,\ + const char *page, size_t len) \ +{ \ + struct f_uvc_opts *opts = to_f_uvc_opts(item); \ + int ret = 0; \ + \ + mutex_lock(&opts->lock); \ + if (opts->refcnt) { \ + ret = -EBUSY; \ + goto end; \ + } \ + \ + ret = snprintf(opts->aname, min(sizeof(opts->aname), len), \ + "%s", page); \ + \ +end: \ + mutex_unlock(&opts->lock); \ + return ret; \ +} \ + \ +UVC_ATTR(f_uvc_opts_string_, cname, aname) + +UVCG_OPTS_STRING_ATTR(function_name, function_name); + +#undef UVCG_OPTS_STRING_ATTR + static struct 
configfs_attribute *uvc_attrs[] = { &f_uvc_opts_attr_streaming_interval, &f_uvc_opts_attr_streaming_maxpacket, &f_uvc_opts_attr_streaming_maxburst, + &f_uvc_opts_string_attr_function_name, NULL, }; diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c index 61e2c94cc0b0..51c83eea8318 100644 --- a/drivers/usb/gadget/function/uvc_queue.c +++ b/drivers/usb/gadget/function/uvc_queue.c @@ -43,6 +43,7 @@ static int uvc_queue_setup(struct vb2_queue *vq, { struct uvc_video_queue *queue = vb2_get_drv_priv(vq); struct uvc_video *video = container_of(queue, struct uvc_video, queue); + struct usb_composite_dev *cdev = video->uvc->func.config->cdev; if (*nbuffers > UVC_MAX_VIDEO_BUFFERS) *nbuffers = UVC_MAX_VIDEO_BUFFERS; @@ -51,6 +52,11 @@ static int uvc_queue_setup(struct vb2_queue *vq, sizes[0] = video->imagesize; + if (cdev->gadget->speed < USB_SPEED_SUPER) + video->uvc_num_requests = 4; + else + video->uvc_num_requests = 64; + return 0; } @@ -163,18 +169,7 @@ int uvcg_query_buffer(struct uvc_video_queue *queue, struct v4l2_buffer *buf) int uvcg_queue_buffer(struct uvc_video_queue *queue, struct v4l2_buffer *buf) { - unsigned long flags; - int ret; - - ret = vb2_qbuf(&queue->queue, NULL, buf); - if (ret < 0) - return ret; - - spin_lock_irqsave(&queue->irqlock, flags); - ret = (queue->flags & UVC_QUEUE_PAUSED) != 0; - queue->flags &= ~UVC_QUEUE_PAUSED; - spin_unlock_irqrestore(&queue->irqlock, flags); - return ret; + return vb2_qbuf(&queue->queue, NULL, buf); } /* @@ -242,6 +237,8 @@ void uvcg_queue_cancel(struct uvc_video_queue *queue, int disconnect) buf->state = UVC_BUF_STATE_ERROR; vb2_buffer_done(&buf->buf.vb2_buf, VB2_BUF_STATE_ERROR); } + queue->buf_used = 0; + /* This must be protected by the irqlock spinlock to avoid race * conditions between uvc_queue_buffer and the disconnection event that * could result in an interruptible wait in uvc_dequeue_buffer. 
Do not @@ -340,8 +337,6 @@ struct uvc_buffer *uvcg_queue_head(struct uvc_video_queue *queue) if (!list_empty(&queue->irqqueue)) buf = list_first_entry(&queue->irqqueue, struct uvc_buffer, queue); - else - queue->flags |= UVC_QUEUE_PAUSED; return buf; } diff --git a/drivers/usb/gadget/function/uvc_queue.h b/drivers/usb/gadget/function/uvc_queue.h index 2f0fff769843..4338ff871ee6 100644 --- a/drivers/usb/gadget/function/uvc_queue.h +++ b/drivers/usb/gadget/function/uvc_queue.h @@ -40,7 +40,6 @@ struct uvc_buffer { #define UVC_QUEUE_DISCONNECTED (1 << 0) #define UVC_QUEUE_DROP_INCOMPLETE (1 << 1) -#define UVC_QUEUE_PAUSED (1 << 2) struct uvc_video_queue { struct vb2_queue queue; diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c index 197c26f7aec6..fd8f73bb726d 100644 --- a/drivers/usb/gadget/function/uvc_v4l2.c +++ b/drivers/usb/gadget/function/uvc_v4l2.c @@ -169,7 +169,8 @@ uvc_v4l2_qbuf(struct file *file, void *fh, struct v4l2_buffer *b) if (ret < 0) return ret; - schedule_work(&video->pump); + if (uvc->state == UVC_STATE_STREAMING) + schedule_work(&video->pump); return ret; } @@ -252,10 +253,11 @@ uvc_v4l2_subscribe_event(struct v4l2_fh *fh, static void uvc_v4l2_disable(struct uvc_device *uvc) { - uvc->func_connected = false; uvc_function_disconnect(uvc); uvcg_video_enable(&uvc->video, 0); uvcg_free_buffers(&uvc->video.queue); + uvc->func_connected = false; + wake_up_interruptible(&uvc->func_connected_queue); } static int diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index 633e23d58d86..e5e2c2bae3b9 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -134,9 +134,12 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req) uvcg_err(&video->uvc->func, "Failed to queue request (%d).\n", ret); - /* Isochronous endpoints can't be halted. */ - if (usb_endpoint_xfer_bulk(video->ep->desc)) - usb_ep_set_halt(video->ep); + /* If the endpoint is disabled, the descriptor may be NULL. */ + if (video->ep->desc) { + /* Isochronous endpoints can't be halted. 
*/ + if (usb_endpoint_xfer_bulk(video->ep->desc)) + usb_ep_set_halt(video->ep); + } } return ret; @@ -145,8 +148,10 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req) static void uvc_video_complete(struct usb_ep *ep, struct usb_request *req) { - struct uvc_video *video = req->context; + struct uvc_request *ureq = req->context; + struct uvc_video *video = ureq->video; struct uvc_video_queue *queue = &video->queue; + struct uvc_device *uvc = video->uvc; unsigned long flags; switch (req->status) { @@ -169,7 +174,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req) list_add_tail(&req->list, &video->req_free); spin_unlock_irqrestore(&video->req_lock, flags); - schedule_work(&video->pump); + if (uvc->state == UVC_STATE_STREAMING) + schedule_work(&video->pump); } static int @@ -177,16 +183,21 @@ uvc_video_free_requests(struct uvc_video *video) { unsigned int i; - for (i = 0; i < UVC_NUM_REQUESTS; ++i) { - if (video->req[i]) { - usb_ep_free_request(video->ep, video->req[i]); - video->req[i] = NULL; + if (video->ureq) { + for (i = 0; i < video->uvc_num_requests; ++i) { + if (video->ureq[i].req) { + usb_ep_free_request(video->ep, video->ureq[i].req); + video->ureq[i].req = NULL; + } + + if (video->ureq[i].req_buffer) { + kfree(video->ureq[i].req_buffer); + video->ureq[i].req_buffer = NULL; + } } - if (video->req_buffer[i]) { - kfree(video->req_buffer[i]); - video->req_buffer[i] = NULL; - } + kfree(video->ureq); + video->ureq = NULL; } INIT_LIST_HEAD(&video->req_free); @@ -207,21 +218,26 @@ uvc_video_alloc_requests(struct uvc_video *video) * max_t(unsigned int, video->ep->maxburst, 1) * (video->ep->mult); - for (i = 0; i < UVC_NUM_REQUESTS; ++i) { - video->req_buffer[i] = kmalloc(req_size, GFP_KERNEL); - if (video->req_buffer[i] == NULL) + video->ureq = kcalloc(video->uvc_num_requests, sizeof(struct uvc_request), GFP_KERNEL); + if (video->ureq == NULL) + return -ENOMEM; + + for (i = 0; i < video->uvc_num_requests; ++i) { + video->ureq[i].req_buffer = kmalloc(req_size, GFP_KERNEL); + if (video->ureq[i].req_buffer == NULL) goto error; - video->req[i] = usb_ep_alloc_request(video->ep, GFP_KERNEL); - if (video->req[i] == NULL) + video->ureq[i].req = usb_ep_alloc_request(video->ep, GFP_KERNEL); + if (video->ureq[i].req == NULL) goto error; - video->req[i]->buf = video->req_buffer[i]; - video->req[i]->length = 0; - video->req[i]->complete = uvc_video_complete; - video->req[i]->context = video; + video->ureq[i].req->buf = video->ureq[i].req_buffer; + video->ureq[i].req->length = 0; + video->ureq[i].req->complete = uvc_video_complete; + video->ureq[i].req->context = &video->ureq[i]; + video->ureq[i].video = video; - list_add_tail(&video->req[i]->list, &video->req_free); + list_add_tail(&video->ureq[i].req->list, &video->req_free); } video->req_size = req_size; @@ -247,12 +263,12 @@ static void uvcg_video_pump(struct work_struct *work) { struct uvc_video *video = container_of(work, struct uvc_video, pump); struct uvc_video_queue *queue = &video->queue; - struct usb_request *req; + struct usb_request *req = NULL; struct uvc_buffer *buf; unsigned long flags; int ret; - while (1) { + while (video->ep->enabled) { /* Retrieve the first available USB request, protected by the * request lock. 
*/ @@ -288,6 +304,9 @@ static void uvcg_video_pump(struct work_struct *work) } } + if (!req) + return; + spin_lock_irqsave(&video->req_lock, flags); list_add_tail(&req->list, &video->req_free); spin_unlock_irqrestore(&video->req_lock, flags); @@ -312,9 +331,9 @@ int uvcg_video_enable(struct uvc_video *video, int enable) cancel_work_sync(&video->pump); uvcg_queue_cancel(&video->queue, 0); - for (i = 0; i < UVC_NUM_REQUESTS; ++i) - if (video->req[i]) - usb_ep_dequeue(video->ep, video->req[i]); + for (i = 0; i < video->uvc_num_requests; ++i) + if (video->ureq && video->ureq[i].req) + usb_ep_dequeue(video->ep, video->ureq[i].req); uvc_video_free_requests(video); uvcg_queue_enable(&video->queue, 0); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c0b679216727..a6b3a5553fde 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2036,6 +2036,15 @@ static int ext4_writepage(struct page *page, else len = PAGE_SIZE; + /* Should never happen but for bugs in other kernel subsystems */ + if (!page_has_buffers(page)) { + ext4_warning_inode(inode, + "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + return 0; + } + page_bufs = page_buffers(page); /* * We cannot do block allocation or other extent handling in this @@ -2639,6 +2648,22 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) wait_on_page_writeback(page); BUG_ON(PageWriteback(page)); + /* + * Should never happen but for buggy code in + * other subsystems that call + * set_page_dirty() without properly warning + * the file system first. See [1] for more + * information. + * + * [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz + */ + if (!page_has_buffers(page)) { + ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + continue; + } + if (mpd->map.m_len == 0) mpd->first_page = page->index; mpd->next_page = page->index + 1; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c93b0154dd51..e9a1543ba7cb 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -950,7 +950,17 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, while (count) { if (cs->write && cs->pipebufs && page) { - return fuse_ref_page(cs, page, offset, count); + /* + * Can't control lifetime of pipe buffers, so always + * copy user pages. 
+ */ + if (cs->req->args->user_pages) { + err = fuse_copy_fill(cs); + if (err) + return err; + } else { + return fuse_ref_page(cs, page, offset, count); + } } else if (!cs->len) { if (cs->move_pages && page && offset == 0 && count == PAGE_SIZE) { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 97a40476d1bd..a68c8310942f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1420,6 +1420,7 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii, (PAGE_SIZE - ret) & (PAGE_SIZE - 1); } + ap->args.user_pages = true; if (write) ap->args.in_pages = true; else diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index fc5fcd8e0671..69a631c12b15 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -277,6 +277,7 @@ struct fuse_args { bool nocreds:1; bool in_pages:1; bool out_pages:1; + bool user_pages:1; bool out_argvar:1; bool page_zeroing:1; bool page_replace:1; diff --git a/include/linux/damon.h b/include/linux/damon.h new file mode 100644 index 000000000000..5e1e3a128b77 --- /dev/null +++ b/include/linux/damon.h @@ -0,0 +1,511 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * DAMON API + * + * Author: SeongJae Park + */ + +#ifndef _DAMON_H_ +#define _DAMON_H_ + +#include <linux/mutex.h> +#include <linux/time64.h> +#include <linux/types.h> +#include <linux/random.h> + +/* Minimal region size. Every damon_region is aligned by this. */ +#define DAMON_MIN_REGION PAGE_SIZE +/* Max priority score for DAMON-based operation schemes */ +#define DAMOS_MAX_SCORE (99) + +/* Get a random number in [l, r) */ +static inline unsigned long damon_rand(unsigned long l, unsigned long r) +{ + return l + prandom_u32_max(r - l); +} + +/** + * struct damon_addr_range - Represents an address region of [@start, @end). + * @start: Start address of the region (inclusive). + * @end: End address of the region (exclusive). + */ +struct damon_addr_range { + unsigned long start; + unsigned long end; +}; + +/** + * struct damon_region - Represents a monitoring target region. + * @ar: The address range of the region. + * @sampling_addr: Address of the sample for the next access check. + * @nr_accesses: Access frequency of this region. + * @list: List head for siblings. + * @age: Age of this region. + * + * @age is initially zero, increased for each aggregation interval, and reset + * to zero again if the access frequency is significantly changed. If two + * regions are merged into a new region, both @nr_accesses and @age of the new + * region are set as region size-weighted average of those of the two regions. + */ +struct damon_region { + struct damon_addr_range ar; + unsigned long sampling_addr; + unsigned int nr_accesses; + struct list_head list; + + unsigned int age; +/* private: Internal value for age calculation. */ + unsigned int last_nr_accesses; +}; + +/** + * struct damon_target - Represents a monitoring target. + * @id: Unique identifier for this target. + * @nr_regions: Number of monitoring target regions of this target. + * @regions_list: Head of the monitoring target regions of this target. + * @list: List head for siblings. + * + * Each monitoring context could have multiple targets. For example, a context + * for virtual memory address spaces could have multiple target processes. The + * @id of each target should be unique among the targets of the context. For + * example, in the virtual address monitoring context, it could be a pidfd or + * an address of an mm_struct.
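To make the target/region model described above concrete, here is a minimal sketch, assuming only the constructors declared later in this header; example_build_target is a hypothetical helper and the id and addresses are arbitrary examples:

	/* Illustrative sketch only: one target covering one [start, end) region. */
	static int example_build_target(struct damon_ctx *ctx)
	{
		struct damon_target *t;
		struct damon_region *r;

		t = damon_new_target(42);	/* id: e.g. a pidfd or an mm address */
		if (!t)
			return -ENOMEM;

		r = damon_new_region(0x400000, 0x600000);	/* [start, end) */
		if (!r) {
			damon_free_target(t);
			return -ENOMEM;
		}

		damon_add_region(r, t);		/* t->nr_regions becomes 1 */
		damon_add_target(ctx, t);
		return 0;
	}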
+ */ +struct damon_target { + unsigned long id; + unsigned int nr_regions; + struct list_head regions_list; + struct list_head list; +}; + +/** + * enum damos_action - Represents an action of a Data Access Monitoring-based + * Operation Scheme. + * + * @DAMOS_WILLNEED: Call ``madvise()`` for the region with MADV_WILLNEED. + * @DAMOS_COLD: Call ``madvise()`` for the region with MADV_COLD. + * @DAMOS_PAGEOUT: Call ``madvise()`` for the region with MADV_PAGEOUT. + * @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE. + * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE. + * @DAMOS_STAT: Do nothing but count the stat. + */ +enum damos_action { + DAMOS_WILLNEED, + DAMOS_COLD, + DAMOS_PAGEOUT, + DAMOS_HUGEPAGE, + DAMOS_NOHUGEPAGE, + DAMOS_STAT, /* Do nothing but only record the stat */ +}; + +/** + * struct damos_quota - Controls the aggressiveness of the given scheme. + * @ms: Maximum milliseconds that the scheme can use. + * @sz: Maximum bytes of memory that the action can be applied. + * @reset_interval: Charge reset interval in milliseconds. + * + * @weight_sz: Weight of the region's size for prioritization. + * @weight_nr_accesses: Weight of the region's nr_accesses for prioritization. + * @weight_age: Weight of the region's age for prioritization. + * + * To avoid consuming too much CPU time or IO resources for applying the + * &struct damos->action to large memory, DAMON allows users to set time and/or + * size quotas. The quotas can be set by writing non-zero values to &ms and + * &sz, respectively. If the time quota is set, DAMON tries to use only up to + * &ms milliseconds within &reset_interval for applying the action. If the + * size quota is set, DAMON tries to apply the action only up to &sz bytes + * within &reset_interval. + * + * Internally, the time quota is transformed to a size quota using estimated + * throughput of the scheme's action. DAMON then compares it against &sz and + * uses smaller one as the effective quota. + * + * For selecting regions within the quota, DAMON prioritizes current scheme's + * target memory regions using the &struct damon_primitive->get_scheme_score. + * You could customize the prioritization logic by setting &weight_sz, + * &weight_nr_accesses, and &weight_age, because monitoring primitives are + * encouraged to respect those. + */ +struct damos_quota { + unsigned long ms; + unsigned long sz; + unsigned long reset_interval; + + unsigned int weight_sz; + unsigned int weight_nr_accesses; + unsigned int weight_age; + +/* private: */ + /* For throughput estimation */ + unsigned long total_charged_sz; + unsigned long total_charged_ns; + + unsigned long esz; /* Effective size quota in bytes */ + + /* For charging the quota */ + unsigned long charged_sz; + unsigned long charged_from; + struct damon_target *charge_target_from; + unsigned long charge_addr_from; + + /* For prioritization */ + unsigned long histogram[DAMOS_MAX_SCORE + 1]; + unsigned int min_score; +}; + +/** + * enum damos_wmark_metric - Represents the watermark metric. + * + * @DAMOS_WMARK_NONE: Ignore the watermarks of the given scheme. + * @DAMOS_WMARK_FREE_MEM_RATE: Free memory rate of the system in [0,1000]. + */ +enum damos_wmark_metric { + DAMOS_WMARK_NONE, + DAMOS_WMARK_FREE_MEM_RATE, +}; + +/** + * struct damos_watermarks - Controls when a given scheme should be activated. + * @metric: Metric for the watermarks. + * @interval: Watermarks check time interval in microseconds. + * @high: High watermark. + * @mid: Middle watermark. 
+ * @low: Low watermark. + * + * If &metric is &DAMOS_WMARK_NONE, the scheme is always active. Being active + * means DAMON does monitoring and applying the action of the scheme to + * appropriate memory regions. Else, DAMON checks &metric of the system for at + * least every &interval microseconds and works as below. + * + * If &metric is higher than &high, the scheme is inactivated. If &metric is + * between &mid and &low, the scheme is activated. If &metric is lower than + * &low, the scheme is inactivated. + */ +struct damos_watermarks { + enum damos_wmark_metric metric; + unsigned long interval; + unsigned long high; + unsigned long mid; + unsigned long low; + +/* private: */ + bool activated; +}; + +/** + * struct damos_stat - Statistics on a given scheme. + * @nr_tried: Total number of regions that the scheme was tried to be applied to. + * @sz_tried: Total size of regions that the scheme was tried to be applied to. + * @nr_applied: Total number of regions that the scheme was applied to. + * @sz_applied: Total size of regions that the scheme was applied to. + * @qt_exceeds: Total number of times the quota of the scheme was exceeded. + */ +struct damos_stat { + unsigned long nr_tried; + unsigned long sz_tried; + unsigned long nr_applied; + unsigned long sz_applied; + unsigned long qt_exceeds; +}; + +/** + * struct damos - Represents a Data Access Monitoring-based Operation Scheme. + * @min_sz_region: Minimum size of target regions. + * @max_sz_region: Maximum size of target regions. + * @min_nr_accesses: Minimum ``->nr_accesses`` of target regions. + * @max_nr_accesses: Maximum ``->nr_accesses`` of target regions. + * @min_age_region: Minimum age of target regions. + * @max_age_region: Maximum age of target regions. + * @action: &damos_action to be applied to the target regions. + * @quota: Control the aggressiveness of this scheme. + * @wmarks: Watermarks for automated (in)activation of this scheme. + * @stat: Statistics of this scheme. + * @list: List head for siblings. + * + * For each aggregation interval, DAMON finds regions which fit in the + * condition (&min_sz_region, &max_sz_region, &min_nr_accesses, + * &max_nr_accesses, &min_age_region, &max_age_region) and applies &action to + * those. To avoid consuming too much CPU time or IO resources for the + * &action, &quota is used. + * + * To do the work only when needed, schemes can be activated for specific + * system situations using &wmarks. If all schemes that are registered to a + * &struct damon_ctx are inactive, DAMON stops monitoring and just repeatedly + * checks the watermarks. + * + * After applying the &action to each region, &stat is updated to reflect the + * number of regions and total size of regions that the &action was tried on + * and applied to. + */ +struct damos { + unsigned long min_sz_region; + unsigned long max_sz_region; + unsigned int min_nr_accesses; + unsigned int max_nr_accesses; + unsigned int min_age_region; + unsigned int max_age_region; + enum damos_action action; + struct damos_quota quota; + struct damos_watermarks wmarks; + struct damos_stat stat; + struct list_head list; +}; + +struct damon_ctx; + +/** + * struct damon_primitive - Monitoring primitives for given use cases. + * + * @init: Initialize primitive-internal data structures. + * @update: Update primitive-internal data structures.
+ * @prepare_access_checks: Prepare next access check of target regions. + * @check_accesses: Check the accesses to target regions. + * @reset_aggregated: Reset aggregated accesses monitoring results. + * @get_scheme_score: Get the score of a region for a scheme. + * @apply_scheme: Apply a DAMON-based operation scheme. + * @target_valid: Determine if the target is valid. + * @cleanup: Clean up the context. + * + * DAMON can be extended for various address spaces and usages. For this, + * users should register the low level primitives for their target address + * space and usecase via the &damon_ctx.primitive. Then, the monitoring thread + * (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting + * the monitoring, @update after each &damon_ctx.primitive_update_interval, and + * @check_accesses, @target_valid and @prepare_access_checks after each + * &damon_ctx.sample_interval. Finally, @reset_aggregated is called after each + * &damon_ctx.aggr_interval. + * + * @init should initialize primitive-internal data structures. For example, + * this could be used to construct proper monitoring target regions and link + * those to @damon_ctx.adaptive_targets. + * @update should update the primitive-internal data structures. For example, + * this could be used to update monitoring target regions for current status. + * @prepare_access_checks should manipulate the monitoring regions to be + * prepared for the next access check. + * @check_accesses should check the accesses to each region that made after the + * last preparation and update the number of observed accesses of each region. + * It should also return max number of observed accesses that made as a result + * of its update. The value will be used for regions adjustment threshold. + * @reset_aggregated should reset the access monitoring results that aggregated + * by @check_accesses. + * @get_scheme_score should return the priority score of a region for a scheme + * as an integer in [0, &DAMOS_MAX_SCORE]. + * @apply_scheme is called from @kdamond when a region for user provided + * DAMON-based operation scheme is found. It should apply the scheme's action + * to the region and return bytes of the region that the action is successfully + * applied. + * @target_valid should check whether the target is still valid for the + * monitoring. + * @cleanup is called from @kdamond just before its termination. + */ +struct damon_primitive { + void (*init)(struct damon_ctx *context); + void (*update)(struct damon_ctx *context); + void (*prepare_access_checks)(struct damon_ctx *context); + unsigned int (*check_accesses)(struct damon_ctx *context); + void (*reset_aggregated)(struct damon_ctx *context); + int (*get_scheme_score)(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme); + unsigned long (*apply_scheme)(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme); + bool (*target_valid)(void *target); + void (*cleanup)(struct damon_ctx *context); +}; + +/** + * struct damon_callback - Monitoring events notification callbacks. + * + * @before_start: Called before starting the monitoring. + * @after_sampling: Called after each sampling. + * @after_aggregation: Called after each aggregation. + * @before_terminate: Called before terminating the monitoring. + * @private: User private data. 
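The registration flow that this kerneldoc describes can be sketched as follows; every my_* function is a hypothetical stand-in for a real address-space implementation such as the vaddr primitives, and hooks left NULL are expected to be skipped by the monitoring thread:

	/* Illustrative sketch only: plugging hypothetical primitives into a
	 * context via the &damon_ctx.primitive field described above. */
	static void my_init(struct damon_ctx *ctx) { /* build initial regions */ }
	static void my_prepare_access_checks(struct damon_ctx *ctx) { /* arm checks */ }
	static unsigned int my_check_accesses(struct damon_ctx *ctx) { return 0; }
	static bool my_target_valid(void *target) { return true; }

	static void example_set_primitives(struct damon_ctx *ctx)
	{
		ctx->primitive = (struct damon_primitive) {
			.init = my_init,
			.prepare_access_checks = my_prepare_access_checks,
			.check_accesses = my_check_accesses,
			.target_valid = my_target_valid,
		};
	}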
+ * + * The monitoring thread (&damon_ctx.kdamond) calls @before_start and + * @before_terminate just before starting and finishing the monitoring, + * respectively. Therefore, those are good places for installing and cleaning + * @private. + * + * The monitoring thread calls @after_sampling and @after_aggregation for each + * of the sampling intervals and aggregation intervals, respectively. + * Therefore, users can safely access the monitoring results without additional + * protection. For the reason, users are recommended to use these callback for + * the accesses to the results. + * + * If any callback returns non-zero, monitoring stops. + */ +struct damon_callback { + void *private; + + int (*before_start)(struct damon_ctx *context); + int (*after_sampling)(struct damon_ctx *context); + int (*after_aggregation)(struct damon_ctx *context); + void (*before_terminate)(struct damon_ctx *context); +}; + +/** + * struct damon_ctx - Represents a context for each monitoring. This is the + * main interface that allows users to set the attributes and get the results + * of the monitoring. + * + * @sample_interval: The time between access samplings. + * @aggr_interval: The time between monitor results aggregations. + * @primitive_update_interval: The time between monitoring primitive updates. + * + * For each @sample_interval, DAMON checks whether each region is accessed or + * not. It aggregates and keeps the access information (number of accesses to + * each region) for @aggr_interval time. DAMON also checks whether the target + * memory regions need update (e.g., by ``mmap()`` calls from the application, + * in case of virtual memory monitoring) and applies the changes for each + * @primitive_update_interval. All time intervals are in micro-seconds. + * Please refer to &struct damon_primitive and &struct damon_callback for more + * detail. + * + * @kdamond: Kernel thread who does the monitoring. + * @kdamond_stop: Notifies whether kdamond should stop. + * @kdamond_lock: Mutex for the synchronizations with @kdamond. + * + * For each monitoring context, one kernel thread for the monitoring is + * created. The pointer to the thread is stored in @kdamond. + * + * Once started, the monitoring thread runs until explicitly required to be + * terminated or every monitoring target is invalid. The validity of the + * targets is checked via the &damon_primitive.target_valid of @primitive. The + * termination can also be explicitly requested by writing non-zero to + * @kdamond_stop. The thread sets @kdamond to NULL when it terminates. + * Therefore, users can know whether the monitoring is ongoing or terminated by + * reading @kdamond. Reads and writes to @kdamond and @kdamond_stop from + * outside of the monitoring thread must be protected by @kdamond_lock. + * + * Note that the monitoring thread protects only @kdamond and @kdamond_stop via + * @kdamond_lock. Accesses to other fields must be protected by themselves. + * + * @primitive: Set of monitoring primitives for given use cases. + * @callback: Set of callbacks for monitoring events notifications. + * + * @min_nr_regions: The minimum number of adaptive monitoring regions. + * @max_nr_regions: The maximum number of adaptive monitoring regions. + * @adaptive_targets: Head of monitoring targets (&damon_target) list. + * @schemes: Head of schemes (&damos) list. 
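Given the locking rules above, code outside the monitoring thread can tell whether a context is running with a helper along these lines (a minimal sketch; example_ctx_running is a hypothetical name):

	/* Illustrative sketch only: reads of @kdamond from outside the
	 * monitoring thread must hold @kdamond_lock, per the rules above. */
	static bool example_ctx_running(struct damon_ctx *ctx)
	{
		bool running;

		mutex_lock(&ctx->kdamond_lock);
		running = ctx->kdamond != NULL;
		mutex_unlock(&ctx->kdamond_lock);

		return running;
	}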
+ */ +struct damon_ctx { + unsigned long sample_interval; + unsigned long aggr_interval; + unsigned long primitive_update_interval; + +/* private: internal use only */ + struct timespec64 last_aggregation; + struct timespec64 last_primitive_update; + +/* public: */ + struct task_struct *kdamond; + struct mutex kdamond_lock; + + struct damon_primitive primitive; + struct damon_callback callback; + + unsigned long min_nr_regions; + unsigned long max_nr_regions; + struct list_head adaptive_targets; + struct list_head schemes; +}; + +static inline struct damon_region *damon_next_region(struct damon_region *r) +{ + return container_of(r->list.next, struct damon_region, list); +} + +static inline struct damon_region *damon_prev_region(struct damon_region *r) +{ + return container_of(r->list.prev, struct damon_region, list); +} + +static inline struct damon_region *damon_last_region(struct damon_target *t) +{ + return list_last_entry(&t->regions_list, struct damon_region, list); +} + +#define damon_for_each_region(r, t) \ + list_for_each_entry(r, &t->regions_list, list) + +#define damon_for_each_region_safe(r, next, t) \ + list_for_each_entry_safe(r, next, &t->regions_list, list) + +#define damon_for_each_target(t, ctx) \ + list_for_each_entry(t, &(ctx)->adaptive_targets, list) + +#define damon_for_each_target_safe(t, next, ctx) \ + list_for_each_entry_safe(t, next, &(ctx)->adaptive_targets, list) + +#define damon_for_each_scheme(s, ctx) \ + list_for_each_entry(s, &(ctx)->schemes, list) + +#define damon_for_each_scheme_safe(s, next, ctx) \ + list_for_each_entry_safe(s, next, &(ctx)->schemes, list) + +#ifdef CONFIG_DAMON + +struct damon_region *damon_new_region(unsigned long start, unsigned long end); + +/* + * Add a region between two other regions + */ +static inline void damon_insert_region(struct damon_region *r, + struct damon_region *prev, struct damon_region *next, + struct damon_target *t) +{ + __list_add(&r->list, &prev->list, &next->list); + t->nr_regions++; +} + +void damon_add_region(struct damon_region *r, struct damon_target *t); +void damon_destroy_region(struct damon_region *r, struct damon_target *t); + +struct damos *damon_new_scheme( + unsigned long min_sz_region, unsigned long max_sz_region, + unsigned int min_nr_accesses, unsigned int max_nr_accesses, + unsigned int min_age_region, unsigned int max_age_region, + enum damos_action action, struct damos_quota *quota, + struct damos_watermarks *wmarks); +void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); +void damon_destroy_scheme(struct damos *s); + +struct damon_target *damon_new_target(unsigned long id); +void damon_add_target(struct damon_ctx *ctx, struct damon_target *t); +bool damon_targets_empty(struct damon_ctx *ctx); +void damon_free_target(struct damon_target *t); +void damon_destroy_target(struct damon_target *t); +unsigned int damon_nr_regions(struct damon_target *t); + +struct damon_ctx *damon_new_ctx(void); +void damon_destroy_ctx(struct damon_ctx *ctx); +int damon_set_targets(struct damon_ctx *ctx, + unsigned long *ids, ssize_t nr_ids); +int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, + unsigned long aggr_int, unsigned long primitive_upd_int, + unsigned long min_nr_reg, unsigned long max_nr_reg); +int damon_set_schemes(struct damon_ctx *ctx, + struct damos **schemes, ssize_t nr_schemes); +int damon_nr_running_ctxs(void); + +int damon_start(struct damon_ctx **ctxs, int nr_ctxs); +int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); + +#endif /* CONFIG_DAMON */ + +#ifdef 
CONFIG_DAMON_VADDR +bool damon_va_target_valid(void *t); +void damon_va_set_primitives(struct damon_ctx *ctx); +#endif /* CONFIG_DAMON_VADDR */ + +#ifdef CONFIG_DAMON_PADDR +bool damon_pa_target_valid(void *t); +void damon_pa_set_primitives(struct damon_ctx *ctx); +#endif /* CONFIG_DAMON_PADDR */ + +#endif /* _DAMON_H */ diff --git a/include/linux/delay.h b/include/linux/delay.h index 1d0e2ce6b6d9..abecbccab6e4 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -20,6 +20,7 @@ */ #include +#include extern unsigned long loops_per_jiffy; @@ -58,8 +59,15 @@ void calibrate_delay(void); void __attribute__((weak)) calibration_delay_done(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); +void usleep_range_state(unsigned long min, unsigned long max, + unsigned int state); void usleep_range(unsigned long min, unsigned long max); +static inline void usleep_idle_range(unsigned long min, unsigned long max) +{ + usleep_range_state(min, max, TASK_IDLE); +} + static inline void ssleep(unsigned int seconds) { msleep(seconds * 1000); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a2738944340c..dc636f162194 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -132,7 +132,7 @@ enum pageflags { #ifdef CONFIG_MEMORY_FAILURE PG_hwpoison, /* hardware poisoned page. Don't touch */ #endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) +#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) PG_young, PG_idle, #endif @@ -440,7 +440,7 @@ PAGEFLAG_FALSE(HWPoison) #define __PG_HWPOISON 0 #endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) +#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) TESTPAGEFLAG(Young, young, PF_ANY) SETPAGEFLAG(Young, young, PF_ANY) TESTCLEARFLAG(Young, young, PF_ANY) diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index cd45c1927d90..8e58a1b73771 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -25,7 +25,7 @@ enum page_ext_flags { /* page migration failed */ PAGE_EXT_PINNER_MIGRATION_FAILED, #endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT) +#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) PAGE_EXT_YOUNG, PAGE_EXT_IDLE, #endif diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index 1e894d34bdce..d8a6aecf99cb 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -6,7 +6,7 @@ #include #include -#ifdef CONFIG_IDLE_PAGE_TRACKING +#ifdef CONFIG_PAGE_IDLE_FLAG #ifdef CONFIG_64BIT static inline bool page_is_young(struct page *page) @@ -106,7 +106,7 @@ static inline void clear_page_idle(struct page *page) } #endif /* CONFIG_64BIT */ -#else /* !CONFIG_IDLE_PAGE_TRACKING */ +#else /* !CONFIG_PAGE_IDLE_FLAG */ static inline bool page_is_young(struct page *page) { @@ -135,6 +135,6 @@ static inline void clear_page_idle(struct page *page) { } -#endif /* CONFIG_IDLE_PAGE_TRACKING */ +#endif /* CONFIG_PAGE_IDLE_FLAG */ #endif /* _LINUX_MM_PAGE_IDLE_H */ diff --git a/include/net/esp.h b/include/net/esp.h index 9c5637d41d95..90cd02ff77ef 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -4,6 +4,8 @@ #include +#define ESP_SKB_FRAG_MAXSIZE (PAGE_SIZE << SKB_FRAG_PAGE_ORDER) + struct ip_esp_hdr; static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb) diff --git a/include/net/sock.h b/include/net/sock.h index 738981f7b565..eb64ca0f01f3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2691,6 +2691,7 @@ extern 
int sysctl_optmem_max; extern __u32 sysctl_wmem_default; extern __u32 sysctl_rmem_default; +#define SKB_FRAG_PAGE_ORDER get_order(32768) DECLARE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key); static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto) diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h new file mode 100644 index 000000000000..c79f1d4c39af --- /dev/null +++ b/include/trace/events/damon.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM damon + +#if !defined(_TRACE_DAMON_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DAMON_H + +#include <linux/damon.h> +#include <linux/types.h> +#include <linux/tracepoint.h> + +TRACE_EVENT(damon_aggregated, + + TP_PROTO(struct damon_target *t, unsigned int target_id, + struct damon_region *r, unsigned int nr_regions), + + TP_ARGS(t, target_id, r, nr_regions), + + TP_STRUCT__entry( + __field(unsigned long, target_id) + __field(unsigned int, nr_regions) + __field(unsigned long, start) + __field(unsigned long, end) + __field(unsigned int, nr_accesses) + __field(unsigned int, age) + ), + + TP_fast_assign( + __entry->target_id = target_id; + __entry->nr_regions = nr_regions; + __entry->start = r->ar.start; + __entry->end = r->ar.end; + __entry->nr_accesses = r->nr_accesses; + __entry->age = r->age; + ), + + TP_printk("target_id=%lu nr_regions=%u %lu-%lu: %u %u", + __entry->target_id, __entry->nr_regions, + __entry->start, __entry->end, + __entry->nr_accesses, __entry->age) +); + +#endif /* _TRACE_DAMON_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 390270e00a1d..d428f0137c49 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -73,7 +73,7 @@ #define IF_HAVE_PG_HWPOISON(flag,string) #endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) +#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) #define IF_HAVE_PG_IDLE(flag,string) ,{1UL << flag, string} #else #define IF_HAVE_PG_IDLE(flag,string) diff --git a/include/trace/hooks/dtask.h b/include/trace/hooks/dtask.h index 3c49af0e6560..675634238fbd 100644 --- a/include/trace/hooks/dtask.h +++ b/include/trace/hooks/dtask.h @@ -54,6 +54,9 @@ DECLARE_HOOK(android_vh_alter_mutex_list_add, DECLARE_HOOK(android_vh_mutex_unlock_slowpath, TP_PROTO(struct mutex *lock), TP_ARGS(lock)); +DECLARE_HOOK(android_vh_mutex_unlock_slowpath_end, + TP_PROTO(struct mutex *lock, struct task_struct *next), + TP_ARGS(lock, next)); /* macro versions of hooks are no longer required */ diff --git a/include/trace/hooks/mm.h b/include/trace/hooks/mm.h index bde2f3556c2a..4af456345aa4 100644 --- a/include/trace/hooks/mm.h +++ b/include/trace/hooks/mm.h @@ -132,6 +132,10 @@ struct device; DECLARE_HOOK(android_vh_subpage_dma_contig_alloc, TP_PROTO(bool *allow_subpage_alloc, struct device *dev, size_t *size), TP_ARGS(allow_subpage_alloc, dev, size)); +struct readahead_control; +DECLARE_HOOK(android_vh_ra_tuning_max_page, + TP_PROTO(struct readahead_control *ractl, unsigned long *max_page), + TP_ARGS(ractl, max_page)); /* macro versions of hooks are no longer required */ #endif /* _TRACE_HOOK_MM_H */ diff --git a/include/trace/hooks/rwsem.h b/include/trace/hooks/rwsem.h index d644a6e21259..c8e445216910 100644 --- a/include/trace/hooks/rwsem.h +++ b/include/trace/hooks/rwsem.h @@ -29,7 +29,21 @@ DECLARE_HOOK(android_vh_alter_rwsem_list_add, DECLARE_HOOK(android_vh_rwsem_wake_finish, TP_PROTO(struct rw_semaphore *sem), TP_ARGS(sem)); -
+DECLARE_HOOK(android_vh_rwsem_set_owner, + TP_PROTO(struct rw_semaphore *sem), + TP_ARGS(sem)); +DECLARE_HOOK(android_vh_rwsem_set_reader_owned, + TP_PROTO(struct rw_semaphore *sem), + TP_ARGS(sem)); +DECLARE_HOOK(android_vh_rwsem_up_write_end, + TP_PROTO(struct rw_semaphore *sem), + TP_ARGS(sem)); +DECLARE_HOOK(android_vh_rwsem_up_read_end, + TP_PROTO(struct rw_semaphore *sem), + TP_ARGS(sem)); +DECLARE_HOOK(android_vh_rwsem_mark_wake_readers, + TP_PROTO(struct rw_semaphore *sem, struct rwsem_waiter *waiter), + TP_ARGS(sem, waiter)); /* macro versions of hooks are no longer required */ #endif /* _TRACE_HOOK_RWSEM_H */ diff --git a/include/trace/hooks/signal.h b/include/trace/hooks/signal.h index a0db2e8cf77d..ee36ea1de283 100644 --- a/include/trace/hooks/signal.h +++ b/include/trace/hooks/signal.h @@ -15,6 +15,9 @@ DECLARE_HOOK(android_vh_do_send_sig_info, DECLARE_HOOK(android_vh_process_killed, TP_PROTO(struct task_struct *task, bool *reap), TP_ARGS(task, reap)); +DECLARE_HOOK(android_vh_killed_process, + TP_PROTO(struct task_struct *killer, struct task_struct *dst, bool *reap), + TP_ARGS(killer, dst, reap)); #endif /* _TRACE_HOOK_SIGNAL_H */ /* This part must be outside protection */ #include <trace/define_trace.h> diff --git a/include/trace/hooks/vmscan.h b/include/trace/hooks/vmscan.h index 48a00c6cede0..7b4d222301af 100644 --- a/include/trace/hooks/vmscan.h +++ b/include/trace/hooks/vmscan.h @@ -31,6 +31,9 @@ DECLARE_HOOK(android_vh_page_referenced_check_bypass, DECLARE_HOOK(android_vh_shrink_node_memcgs, TP_PROTO(struct mem_cgroup *memcg, bool *skip), TP_ARGS(memcg, skip)); +DECLARE_HOOK(android_vh_tune_memcg_scan_type, + TP_PROTO(struct mem_cgroup *memcg, char *scan_type), + TP_ARGS(memcg, scan_type)); #endif /* _TRACE_HOOK_VMSCAN_H */ /* This part must be outside protection */ #include <trace/define_trace.h> diff --git a/init/Kconfig b/init/Kconfig index d474efce09ed..b69607d62336 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -53,13 +53,13 @@ config LLD_VERSION config CC_CAN_LINK bool - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag)) if 64BIT - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m32-flag)) + default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag)) if 64BIT + default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag)) config CC_CAN_LINK_STATIC bool - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag) -static) if 64BIT - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m32-flag) -static) + default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag) -static) if 64BIT + default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag) -static) config CC_HAS_ASM_GOTO def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC)) diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 016f40c76a83..fac88b1e134e 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -1293,6 +1293,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne spin_unlock(&lock->wait_lock); wake_up_q(&wake_q); + trace_android_vh_mutex_unlock_slowpath_end(lock, next); } #ifndef CONFIG_DEBUG_LOCK_ALLOC diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 30a5f91f99cf..a33e6a36dae9 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ 
-176,6 +176,7 @@ static inline void rwsem_set_owner(struct rw_semaphore *sem) { atomic_long_set(&sem->owner, (long)current); + trace_android_vh_rwsem_set_owner(sem); } static inline void rwsem_clear_owner(struct rw_semaphore *sem) @@ -213,6 +214,7 @@ static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem, static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) { __rwsem_set_reader_owned(sem, current); + trace_android_vh_rwsem_set_reader_owned(sem); } /* @@ -496,6 +498,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem, woken++; list_move_tail(&waiter->list, &wlist); + trace_android_vh_rwsem_mark_wake_readers(sem, waiter); /* * Limit # of readers that can be woken up per wakeup call. */ @@ -1460,6 +1463,7 @@ static inline void __up_read(struct rw_semaphore *sem) clear_wr_nonspinnable(sem); rwsem_wake(sem, tmp); } + trace_android_vh_rwsem_up_read_end(sem); } /* @@ -1481,6 +1485,7 @@ static inline void __up_write(struct rw_semaphore *sem) tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count); if (unlikely(tmp & RWSEM_FLAG_WAITERS)) rwsem_wake(sem, tmp); + trace_android_vh_rwsem_up_write_end(sem); } /* diff --git a/kernel/signal.c b/kernel/signal.c index 7814560f8637..6fff4a9788ac 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1420,6 +1420,7 @@ int group_send_sig_info(int sig, struct kernel_siginfo *info, bool reap = false; trace_android_vh_process_killed(current, &reap); + trace_android_vh_killed_process(current, p, &reap); if (reap) add_to_oom_reaper(p); } diff --git a/kernel/time/timer.c b/kernel/time/timer.c index ed71c4141047..1535065a7d76 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -2052,6 +2052,32 @@ unsigned long msleep_interruptible(unsigned int msecs) EXPORT_SYMBOL(msleep_interruptible); +/** + * usleep_range_state - Sleep for an approximate time in a given state + * @min: Minimum time in usecs to sleep + * @max: Maximum time in usecs to sleep + * @state: State in which the current task will sleep + * + * In non-atomic context where the exact wakeup time is flexible, use + * usleep_range_state() instead of udelay(). The sleep improves responsiveness + * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces + * power usage by allowing hrtimers to take advantage of an already- + * scheduled interrupt instead of scheduling a new one just for this sleep.
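As a hedged usage sketch of the new helper, the device-polling scenario and the read_ready callback below are hypothetical; only usleep_idle_range() comes from this patch:

	/* Illustrative sketch only: poll a ready flag without the busy-wait
	 * of udelay() and without inflating load average (TASK_IDLE). */
	static int example_wait_ready(bool (*read_ready)(void))
	{
		int tries;

		for (tries = 0; tries < 100; tries++) {
			if (read_ready())
				return 0;
			usleep_idle_range(100, 200);	/* sleeps in TASK_IDLE */
		}
		return -ETIMEDOUT;
	}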
+ */ +void __sched usleep_range_state(unsigned long min, unsigned long max, + unsigned int state) +{ + ktime_t exp = ktime_add_us(ktime_get(), min); + u64 delta = (u64)(max - min) * NSEC_PER_USEC; + + for (;;) { + __set_current_state(state); + /* Do not return before the requested sleep time has elapsed */ + if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) + break; + } +} + /** * usleep_range - Sleep for an approximate time * @min: Minimum time in usecs to sleep @@ -2065,14 +2091,6 @@ EXPORT_SYMBOL(msleep_interruptible); */ void __sched usleep_range(unsigned long min, unsigned long max) { - ktime_t exp = ktime_add_us(ktime_get(), min); - u64 delta = (u64)(max - min) * NSEC_PER_USEC; - - for (;;) { - __set_current_state(TASK_UNINTERRUPTIBLE); - /* Do not return before the requested sleep time has elapsed */ - if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) - break; - } + usleep_range_state(min, max, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(usleep_range); diff --git a/mm/Kconfig b/mm/Kconfig index 884ba3860ec8..6a020a0d7bc5 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -756,10 +756,18 @@ config DEFERRED_STRUCT_PAGE_INIT lifetime of the system until these kthreads finish the initialisation. +config PAGE_IDLE_FLAG + bool + select PAGE_EXTENSION if !64BIT + help + This adds PG_idle and PG_young flags to 'struct page'. PTE Accessed + bit writers can set the state of the bit in the flags so that PTE + Accessed bit readers may avoid disturbance. + config IDLE_PAGE_TRACKING bool "Enable idle page tracking" depends on SYSFS && MMU - select PAGE_EXTENSION if !64BIT + select PAGE_IDLE_FLAG help This feature allows to estimate the amount of user pages that have not been touched during a given period of time. This information can @@ -888,4 +896,6 @@ config ARCH_HAS_HUGEPD config MAPPING_DIRTY_HELPERS bool +source "mm/damon/Kconfig" + endmenu diff --git a/mm/Makefile b/mm/Makefile index 8de8651da069..b90583a89fb5 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -116,6 +116,7 @@ obj-$(CONFIG_USERFAULTFD) += userfaultfd.o obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o +obj-$(CONFIG_DAMON) += damon/ obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o obj-$(CONFIG_ZONE_DEVICE) += memremap.o diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig new file mode 100644 index 000000000000..5bcf05851ad0 --- /dev/null +++ b/mm/damon/Kconfig @@ -0,0 +1,88 @@ +# SPDX-License-Identifier: GPL-2.0-only + +menu "Data Access Monitoring" + +config DAMON + bool "DAMON: Data Access Monitoring Framework" + help + This builds a framework that allows kernel subsystems to monitor + access frequency of each memory region. The information can be useful + for performance-centric DRAM level memory management. + + See https://damonitor.github.io/doc/html/latest-damon/index.html for + more information. + +config DAMON_KUNIT_TEST + bool "Test for damon" if !KUNIT_ALL_TESTS + depends on DAMON && KUNIT=y + default KUNIT_ALL_TESTS + help + This builds the DAMON Kunit test suite. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation. + + If unsure, say N. + +config DAMON_VADDR + bool "Data access monitoring primitives for virtual address spaces" + depends on DAMON && MMU + select PAGE_IDLE_FLAG + help + This builds the default data access monitoring primitives for DAMON + that work for virtual address spaces. 
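PAGE_IDLE_FLAG, which the DAMON primitives select, exposes the PG_young/PG_idle pair whose role the mm/Kconfig help text above describes. A minimal sketch of the intended cooperation, assuming the helpers from include/linux/page_idle.h and a hypothetical caller that previously marked the page idle:

	/* Illustrative sketch only: one idle-tracking round. An earlier pass
	 * called set_page_idle(); a later access clears PG_idle, and PG_young
	 * preserves the information when reclaim consumes the PTE Accessed
	 * bit first. */
	static bool example_page_was_accessed(struct page *page)
	{
		return page_is_young(page) || !page_is_idle(page);
	}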
+ +config DAMON_PADDR + bool "Data access monitoring primitives for the physical address space" + depends on DAMON && MMU + select PAGE_IDLE_FLAG + help + This builds the default data access monitoring primitives for DAMON + that work for the physical address space. + +config DAMON_VADDR_KUNIT_TEST + bool "Test for DAMON primitives" if !KUNIT_ALL_TESTS + depends on DAMON_VADDR && KUNIT=y + default KUNIT_ALL_TESTS + help + This builds the DAMON virtual addresses primitives Kunit test suite. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation. + + If unsure, say N. + +config DAMON_DBGFS + bool "DAMON debugfs interface" + depends on DAMON_VADDR && DAMON_PADDR && DEBUG_FS + help + This builds the debugfs interface for DAMON. The user space admins + can use the interface for arbitrary data access monitoring. + + If unsure, say N. + +config DAMON_DBGFS_KUNIT_TEST + bool "Test for damon debugfs interface" if !KUNIT_ALL_TESTS + depends on DAMON_DBGFS && KUNIT=y + default KUNIT_ALL_TESTS + help + This builds the DAMON debugfs interface Kunit test suite. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation. + + If unsure, say N. + +config DAMON_RECLAIM + bool "Build DAMON-based reclaim (DAMON_RECLAIM)" + depends on DAMON_PADDR + help + This builds the DAMON-based reclamation subsystem. It finds pages + that have not been accessed for a long time (cold) using DAMON and + reclaims them. + + This is suggested to be used as a proactive and lightweight + reclamation under light memory pressure, while the traditional page + scanning-based reclamation is used for heavy pressure. + +endmenu diff --git a/mm/damon/Makefile b/mm/damon/Makefile new file mode 100644 index 000000000000..f7d5ac377a2b --- /dev/null +++ b/mm/damon/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_DAMON) := core.o +obj-$(CONFIG_DAMON_VADDR) += prmtv-common.o vaddr.o +obj-$(CONFIG_DAMON_PADDR) += prmtv-common.o paddr.o +obj-$(CONFIG_DAMON_DBGFS) += dbgfs.o +obj-$(CONFIG_DAMON_RECLAIM) += reclaim.o diff --git a/mm/damon/core-test.h b/mm/damon/core-test.h new file mode 100644 index 000000000000..7008c3735e99 --- /dev/null +++ b/mm/damon/core-test.h @@ -0,0 +1,253 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Data Access Monitor Unit Tests + * + * Copyright 2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * + * Author: SeongJae Park + */ + +#ifdef CONFIG_DAMON_KUNIT_TEST + +#ifndef _DAMON_CORE_TEST_H +#define _DAMON_CORE_TEST_H + +#include + +static void damon_test_regions(struct kunit *test) +{ + struct damon_region *r; + struct damon_target *t; + + r = damon_new_region(1, 2); + KUNIT_EXPECT_EQ(test, 1ul, r->ar.start); + KUNIT_EXPECT_EQ(test, 2ul, r->ar.end); + KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses); + + t = damon_new_target(42); + KUNIT_EXPECT_EQ(test, 0u, damon_nr_regions(t)); + + damon_add_region(r, t); + KUNIT_EXPECT_EQ(test, 1u, damon_nr_regions(t)); + + damon_del_region(r, t); + KUNIT_EXPECT_EQ(test, 0u, damon_nr_regions(t)); + + damon_free_target(t); +} + +static unsigned int nr_damon_targets(struct damon_ctx *ctx) +{ + struct damon_target *t; + unsigned int nr_targets = 0; + + damon_for_each_target(t, ctx) + nr_targets++; + + return nr_targets; +} + +static void damon_test_target(struct kunit *test) +{ + struct damon_ctx *c = damon_new_ctx(); + struct damon_target *t; + + t = damon_new_target(42); + KUNIT_EXPECT_EQ(test, 42ul, t->id); + KUNIT_EXPECT_EQ(test, 0u, nr_damon_targets(c)); + + damon_add_target(c, t); + KUNIT_EXPECT_EQ(test, 1u, nr_damon_targets(c)); + + damon_destroy_target(t); + KUNIT_EXPECT_EQ(test, 0u, nr_damon_targets(c)); + + damon_destroy_ctx(c); +} + +/* + * Test kdamond_reset_aggregated() + * + * DAMON checks access to each region and aggregates this information as the + * access frequency of each region. In detail, it increases '->nr_accesses' of + * regions that an access has confirmed. 'kdamond_reset_aggregated()' flushes + * the aggregated information ('->nr_accesses' of each regions) to the result + * buffer. As a result of the flushing, the '->nr_accesses' of regions are + * initialized to zero. + */ +static void damon_test_aggregate(struct kunit *test) +{ + struct damon_ctx *ctx = damon_new_ctx(); + unsigned long target_ids[] = {1, 2, 3}; + unsigned long saddr[][3] = {{10, 20, 30}, {5, 42, 49}, {13, 33, 55} }; + unsigned long eaddr[][3] = {{15, 27, 40}, {31, 45, 55}, {23, 44, 66} }; + unsigned long accesses[][3] = {{42, 95, 84}, {10, 20, 30}, {0, 1, 2} }; + struct damon_target *t; + struct damon_region *r; + int it, ir; + + damon_set_targets(ctx, target_ids, 3); + + it = 0; + damon_for_each_target(t, ctx) { + for (ir = 0; ir < 3; ir++) { + r = damon_new_region(saddr[it][ir], eaddr[it][ir]); + r->nr_accesses = accesses[it][ir]; + damon_add_region(r, t); + } + it++; + } + kdamond_reset_aggregated(ctx); + it = 0; + damon_for_each_target(t, ctx) { + ir = 0; + /* '->nr_accesses' should be zeroed */ + damon_for_each_region(r, t) { + KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses); + ir++; + } + /* regions should be preserved */ + KUNIT_EXPECT_EQ(test, 3, ir); + it++; + } + /* targets also should be preserved */ + KUNIT_EXPECT_EQ(test, 3, it); + + damon_destroy_ctx(ctx); +} + +static void damon_test_split_at(struct kunit *test) +{ + struct damon_ctx *c = damon_new_ctx(); + struct damon_target *t; + struct damon_region *r; + + t = damon_new_target(42); + r = damon_new_region(0, 100); + damon_add_region(r, t); + damon_split_region_at(c, t, r, 25); + KUNIT_EXPECT_EQ(test, r->ar.start, 0ul); + KUNIT_EXPECT_EQ(test, r->ar.end, 25ul); + + r = damon_next_region(r); + KUNIT_EXPECT_EQ(test, r->ar.start, 25ul); + KUNIT_EXPECT_EQ(test, r->ar.end, 100ul); + + damon_free_target(t); + damon_destroy_ctx(c); +} + +static void damon_test_merge_two(struct kunit *test) +{ + struct damon_target *t; + struct damon_region *r, *r2, *r3; + int i; + + t = damon_new_target(42); + r = 
damon_new_region(0, 100); + r->nr_accesses = 10; + damon_add_region(r, t); + r2 = damon_new_region(100, 300); + r2->nr_accesses = 20; + damon_add_region(r2, t); + + damon_merge_two_regions(t, r, r2); + KUNIT_EXPECT_EQ(test, r->ar.start, 0ul); + KUNIT_EXPECT_EQ(test, r->ar.end, 300ul); + KUNIT_EXPECT_EQ(test, r->nr_accesses, 16u); + + i = 0; + damon_for_each_region(r3, t) { + KUNIT_EXPECT_PTR_EQ(test, r, r3); + i++; + } + KUNIT_EXPECT_EQ(test, i, 1); + + damon_free_target(t); +} + +static struct damon_region *__nth_region_of(struct damon_target *t, int idx) +{ + struct damon_region *r; + unsigned int i = 0; + + damon_for_each_region(r, t) { + if (i++ == idx) + return r; + } + + return NULL; +} + +static void damon_test_merge_regions_of(struct kunit *test) +{ + struct damon_target *t; + struct damon_region *r; + unsigned long sa[] = {0, 100, 114, 122, 130, 156, 170, 184}; + unsigned long ea[] = {100, 112, 122, 130, 156, 170, 184, 230}; + unsigned int nrs[] = {0, 0, 10, 10, 20, 30, 1, 2}; + + unsigned long saddrs[] = {0, 114, 130, 156, 170}; + unsigned long eaddrs[] = {112, 130, 156, 170, 230}; + int i; + + t = damon_new_target(42); + for (i = 0; i < ARRAY_SIZE(sa); i++) { + r = damon_new_region(sa[i], ea[i]); + r->nr_accesses = nrs[i]; + damon_add_region(r, t); + } + + damon_merge_regions_of(t, 9, 9999); + /* 0-112, 114-130, 130-156, 156-170 */ + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 5u); + for (i = 0; i < 5; i++) { + r = __nth_region_of(t, i); + KUNIT_EXPECT_EQ(test, r->ar.start, saddrs[i]); + KUNIT_EXPECT_EQ(test, r->ar.end, eaddrs[i]); + } + damon_free_target(t); +} + +static void damon_test_split_regions_of(struct kunit *test) +{ + struct damon_ctx *c = damon_new_ctx(); + struct damon_target *t; + struct damon_region *r; + + t = damon_new_target(42); + r = damon_new_region(0, 22); + damon_add_region(r, t); + damon_split_regions_of(c, t, 2); + KUNIT_EXPECT_LE(test, damon_nr_regions(t), 2u); + damon_free_target(t); + + t = damon_new_target(42); + r = damon_new_region(0, 220); + damon_add_region(r, t); + damon_split_regions_of(c, t, 4); + KUNIT_EXPECT_LE(test, damon_nr_regions(t), 4u); + damon_free_target(t); + damon_destroy_ctx(c); +} + +static struct kunit_case damon_test_cases[] = { + KUNIT_CASE(damon_test_target), + KUNIT_CASE(damon_test_regions), + KUNIT_CASE(damon_test_aggregate), + KUNIT_CASE(damon_test_split_at), + KUNIT_CASE(damon_test_merge_two), + KUNIT_CASE(damon_test_merge_regions_of), + KUNIT_CASE(damon_test_split_regions_of), + {}, +}; + +static struct kunit_suite damon_test_suite = { + .name = "damon", + .test_cases = damon_test_cases, +}; +kunit_test_suite(damon_test_suite); + +#endif /* _DAMON_CORE_TEST_H */ + +#endif /* CONFIG_DAMON_KUNIT_TEST */ diff --git a/mm/damon/core.c b/mm/damon/core.c new file mode 100644 index 000000000000..1dd153c31c9e --- /dev/null +++ b/mm/damon/core.c @@ -0,0 +1,1075 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Data Access Monitor + * + * Author: SeongJae Park + */ + +#define pr_fmt(fmt) "damon: " fmt + +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +#ifdef CONFIG_DAMON_KUNIT_TEST +#undef DAMON_MIN_REGION +#define DAMON_MIN_REGION 1 +#endif + +static DEFINE_MUTEX(damon_lock); +static int nr_running_ctxs; + +/* + * Construct a damon_region struct + * + * Returns the pointer to the new struct if success, or NULL otherwise + */ +struct damon_region *damon_new_region(unsigned long start, unsigned long end) +{ + struct damon_region *region; + + region = kmalloc(sizeof(*region), 
GFP_KERNEL); + if (!region) + return NULL; + + region->ar.start = start; + region->ar.end = end; + region->nr_accesses = 0; + INIT_LIST_HEAD(®ion->list); + + region->age = 0; + region->last_nr_accesses = 0; + + return region; +} + +void damon_add_region(struct damon_region *r, struct damon_target *t) +{ + list_add_tail(&r->list, &t->regions_list); + t->nr_regions++; +} + +static void damon_del_region(struct damon_region *r, struct damon_target *t) +{ + list_del(&r->list); + t->nr_regions--; +} + +static void damon_free_region(struct damon_region *r) +{ + kfree(r); +} + +void damon_destroy_region(struct damon_region *r, struct damon_target *t) +{ + damon_del_region(r, t); + damon_free_region(r); +} + +struct damos *damon_new_scheme( + unsigned long min_sz_region, unsigned long max_sz_region, + unsigned int min_nr_accesses, unsigned int max_nr_accesses, + unsigned int min_age_region, unsigned int max_age_region, + enum damos_action action, struct damos_quota *quota, + struct damos_watermarks *wmarks) +{ + struct damos *scheme; + + scheme = kmalloc(sizeof(*scheme), GFP_KERNEL); + if (!scheme) + return NULL; + scheme->min_sz_region = min_sz_region; + scheme->max_sz_region = max_sz_region; + scheme->min_nr_accesses = min_nr_accesses; + scheme->max_nr_accesses = max_nr_accesses; + scheme->min_age_region = min_age_region; + scheme->max_age_region = max_age_region; + scheme->action = action; + scheme->stat = (struct damos_stat){}; + INIT_LIST_HEAD(&scheme->list); + + scheme->quota.ms = quota->ms; + scheme->quota.sz = quota->sz; + scheme->quota.reset_interval = quota->reset_interval; + scheme->quota.weight_sz = quota->weight_sz; + scheme->quota.weight_nr_accesses = quota->weight_nr_accesses; + scheme->quota.weight_age = quota->weight_age; + scheme->quota.total_charged_sz = 0; + scheme->quota.total_charged_ns = 0; + scheme->quota.esz = 0; + scheme->quota.charged_sz = 0; + scheme->quota.charged_from = 0; + scheme->quota.charge_target_from = NULL; + scheme->quota.charge_addr_from = 0; + + scheme->wmarks.metric = wmarks->metric; + scheme->wmarks.interval = wmarks->interval; + scheme->wmarks.high = wmarks->high; + scheme->wmarks.mid = wmarks->mid; + scheme->wmarks.low = wmarks->low; + scheme->wmarks.activated = true; + + return scheme; +} + +void damon_add_scheme(struct damon_ctx *ctx, struct damos *s) +{ + list_add_tail(&s->list, &ctx->schemes); +} + +static void damon_del_scheme(struct damos *s) +{ + list_del(&s->list); +} + +static void damon_free_scheme(struct damos *s) +{ + kfree(s); +} + +void damon_destroy_scheme(struct damos *s) +{ + damon_del_scheme(s); + damon_free_scheme(s); +} + +/* + * Construct a damon_target struct + * + * Returns the pointer to the new struct if success, or NULL otherwise + */ +struct damon_target *damon_new_target(unsigned long id) +{ + struct damon_target *t; + + t = kmalloc(sizeof(*t), GFP_KERNEL); + if (!t) + return NULL; + + t->id = id; + t->nr_regions = 0; + INIT_LIST_HEAD(&t->regions_list); + + return t; +} + +void damon_add_target(struct damon_ctx *ctx, struct damon_target *t) +{ + list_add_tail(&t->list, &ctx->adaptive_targets); +} + +bool damon_targets_empty(struct damon_ctx *ctx) +{ + return list_empty(&ctx->adaptive_targets); +} + +static void damon_del_target(struct damon_target *t) +{ + list_del(&t->list); +} + +void damon_free_target(struct damon_target *t) +{ + struct damon_region *r, *next; + + damon_for_each_region_safe(r, next, t) + damon_free_region(r); + kfree(t); +} + +void damon_destroy_target(struct damon_target *t) +{ + damon_del_target(t); + 
damon_free_target(t); +} + +unsigned int damon_nr_regions(struct damon_target *t) +{ + return t->nr_regions; +} + +struct damon_ctx *damon_new_ctx(void) +{ + struct damon_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + ctx->sample_interval = 5 * 1000; + ctx->aggr_interval = 100 * 1000; + ctx->primitive_update_interval = 60 * 1000 * 1000; + + ktime_get_coarse_ts64(&ctx->last_aggregation); + ctx->last_primitive_update = ctx->last_aggregation; + + mutex_init(&ctx->kdamond_lock); + + ctx->min_nr_regions = 10; + ctx->max_nr_regions = 1000; + + INIT_LIST_HEAD(&ctx->adaptive_targets); + INIT_LIST_HEAD(&ctx->schemes); + + return ctx; +} + +static void damon_destroy_targets(struct damon_ctx *ctx) +{ + struct damon_target *t, *next_t; + + if (ctx->primitive.cleanup) { + ctx->primitive.cleanup(ctx); + return; + } + + damon_for_each_target_safe(t, next_t, ctx) + damon_destroy_target(t); +} + +void damon_destroy_ctx(struct damon_ctx *ctx) +{ + struct damos *s, *next_s; + + damon_destroy_targets(ctx); + + damon_for_each_scheme_safe(s, next_s, ctx) + damon_destroy_scheme(s); + + kfree(ctx); +} + +/** + * damon_set_targets() - Set monitoring targets. + * @ctx: monitoring context + * @ids: array of target ids + * @nr_ids: number of entries in @ids + * + * This function should not be called while the kdamond is running. + * + * Return: 0 on success, negative error code otherwise. + */ +int damon_set_targets(struct damon_ctx *ctx, + unsigned long *ids, ssize_t nr_ids) +{ + ssize_t i; + struct damon_target *t, *next; + + damon_destroy_targets(ctx); + + for (i = 0; i < nr_ids; i++) { + t = damon_new_target(ids[i]); + if (!t) { + /* The caller should do cleanup of the ids itself */ + damon_for_each_target_safe(t, next, ctx) + damon_destroy_target(t); + return -ENOMEM; + } + damon_add_target(ctx, t); + } + + return 0; +} + +/** + * damon_set_attrs() - Set attributes for the monitoring. + * @ctx: monitoring context + * @sample_int: time interval between samplings + * @aggr_int: time interval between aggregations + * @primitive_upd_int: time interval between monitoring primitive updates + * @min_nr_reg: minimal number of regions + * @max_nr_reg: maximum number of regions + * + * This function should not be called while the kdamond is running. + * Every time interval is in micro-seconds. + * + * Return: 0 on success, negative error code otherwise. + */ +int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, + unsigned long aggr_int, unsigned long primitive_upd_int, + unsigned long min_nr_reg, unsigned long max_nr_reg) +{ + if (min_nr_reg < 3) + return -EINVAL; + if (min_nr_reg > max_nr_reg) + return -EINVAL; + + ctx->sample_interval = sample_int; + ctx->aggr_interval = aggr_int; + ctx->primitive_update_interval = primitive_upd_int; + ctx->min_nr_regions = min_nr_reg; + ctx->max_nr_regions = max_nr_reg; + + return 0; +} + +/** + * damon_set_schemes() - Set data access monitoring based operation schemes. + * @ctx: monitoring context + * @schemes: array of the schemes + * @nr_schemes: number of entries in @schemes + * + * This function should not be called while the kdamond of the context is + * running. + * + * Return: 0 if success, or negative error code otherwise. 
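Putting damon_new_scheme() and damon_set_schemes() together, a caller might install a single "page out cold regions" scheme along these lines; example_install_scheme is a hypothetical helper and every number is an arbitrary example:

	/* Illustrative sketch only: one DAMOS_PAGEOUT scheme, no watermarks. */
	static int example_install_scheme(struct damon_ctx *ctx)
	{
		struct damos_quota quota = {
			.ms = 10,			/* time quota per window */
			.sz = 128 * 1024 * 1024,	/* size quota: 128 MiB */
			.reset_interval = 1000,		/* charge window: 1s */
			.weight_age = 1,		/* prioritize by age */
		};
		struct damos_watermarks wmarks = { .metric = DAMOS_WMARK_NONE };
		struct damos *scheme;

		scheme = damon_new_scheme(PAGE_SIZE, ULONG_MAX,	/* any size */
					  0, 0,		/* zero accesses... */
					  60, UINT_MAX,	/* ...for 60+ aggregations */
					  DAMOS_PAGEOUT, &quota, &wmarks);
		if (!scheme)
			return -ENOMEM;

		return damon_set_schemes(ctx, &scheme, 1);
	}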
+ */ +int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes, + ssize_t nr_schemes) +{ + struct damos *s, *next; + ssize_t i; + + damon_for_each_scheme_safe(s, next, ctx) + damon_destroy_scheme(s); + for (i = 0; i < nr_schemes; i++) + damon_add_scheme(ctx, schemes[i]); + return 0; +} + +/** + * damon_nr_running_ctxs() - Return number of currently running contexts. + */ +int damon_nr_running_ctxs(void) +{ + int nr_ctxs; + + mutex_lock(&damon_lock); + nr_ctxs = nr_running_ctxs; + mutex_unlock(&damon_lock); + + return nr_ctxs; +} + +/* Returns the size upper limit for each monitoring region */ +static unsigned long damon_region_sz_limit(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct damon_region *r; + unsigned long sz = 0; + + damon_for_each_target(t, ctx) { + damon_for_each_region(r, t) + sz += r->ar.end - r->ar.start; + } + + if (ctx->min_nr_regions) + sz /= ctx->min_nr_regions; + if (sz < DAMON_MIN_REGION) + sz = DAMON_MIN_REGION; + + return sz; +} + +static int kdamond_fn(void *data); + +/* + * __damon_start() - Starts monitoring with given context. + * @ctx: monitoring context + * + * This function should be called while damon_lock is held. + * + * Return: 0 on success, negative error code otherwise. + */ +static int __damon_start(struct damon_ctx *ctx) +{ + int err = -EBUSY; + + mutex_lock(&ctx->kdamond_lock); + if (!ctx->kdamond) { + err = 0; + ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d", + nr_running_ctxs); + if (IS_ERR(ctx->kdamond)) { + err = PTR_ERR(ctx->kdamond); + ctx->kdamond = NULL; + } + } + mutex_unlock(&ctx->kdamond_lock); + + return err; +} + +/** + * damon_start() - Starts the monitoring for a given group of contexts. + * @ctxs: an array of the pointers for contexts to start monitoring + * @nr_ctxs: size of @ctxs + * + * This function starts a group of monitoring threads for a group of monitoring + * contexts. One thread per context is created and run in parallel. The + * caller should handle synchronization between the threads by itself. If a + * group of threads that was created by another 'damon_start()' call is + * currently running, this function does nothing but returns -EBUSY. + * + * Return: 0 on success, negative error code otherwise. + */ +int damon_start(struct damon_ctx **ctxs, int nr_ctxs) +{ + int i; + int err = 0; + + mutex_lock(&damon_lock); + if (nr_running_ctxs) { + mutex_unlock(&damon_lock); + return -EBUSY; + } + + for (i = 0; i < nr_ctxs; i++) { + err = __damon_start(ctxs[i]); + if (err) + break; + nr_running_ctxs++; + } + mutex_unlock(&damon_lock); + + return err; +} + +/* + * __damon_stop() - Stops monitoring of given context. + * @ctx: monitoring context + * + * Return: 0 on success, negative error code otherwise. + */ +static int __damon_stop(struct damon_ctx *ctx) +{ + struct task_struct *tsk; + + mutex_lock(&ctx->kdamond_lock); + tsk = ctx->kdamond; + if (tsk) { + get_task_struct(tsk); + mutex_unlock(&ctx->kdamond_lock); + kthread_stop(tsk); + put_task_struct(tsk); + return 0; + } + mutex_unlock(&ctx->kdamond_lock); + + return -EPERM; +} + +/** + * damon_stop() - Stops the monitoring for a given group of contexts. + * @ctxs: an array of the pointers for contexts to stop monitoring + * @nr_ctxs: size of @ctxs + * + * Return: 0 on success, negative error code otherwise.
+ */ +int damon_stop(struct damon_ctx **ctxs, int nr_ctxs) +{ + int i, err = 0; + + for (i = 0; i < nr_ctxs; i++) { + /* nr_running_ctxs is decremented in kdamond_fn */ + err = __damon_stop(ctxs[i]); + if (err) + return err; + } + + return err; +} + +/* + * damon_check_reset_time_interval() - Check if a time interval is elapsed. + * @baseline: the time to check whether the interval has elapsed since + * @interval: the time interval (microseconds) + * + * See whether the given time interval has passed since the given baseline + * time. If so, it also updates the baseline to current time for next check. + * + * Return: true if the time interval has passed, or false otherwise. + */ +static bool damon_check_reset_time_interval(struct timespec64 *baseline, + unsigned long interval) +{ + struct timespec64 now; + + ktime_get_coarse_ts64(&now); + if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) < + interval * 1000) + return false; + *baseline = now; + return true; +} + +/* + * Check whether it is time to flush the aggregated information + */ +static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx) +{ + return damon_check_reset_time_interval(&ctx->last_aggregation, + ctx->aggr_interval); +} + +/* + * Reset the aggregated monitoring results ('nr_accesses' of each region). + */ +static void kdamond_reset_aggregated(struct damon_ctx *c) +{ + struct damon_target *t; + unsigned int ti = 0; /* target's index */ + + damon_for_each_target(t, c) { + struct damon_region *r; + + damon_for_each_region(r, t) { + trace_damon_aggregated(t, ti, r, damon_nr_regions(t)); + r->last_nr_accesses = r->nr_accesses; + r->nr_accesses = 0; + } + ti++; + } +} + +static void damon_split_region_at(struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + unsigned long sz_r); + +static bool __damos_valid_target(struct damon_region *r, struct damos *s) +{ + unsigned long sz; + + sz = r->ar.end - r->ar.start; + return s->min_sz_region <= sz && sz <= s->max_sz_region && + s->min_nr_accesses <= r->nr_accesses && + r->nr_accesses <= s->max_nr_accesses && + s->min_age_region <= r->age && r->age <= s->max_age_region; +} + +static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t, + struct damon_region *r, struct damos *s) +{ + bool ret = __damos_valid_target(r, s); + + if (!ret || !s->quota.esz || !c->primitive.get_scheme_score) + return ret; + + return c->primitive.get_scheme_score(c, t, r, s) >= s->quota.min_score; +} + +static void damon_do_apply_schemes(struct damon_ctx *c, + struct damon_target *t, + struct damon_region *r) +{ + struct damos *s; + + damon_for_each_scheme(s, c) { + struct damos_quota *quota = &s->quota; + unsigned long sz = r->ar.end - r->ar.start; + struct timespec64 begin, end; + unsigned long sz_applied = 0; + + if (!s->wmarks.activated) + continue; + + /* Check the quota */ + if (quota->esz && quota->charged_sz >= quota->esz) + continue; + + /* Skip previously charged regions */ + if (quota->charge_target_from) { + if (t != quota->charge_target_from) + continue; + if (r == damon_last_region(t)) { + quota->charge_target_from = NULL; + quota->charge_addr_from = 0; + continue; + } + if (quota->charge_addr_from && + r->ar.end <= quota->charge_addr_from) + continue; + + if (quota->charge_addr_from && r->ar.start < + quota->charge_addr_from) { + sz = ALIGN_DOWN(quota->charge_addr_from - + r->ar.start, DAMON_MIN_REGION); + if (!sz) { + if (r->ar.end - r->ar.start <= + DAMON_MIN_REGION) + continue; + sz = DAMON_MIN_REGION; + } + damon_split_region_at(c, t, r, 
sz); + r = damon_next_region(r); + sz = r->ar.end - r->ar.start; + } + quota->charge_target_from = NULL; + quota->charge_addr_from = 0; + } + + if (!damos_valid_target(c, t, r, s)) + continue; + + /* Apply the scheme */ + if (c->primitive.apply_scheme) { + if (quota->esz && + quota->charged_sz + sz > quota->esz) { + sz = ALIGN_DOWN(quota->esz - quota->charged_sz, + DAMON_MIN_REGION); + if (!sz) + goto update_stat; + damon_split_region_at(c, t, r, sz); + } + ktime_get_coarse_ts64(&begin); + sz_applied = c->primitive.apply_scheme(c, t, r, s); + ktime_get_coarse_ts64(&end); + quota->total_charged_ns += timespec64_to_ns(&end) - + timespec64_to_ns(&begin); + quota->charged_sz += sz; + if (quota->esz && quota->charged_sz >= quota->esz) { + quota->charge_target_from = t; + quota->charge_addr_from = r->ar.end + 1; + } + } + if (s->action != DAMOS_STAT) + r->age = 0; + +update_stat: + s->stat.nr_tried++; + s->stat.sz_tried += sz; + if (sz_applied) + s->stat.nr_applied++; + s->stat.sz_applied += sz_applied; + } +} + +/* Shouldn't be called if quota->ms and quota->sz are zero */ +static void damos_set_effective_quota(struct damos_quota *quota) +{ + unsigned long throughput; + unsigned long esz; + + if (!quota->ms) { + quota->esz = quota->sz; + return; + } + + if (quota->total_charged_ns) + throughput = quota->total_charged_sz * 1000000 / + quota->total_charged_ns; + else + throughput = PAGE_SIZE * 1024; + esz = throughput * quota->ms; + + if (quota->sz && quota->sz < esz) + esz = quota->sz; + quota->esz = esz; +} + +static void kdamond_apply_schemes(struct damon_ctx *c) +{ + struct damon_target *t; + struct damon_region *r, *next_r; + struct damos *s; + + damon_for_each_scheme(s, c) { + struct damos_quota *quota = &s->quota; + unsigned long cumulated_sz; + unsigned int score, max_score = 0; + + if (!s->wmarks.activated) + continue; + + if (!quota->ms && !quota->sz) + continue; + + /* New charge window starts */ + if (time_after_eq(jiffies, quota->charged_from + + msecs_to_jiffies( + quota->reset_interval))) { + if (quota->esz && quota->charged_sz >= quota->esz) + s->stat.qt_exceeds++; + quota->total_charged_sz += quota->charged_sz; + quota->charged_from = jiffies; + quota->charged_sz = 0; + damos_set_effective_quota(quota); + } + + if (!c->primitive.get_scheme_score) + continue; + + /* Fill up the score histogram */ + memset(quota->histogram, 0, sizeof(quota->histogram)); + damon_for_each_target(t, c) { + damon_for_each_region(r, t) { + if (!__damos_valid_target(r, s)) + continue; + score = c->primitive.get_scheme_score( + c, t, r, s); + quota->histogram[score] += + r->ar.end - r->ar.start; + if (score > max_score) + max_score = score; + } + } + + /* Set the min score limit */ + for (cumulated_sz = 0, score = max_score; ; score--) { + cumulated_sz += quota->histogram[score]; + if (cumulated_sz >= quota->esz || !score) + break; + } + quota->min_score = score; + } + + damon_for_each_target(t, c) { + damon_for_each_region_safe(r, next_r, t) + damon_do_apply_schemes(c, t, r); + } +} + +static inline unsigned long sz_damon_region(struct damon_region *r) +{ + return r->ar.end - r->ar.start; +} + +/* + * Merge two adjacent regions into one region + */ +static void damon_merge_two_regions(struct damon_target *t, + struct damon_region *l, struct damon_region *r) +{ + unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r); + + l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) / + (sz_l + sz_r); + l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r); + l->ar.end = r->ar.end; + 
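+	/* 'l' now covers both ranges and carries the size-weighted averages; 'r' is redundant */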
damon_destroy_region(r, t); +} + +/* + * Merge adjacent regions having similar access frequencies + * + * t target affected by this merge operation + * thres '->nr_accesses' diff threshold for the merge + * sz_limit size upper limit of each region + */ +static void damon_merge_regions_of(struct damon_target *t, unsigned int thres, + unsigned long sz_limit) +{ + struct damon_region *r, *prev = NULL, *next; + + damon_for_each_region_safe(r, next, t) { + if (abs(r->nr_accesses - r->last_nr_accesses) > thres) + r->age = 0; + else + r->age++; + + if (prev && prev->ar.end == r->ar.start && + abs(prev->nr_accesses - r->nr_accesses) <= thres && + sz_damon_region(prev) + sz_damon_region(r) <= sz_limit) + damon_merge_two_regions(t, prev, r); + else + prev = r; + } +} + +/* + * Merge adjacent regions having similar access frequencies + * + * threshold '->nr_accesses' diff threshold for the merge + * sz_limit size upper limit of each region + * + * This function merges monitoring target regions which are adjacent and their + * access frequencies are similar. This is for minimizing the monitoring + * overhead under the dynamically changeable access pattern. If a merge was + * unnecessarily made, later 'kdamond_split_regions()' will revert it. + */ +static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold, + unsigned long sz_limit) +{ + struct damon_target *t; + + damon_for_each_target(t, c) + damon_merge_regions_of(t, threshold, sz_limit); +} + +/* + * Split a region in two + * + * r the region to be split + * sz_r size of the first sub-region that will be made + */ +static void damon_split_region_at(struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + unsigned long sz_r) +{ + struct damon_region *new; + + new = damon_new_region(r->ar.start + sz_r, r->ar.end); + if (!new) + return; + + r->ar.end = new->ar.start; + + new->age = r->age; + new->last_nr_accesses = r->last_nr_accesses; + + damon_insert_region(new, r, damon_next_region(r), t); +} + +/* Split every region in the given target into 'nr_subs' regions */ +static void damon_split_regions_of(struct damon_ctx *ctx, + struct damon_target *t, int nr_subs) +{ + struct damon_region *r, *next; + unsigned long sz_region, sz_sub = 0; + int i; + + damon_for_each_region_safe(r, next, t) { + sz_region = r->ar.end - r->ar.start; + + for (i = 0; i < nr_subs - 1 && + sz_region > 2 * DAMON_MIN_REGION; i++) { + /* + * Randomly select size of left sub-region to be at + * least 10 percent and at most 90% of original region + */ + sz_sub = ALIGN_DOWN(damon_rand(1, 10) * + sz_region / 10, DAMON_MIN_REGION); + /* Do not allow blank region */ + if (sz_sub == 0 || sz_sub >= sz_region) + continue; + + damon_split_region_at(ctx, t, r, sz_sub); + sz_region = sz_sub; + } + } +} + +/* + * Split every target region into randomly-sized small regions + * + * This function splits every target region into random-sized small regions if + * current total number of the regions is equal or smaller than half of the + * user-specified maximum number of regions. This is for maximizing the + * monitoring accuracy under the dynamically changeable access patterns. If a + * split was unnecessarily made, later 'kdamond_merge_regions()' will revert + * it. 
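+ * + * For example, with max_nr_regions of 1000, splitting is tried only while the + * total number of regions is at most 500, so later merges always have room to + * shrink the set back under the limit.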
+ */ +static void kdamond_split_regions(struct damon_ctx *ctx) +{ + struct damon_target *t; + unsigned int nr_regions = 0; + static unsigned int last_nr_regions; + int nr_subregions = 2; + + damon_for_each_target(t, ctx) + nr_regions += damon_nr_regions(t); + + if (nr_regions > ctx->max_nr_regions / 2) + return; + + /* Maybe the middle of the region has different access frequency */ + if (last_nr_regions == nr_regions && + nr_regions < ctx->max_nr_regions / 3) + nr_subregions = 3; + + damon_for_each_target(t, ctx) + damon_split_regions_of(ctx, t, nr_subregions); + + last_nr_regions = nr_regions; +} + +/* + * Check whether it is time to check and apply the target monitoring regions + * + * Returns true if it is. + */ +static bool kdamond_need_update_primitive(struct damon_ctx *ctx) +{ + return damon_check_reset_time_interval(&ctx->last_primitive_update, + ctx->primitive_update_interval); +} + +/* + * Check whether current monitoring should be stopped + * + * The monitoring is stopped when either the user requested to stop, or all + * monitoring targets are invalid. + * + * Returns true if need to stop current monitoring. + */ +static bool kdamond_need_stop(struct damon_ctx *ctx) +{ + struct damon_target *t; + + if (kthread_should_stop()) + return true; + + if (!ctx->primitive.target_valid) + return false; + + damon_for_each_target(t, ctx) { + if (ctx->primitive.target_valid(t)) + return false; + } + + return true; +} + +static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric) +{ + struct sysinfo i; + + switch (metric) { + case DAMOS_WMARK_FREE_MEM_RATE: + si_meminfo(&i); + return i.freeram * 1000 / i.totalram; + default: + break; + } + return -EINVAL; +} + +/* + * Returns zero if the scheme is active. Else, returns time to wait for next + * watermark check in micro-seconds. + */ +static unsigned long damos_wmark_wait_us(struct damos *scheme) +{ + unsigned long metric; + + if (scheme->wmarks.metric == DAMOS_WMARK_NONE) + return 0; + + metric = damos_wmark_metric_value(scheme->wmarks.metric); + /* higher than high watermark or lower than low watermark */ + if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) { + if (scheme->wmarks.activated) + pr_debug("deactivate a scheme (%d) for %s wmark\n", + scheme->action, + metric > scheme->wmarks.high ? 
+ "high" : "low"); + scheme->wmarks.activated = false; + return scheme->wmarks.interval; + } + + /* inactive and higher than middle watermark */ + if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) && + !scheme->wmarks.activated) + return scheme->wmarks.interval; + + if (!scheme->wmarks.activated) + pr_debug("activate a scheme (%d)\n", scheme->action); + scheme->wmarks.activated = true; + return 0; +} + +static void kdamond_usleep(unsigned long usecs) +{ + /* See Documentation/timers/timers-howto.rst for the thresholds */ + if (usecs > 20 * USEC_PER_MSEC) + schedule_timeout_idle(usecs_to_jiffies(usecs)); + else + usleep_idle_range(usecs, usecs + 1); +} + +/* Returns negative error code if it's not activated but should return */ +static int kdamond_wait_activation(struct damon_ctx *ctx) +{ + struct damos *s; + unsigned long wait_time; + unsigned long min_wait_time = 0; + + while (!kdamond_need_stop(ctx)) { + damon_for_each_scheme(s, ctx) { + wait_time = damos_wmark_wait_us(s); + if (!min_wait_time || wait_time < min_wait_time) + min_wait_time = wait_time; + } + if (!min_wait_time) + return 0; + + kdamond_usleep(min_wait_time); + } + return -EBUSY; +} + +/* + * The monitoring daemon that runs as a kernel thread + */ +static int kdamond_fn(void *data) +{ + struct damon_ctx *ctx = (struct damon_ctx *)data; + struct damon_target *t; + struct damon_region *r, *next; + unsigned int max_nr_accesses = 0; + unsigned long sz_limit = 0; + bool done = false; + + pr_debug("kdamond (%d) starts\n", current->pid); + + if (ctx->primitive.init) + ctx->primitive.init(ctx); + if (ctx->callback.before_start && ctx->callback.before_start(ctx)) + done = true; + + sz_limit = damon_region_sz_limit(ctx); + + while (!kdamond_need_stop(ctx) && !done) { + if (kdamond_wait_activation(ctx)) + continue; + + if (ctx->primitive.prepare_access_checks) + ctx->primitive.prepare_access_checks(ctx); + if (ctx->callback.after_sampling && + ctx->callback.after_sampling(ctx)) + done = true; + + kdamond_usleep(ctx->sample_interval); + + if (ctx->primitive.check_accesses) + max_nr_accesses = ctx->primitive.check_accesses(ctx); + + if (kdamond_aggregate_interval_passed(ctx)) { + kdamond_merge_regions(ctx, + max_nr_accesses / 10, + sz_limit); + if (ctx->callback.after_aggregation && + ctx->callback.after_aggregation(ctx)) + done = true; + kdamond_apply_schemes(ctx); + kdamond_reset_aggregated(ctx); + kdamond_split_regions(ctx); + if (ctx->primitive.reset_aggregated) + ctx->primitive.reset_aggregated(ctx); + } + + if (kdamond_need_update_primitive(ctx)) { + if (ctx->primitive.update) + ctx->primitive.update(ctx); + sz_limit = damon_region_sz_limit(ctx); + } + } + damon_for_each_target(t, ctx) { + damon_for_each_region_safe(r, next, t) + damon_destroy_region(r, t); + } + + if (ctx->callback.before_terminate) + ctx->callback.before_terminate(ctx); + if (ctx->primitive.cleanup) + ctx->primitive.cleanup(ctx); + + pr_debug("kdamond (%d) finishes\n", current->pid); + mutex_lock(&ctx->kdamond_lock); + ctx->kdamond = NULL; + mutex_unlock(&ctx->kdamond_lock); + + mutex_lock(&damon_lock); + nr_running_ctxs--; + mutex_unlock(&damon_lock); + + return 0; +} + +#include "core-test.h" diff --git a/mm/damon/dbgfs-test.h b/mm/damon/dbgfs-test.h new file mode 100644 index 000000000000..86b9f9528231 --- /dev/null +++ b/mm/damon/dbgfs-test.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * DAMON Debugfs Interface Unit Tests + * + * Author: SeongJae Park + */ + +#ifdef CONFIG_DAMON_DBGFS_KUNIT_TEST + +#ifndef 
_DAMON_DBGFS_TEST_H +#define _DAMON_DBGFS_TEST_H + +#include <kunit/test.h> + +static void damon_dbgfs_test_str_to_target_ids(struct kunit *test) +{ + char *question; + unsigned long *answers; + unsigned long expected[] = {12, 35, 46}; + ssize_t nr_integers = 0, i; + + question = "123"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers); + KUNIT_EXPECT_EQ(test, 123ul, answers[0]); + kfree(answers); + + question = "123abc"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers); + KUNIT_EXPECT_EQ(test, 123ul, answers[0]); + kfree(answers); + + question = "a123"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers); + kfree(answers); + + question = "12 35"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers); + for (i = 0; i < nr_integers; i++) + KUNIT_EXPECT_EQ(test, expected[i], answers[i]); + kfree(answers); + + question = "12 35 46"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)3, nr_integers); + for (i = 0; i < nr_integers; i++) + KUNIT_EXPECT_EQ(test, expected[i], answers[i]); + kfree(answers); + + question = "12 35 abc 46"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers); + for (i = 0; i < 2; i++) + KUNIT_EXPECT_EQ(test, expected[i], answers[i]); + kfree(answers); + + question = ""; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers); + kfree(answers); + + question = "\n"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers); + kfree(answers); +} + +static void damon_dbgfs_test_set_targets(struct kunit *test) +{ + struct damon_ctx *ctx = dbgfs_new_ctx(); + unsigned long ids[] = {1, 2, 3}; + char buf[64]; + + /* Make DAMON consider target id as plain number */ + ctx->primitive.target_valid = NULL; + ctx->primitive.cleanup = NULL; + + damon_set_targets(ctx, ids, 3); + sprint_target_ids(ctx, buf, 64); + KUNIT_EXPECT_STREQ(test, (char *)buf, "1 2 3\n"); + + damon_set_targets(ctx, NULL, 0); + sprint_target_ids(ctx, buf, 64); + KUNIT_EXPECT_STREQ(test, (char *)buf, "\n"); + + damon_set_targets(ctx, (unsigned long []){1, 2}, 2); + sprint_target_ids(ctx, buf, 64); + KUNIT_EXPECT_STREQ(test, (char *)buf, "1 2\n"); + + damon_set_targets(ctx, (unsigned long []){2}, 1); + sprint_target_ids(ctx, buf, 64); + KUNIT_EXPECT_STREQ(test, (char *)buf, "2\n"); + + damon_set_targets(ctx, NULL, 0); + sprint_target_ids(ctx, buf, 64); + KUNIT_EXPECT_STREQ(test, (char *)buf, "\n"); + + dbgfs_destroy_ctx(ctx); +} + +static void damon_dbgfs_test_set_init_regions(struct kunit *test) +{ + struct damon_ctx *ctx = damon_new_ctx(); + unsigned long ids[] = {1, 2, 3}; + /* Each line represents one region in ``<target id> <start address> <end address>`` */ + char * const valid_inputs[] = {"2 10 20\n 2 20 30\n2 35 45", + "2 10 20\n", + "2 10 20\n1 39 59\n1 70 134\n 2 20 25\n", + ""}; + /* Reading the file again will show sorted, clean output */ + char * const valid_expects[] = {"2 10 20\n2 20 30\n2 35 45\n", + "2 10 20\n", + "1 39 59\n1 70 134\n2 10 20\n2 20 25\n", + ""}; + char * const invalid_inputs[] = {"4 10 20\n", /* target does not exist */ + "2 10 20\n 2 14 26\n", /* regions overlap */ + "1 10 20\n2 30 40\n 1 5 8"};
/* not sorted by address */ + char *input, *expect; + int i, rc; + char buf[256]; + + damon_set_targets(ctx, ids, 3); + + /* Put valid inputs and check the results */ + for (i = 0; i < ARRAY_SIZE(valid_inputs); i++) { + input = valid_inputs[i]; + expect = valid_expects[i]; + + rc = set_init_regions(ctx, input, strnlen(input, 256)); + KUNIT_EXPECT_EQ(test, rc, 0); + + memset(buf, 0, 256); + sprint_init_regions(ctx, buf, 256); + + KUNIT_EXPECT_STREQ(test, (char *)buf, expect); + } + /* Put invalid inputs and check the return error code */ + for (i = 0; i < ARRAY_SIZE(invalid_inputs); i++) { + input = invalid_inputs[i]; + pr_info("input: %s\n", input); + rc = set_init_regions(ctx, input, strnlen(input, 256)); + KUNIT_EXPECT_EQ(test, rc, -EINVAL); + + memset(buf, 0, 256); + sprint_init_regions(ctx, buf, 256); + + KUNIT_EXPECT_STREQ(test, (char *)buf, ""); + } + + damon_set_targets(ctx, NULL, 0); + damon_destroy_ctx(ctx); +} + +static struct kunit_case damon_test_cases[] = { + KUNIT_CASE(damon_dbgfs_test_str_to_target_ids), + KUNIT_CASE(damon_dbgfs_test_set_targets), + KUNIT_CASE(damon_dbgfs_test_set_init_regions), + {}, +}; + +static struct kunit_suite damon_test_suite = { + .name = "damon-dbgfs", + .test_cases = damon_test_cases, +}; +kunit_test_suite(damon_test_suite); + +#endif /* _DAMON_DBGFS_TEST_H */ + +#endif /* CONFIG_DAMON_DBGFS_KUNIT_TEST */ diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c new file mode 100644 index 000000000000..5b899601e56c --- /dev/null +++ b/mm/damon/dbgfs.c @@ -0,0 +1,990 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON Debugfs Interface + * + * Author: SeongJae Park + */ + +#define pr_fmt(fmt) "damon-dbgfs: " fmt + +#include <linux/damon.h> +#include <linux/debugfs.h> +#include <linux/file.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/page_idle.h> +#include <linux/slab.h> + +static struct damon_ctx **dbgfs_ctxs; +static int dbgfs_nr_ctxs; +static struct dentry **dbgfs_dirs; +static DEFINE_MUTEX(damon_dbgfs_lock); + +/* + * Returns non-empty string on success, or an error pointer otherwise.
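+ * + * Callers unwrap the result with the usual error-pointer idiom, e.g.: + * + *	kbuf = user_input_str(buf, count, ppos); + *	if (IS_ERR(kbuf)) + *		return PTR_ERR(kbuf);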
+ */ +static char *user_input_str(const char __user *buf, size_t count, loff_t *ppos) +{ + char *kbuf; + ssize_t ret; + + /* We do not accept continuous write */ + if (*ppos) + return ERR_PTR(-EINVAL); + + kbuf = kmalloc(count + 1, GFP_KERNEL | __GFP_NOWARN); + if (!kbuf) + return ERR_PTR(-ENOMEM); + + ret = simple_write_to_buffer(kbuf, count + 1, ppos, buf, count); + if (ret != count) { + kfree(kbuf); + return ERR_PTR(-EIO); + } + kbuf[ret] = '\0'; + + return kbuf; +} + +static ssize_t dbgfs_attrs_read(struct file *file, + char __user *buf, size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char kbuf[128]; + int ret; + + mutex_lock(&ctx->kdamond_lock); + ret = scnprintf(kbuf, ARRAY_SIZE(kbuf), "%lu %lu %lu %lu %lu\n", + ctx->sample_interval, ctx->aggr_interval, + ctx->primitive_update_interval, ctx->min_nr_regions, + ctx->max_nr_regions); + mutex_unlock(&ctx->kdamond_lock); + + return simple_read_from_buffer(buf, count, ppos, kbuf, ret); +} + +static ssize_t dbgfs_attrs_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + unsigned long s, a, r, minr, maxr; + char *kbuf; + ssize_t ret; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + if (sscanf(kbuf, "%lu %lu %lu %lu %lu", + &s, &a, &r, &minr, &maxr) != 5) { + ret = -EINVAL; + goto out; + } + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + ret = -EBUSY; + goto unlock_out; + } + + ret = damon_set_attrs(ctx, s, a, r, minr, maxr); + if (!ret) + ret = count; +unlock_out: + mutex_unlock(&ctx->kdamond_lock); +out: + kfree(kbuf); + return ret; +} + +static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len) +{ + struct damos *s; + int written = 0; + int rc; + + damon_for_each_scheme(s, c) { + rc = scnprintf(&buf[written], len - written, + "%lu %lu %u %u %u %u %d %lu %lu %lu %u %u %u %d %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", + s->min_sz_region, s->max_sz_region, + s->min_nr_accesses, s->max_nr_accesses, + s->min_age_region, s->max_age_region, + s->action, + s->quota.ms, s->quota.sz, + s->quota.reset_interval, + s->quota.weight_sz, + s->quota.weight_nr_accesses, + s->quota.weight_age, + s->wmarks.metric, s->wmarks.interval, + s->wmarks.high, s->wmarks.mid, s->wmarks.low, + s->stat.nr_tried, s->stat.sz_tried, + s->stat.nr_applied, s->stat.sz_applied, + s->stat.qt_exceeds); + if (!rc) + return -ENOMEM; + + written += rc; + } + return written; +} + +static ssize_t dbgfs_schemes_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + ssize_t len; + + kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN); + if (!kbuf) + return -ENOMEM; + + mutex_lock(&ctx->kdamond_lock); + len = sprint_schemes(ctx, kbuf, count); + mutex_unlock(&ctx->kdamond_lock); + if (len < 0) + goto out; + len = simple_read_from_buffer(buf, count, ppos, kbuf, len); + +out: + kfree(kbuf); + return len; +} + +static void free_schemes_arr(struct damos **schemes, ssize_t nr_schemes) +{ + ssize_t i; + + for (i = 0; i < nr_schemes; i++) + kfree(schemes[i]); + kfree(schemes); +} + +static bool damos_action_valid(int action) +{ + switch (action) { + case DAMOS_WILLNEED: + case DAMOS_COLD: + case DAMOS_PAGEOUT: + case DAMOS_HUGEPAGE: + case DAMOS_NOHUGEPAGE: + case DAMOS_STAT: + return true; + default: + return false; + } +} + +/* + * Converts a string into an array of struct damos pointers + * + * Returns an array of struct damos pointers that 
are converted from the given string on + * success, or NULL otherwise. + */ +static struct damos **str_to_schemes(const char *str, ssize_t len, + ssize_t *nr_schemes) +{ + struct damos *scheme, **schemes; + const int max_nr_schemes = 256; + int pos = 0, parsed, ret; + unsigned long min_sz, max_sz; + unsigned int min_nr_a, max_nr_a, min_age, max_age; + unsigned int action; + + schemes = kmalloc_array(max_nr_schemes, sizeof(scheme), + GFP_KERNEL); + if (!schemes) + return NULL; + + *nr_schemes = 0; + while (pos < len && *nr_schemes < max_nr_schemes) { + struct damos_quota quota = {}; + struct damos_watermarks wmarks; + + ret = sscanf(&str[pos], + "%lu %lu %u %u %u %u %u %lu %lu %lu %u %u %u %u %lu %lu %lu %lu%n", + &min_sz, &max_sz, &min_nr_a, &max_nr_a, + &min_age, &max_age, &action, &quota.ms, + &quota.sz, &quota.reset_interval, + &quota.weight_sz, &quota.weight_nr_accesses, + &quota.weight_age, &wmarks.metric, + &wmarks.interval, &wmarks.high, &wmarks.mid, + &wmarks.low, &parsed); + if (ret != 18) + break; + if (!damos_action_valid(action)) + goto fail; + + if (min_sz > max_sz || min_nr_a > max_nr_a || min_age > max_age) + goto fail; + + if (wmarks.high < wmarks.mid || wmarks.high < wmarks.low || + wmarks.mid < wmarks.low) + goto fail; + + pos += parsed; + scheme = damon_new_scheme(min_sz, max_sz, min_nr_a, max_nr_a, + min_age, max_age, action, &quota, &wmarks); + if (!scheme) + goto fail; + + schemes[*nr_schemes] = scheme; + *nr_schemes += 1; + } + return schemes; +fail: + free_schemes_arr(schemes, *nr_schemes); + return NULL; +} + +static ssize_t dbgfs_schemes_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + struct damos **schemes; + ssize_t nr_schemes = 0, ret; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + schemes = str_to_schemes(kbuf, count, &nr_schemes); + if (!schemes) { + ret = -EINVAL; + goto out; + } + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + ret = -EBUSY; + goto unlock_out; + } + + ret = damon_set_schemes(ctx, schemes, nr_schemes); + if (!ret) { + ret = count; + nr_schemes = 0; + } + +unlock_out: + mutex_unlock(&ctx->kdamond_lock); + free_schemes_arr(schemes, nr_schemes); +out: + kfree(kbuf); + return ret; +} + +static inline bool targetid_is_pid(const struct damon_ctx *ctx) +{ + return ctx->primitive.target_valid == damon_va_target_valid; +} + +static ssize_t sprint_target_ids(struct damon_ctx *ctx, char *buf, ssize_t len) +{ + struct damon_target *t; + unsigned long id; + int written = 0; + int rc; + + damon_for_each_target(t, ctx) { + id = t->id; + if (targetid_is_pid(ctx)) + /* Show pid numbers to debugfs users */ + id = (unsigned long)pid_vnr((struct pid *)id); + + rc = scnprintf(&buf[written], len - written, "%lu ", id); + if (!rc) + return -ENOMEM; + written += rc; + } + if (written) + written -= 1; + written += scnprintf(&buf[written], len - written, "\n"); + return written; +} + +static ssize_t dbgfs_target_ids_read(struct file *file, + char __user *buf, size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + ssize_t len; + char ids_buf[320]; + + mutex_lock(&ctx->kdamond_lock); + len = sprint_target_ids(ctx, ids_buf, 320); + mutex_unlock(&ctx->kdamond_lock); + if (len < 0) + return len; + + return simple_read_from_buffer(buf, count, ppos, ids_buf, len); +} + +/* + * Converts a string into an array of unsigned long integers + * + * Returns an array of unsigned long integers on success, or + * NULL otherwise.
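+ * + * For example, "12 35 46" yields {12, 35, 46} with *nr_ids set to 3, while + * parsing stops at the first token that is not an unsigned integer (see the + * kunit cases in dbgfs-test.h for more examples).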
+ */ +static unsigned long *str_to_target_ids(const char *str, ssize_t len, + ssize_t *nr_ids) +{ + unsigned long *ids; + const int max_nr_ids = 32; + unsigned long id; + int pos = 0, parsed, ret; + + *nr_ids = 0; + ids = kmalloc_array(max_nr_ids, sizeof(id), GFP_KERNEL); + if (!ids) + return NULL; + while (*nr_ids < max_nr_ids && pos < len) { + ret = sscanf(&str[pos], "%lu%n", &id, &parsed); + pos += parsed; + if (ret != 1) + break; + ids[*nr_ids] = id; + *nr_ids += 1; + } + + return ids; +} + +static void dbgfs_put_pids(unsigned long *ids, int nr_ids) +{ + int i; + + for (i = 0; i < nr_ids; i++) + put_pid((struct pid *)ids[i]); +} + +static ssize_t dbgfs_target_ids_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + struct damon_target *t, *next_t; + bool id_is_pid = true; + char *kbuf; + unsigned long *targets; + ssize_t nr_targets; + ssize_t ret; + int i; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + if (!strncmp(kbuf, "paddr\n", count)) { + id_is_pid = false; + /* target id is meaningless here, but we set it just for fun */ + scnprintf(kbuf, count, "42 "); + } + + targets = str_to_target_ids(kbuf, count, &nr_targets); + if (!targets) { + ret = -ENOMEM; + goto out; + } + + if (id_is_pid) { + for (i = 0; i < nr_targets; i++) { + targets[i] = (unsigned long)find_get_pid( + (int)targets[i]); + if (!targets[i]) { + dbgfs_put_pids(targets, i); + ret = -EINVAL; + goto free_targets_out; + } + } + } + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + if (id_is_pid) + dbgfs_put_pids(targets, nr_targets); + ret = -EBUSY; + goto unlock_out; + } + + /* remove previously set targets */ + damon_for_each_target_safe(t, next_t, ctx) { + if (targetid_is_pid(ctx)) + put_pid((struct pid *)t->id); + damon_destroy_target(t); + } + + /* Configure the context for the address space type */ + if (id_is_pid) + damon_va_set_primitives(ctx); + else + damon_pa_set_primitives(ctx); + + ret = damon_set_targets(ctx, targets, nr_targets); + if (ret) { + if (id_is_pid) + dbgfs_put_pids(targets, nr_targets); + } else { + ret = count; + } + +unlock_out: + mutex_unlock(&ctx->kdamond_lock); +free_targets_out: + kfree(targets); +out: + kfree(kbuf); + return ret; +} + +static ssize_t sprint_init_regions(struct damon_ctx *c, char *buf, ssize_t len) +{ + struct damon_target *t; + struct damon_region *r; + int written = 0; + int rc; + + damon_for_each_target(t, c) { + damon_for_each_region(r, t) { + rc = scnprintf(&buf[written], len - written, + "%lu %lu %lu\n", + t->id, r->ar.start, r->ar.end); + if (!rc) + return -ENOMEM; + written += rc; + } + } + return written; +} + +static ssize_t dbgfs_init_regions_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + ssize_t len; + + kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN); + if (!kbuf) + return -ENOMEM; + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + mutex_unlock(&ctx->kdamond_lock); + len = -EBUSY; + goto out; + } + + len = sprint_init_regions(ctx, kbuf, count); + mutex_unlock(&ctx->kdamond_lock); + if (len < 0) + goto out; + len = simple_read_from_buffer(buf, count, ppos, kbuf, len); + +out: + kfree(kbuf); + return len; +} + +static int add_init_region(struct damon_ctx *c, + unsigned long target_id, struct damon_addr_range *ar) +{ + struct damon_target *t; + struct damon_region *r, *prev; + unsigned long id; + int rc = -EINVAL; + + if (ar->start >= 
ar->end) + return -EINVAL; + + damon_for_each_target(t, c) { + id = t->id; + if (targetid_is_pid(c)) + id = (unsigned long)pid_vnr((struct pid *)id); + if (id == target_id) { + r = damon_new_region(ar->start, ar->end); + if (!r) + return -ENOMEM; + damon_add_region(r, t); + if (damon_nr_regions(t) > 1) { + prev = damon_prev_region(r); + if (prev->ar.end > r->ar.start) { + damon_destroy_region(r, t); + return -EINVAL; + } + } + rc = 0; + } + } + return rc; +} + +static int set_init_regions(struct damon_ctx *c, const char *str, ssize_t len) +{ + struct damon_target *t; + struct damon_region *r, *next; + int pos = 0, parsed, ret; + unsigned long target_id; + struct damon_addr_range ar; + int err; + + damon_for_each_target(t, c) { + damon_for_each_region_safe(r, next, t) + damon_destroy_region(r, t); + } + + while (pos < len) { + ret = sscanf(&str[pos], "%lu %lu %lu%n", + &target_id, &ar.start, &ar.end, &parsed); + if (ret != 3) + break; + err = add_init_region(c, target_id, &ar); + if (err) + goto fail; + pos += parsed; + } + + return 0; + +fail: + damon_for_each_target(t, c) { + damon_for_each_region_safe(r, next, t) + damon_destroy_region(r, t); + } + return err; +} + +static ssize_t dbgfs_init_regions_write(struct file *file, + const char __user *buf, size_t count, + loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + ssize_t ret = count; + int err; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + ret = -EBUSY; + goto unlock_out; + } + + err = set_init_regions(ctx, kbuf, ret); + if (err) + ret = err; + +unlock_out: + mutex_unlock(&ctx->kdamond_lock); + kfree(kbuf); + return ret; +} + +static ssize_t dbgfs_kdamond_pid_read(struct file *file, + char __user *buf, size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + ssize_t len; + + kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN); + if (!kbuf) + return -ENOMEM; + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) + len = scnprintf(kbuf, count, "%d\n", ctx->kdamond->pid); + else + len = scnprintf(kbuf, count, "none\n"); + mutex_unlock(&ctx->kdamond_lock); + if (!len) + goto out; + len = simple_read_from_buffer(buf, count, ppos, kbuf, len); + +out: + kfree(kbuf); + return len; +} + +static int damon_dbgfs_open(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + + return nonseekable_open(inode, file); +} + +static const struct file_operations attrs_fops = { + .open = damon_dbgfs_open, + .read = dbgfs_attrs_read, + .write = dbgfs_attrs_write, +}; + +static const struct file_operations schemes_fops = { + .open = damon_dbgfs_open, + .read = dbgfs_schemes_read, + .write = dbgfs_schemes_write, +}; + +static const struct file_operations target_ids_fops = { + .open = damon_dbgfs_open, + .read = dbgfs_target_ids_read, + .write = dbgfs_target_ids_write, +}; + +static const struct file_operations init_regions_fops = { + .open = damon_dbgfs_open, + .read = dbgfs_init_regions_read, + .write = dbgfs_init_regions_write, +}; + +static const struct file_operations kdamond_pid_fops = { + .open = damon_dbgfs_open, + .read = dbgfs_kdamond_pid_read, +}; + +static void dbgfs_fill_ctx_dir(struct dentry *dir, struct damon_ctx *ctx) +{ + const char * const file_names[] = {"attrs", "schemes", "target_ids", + "init_regions", "kdamond_pid"}; + const struct file_operations *fops[] = {&attrs_fops, &schemes_fops, + &target_ids_fops, &init_regions_fops, &kdamond_pid_fops}; + 
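+	/* file_names[i] is served by fops[i]; the two arrays must stay in sync */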
int i; + + for (i = 0; i < ARRAY_SIZE(file_names); i++) + debugfs_create_file(file_names[i], 0600, dir, ctx, fops[i]); +} + +static void dbgfs_before_terminate(struct damon_ctx *ctx) +{ + struct damon_target *t, *next; + + if (!targetid_is_pid(ctx)) + return; + + mutex_lock(&ctx->kdamond_lock); + damon_for_each_target_safe(t, next, ctx) { + put_pid((struct pid *)t->id); + damon_destroy_target(t); + } + mutex_unlock(&ctx->kdamond_lock); +} + +static struct damon_ctx *dbgfs_new_ctx(void) +{ + struct damon_ctx *ctx; + + ctx = damon_new_ctx(); + if (!ctx) + return NULL; + + damon_va_set_primitives(ctx); + ctx->callback.before_terminate = dbgfs_before_terminate; + return ctx; +} + +static void dbgfs_destroy_ctx(struct damon_ctx *ctx) +{ + damon_destroy_ctx(ctx); +} + +/* + * Make a context of @name and create a debugfs directory for it. + * + * This function should be called while holding damon_dbgfs_lock. + * + * Returns 0 on success, negative error code otherwise. + */ +static int dbgfs_mk_context(char *name) +{ + struct dentry *root, **new_dirs, *new_dir; + struct damon_ctx **new_ctxs, *new_ctx; + + if (damon_nr_running_ctxs()) + return -EBUSY; + + new_ctxs = krealloc(dbgfs_ctxs, sizeof(*dbgfs_ctxs) * + (dbgfs_nr_ctxs + 1), GFP_KERNEL); + if (!new_ctxs) + return -ENOMEM; + dbgfs_ctxs = new_ctxs; + + new_dirs = krealloc(dbgfs_dirs, sizeof(*dbgfs_dirs) * + (dbgfs_nr_ctxs + 1), GFP_KERNEL); + if (!new_dirs) + return -ENOMEM; + dbgfs_dirs = new_dirs; + + root = dbgfs_dirs[0]; + if (!root) + return -ENOENT; + + new_dir = debugfs_create_dir(name, root); + dbgfs_dirs[dbgfs_nr_ctxs] = new_dir; + + new_ctx = dbgfs_new_ctx(); + if (!new_ctx) { + debugfs_remove(new_dir); + dbgfs_dirs[dbgfs_nr_ctxs] = NULL; + return -ENOMEM; + } + + dbgfs_ctxs[dbgfs_nr_ctxs] = new_ctx; + dbgfs_fill_ctx_dir(dbgfs_dirs[dbgfs_nr_ctxs], + dbgfs_ctxs[dbgfs_nr_ctxs]); + dbgfs_nr_ctxs++; + + return 0; +} + +static ssize_t dbgfs_mk_context_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + char *kbuf; + char *ctx_name; + ssize_t ret; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + ctx_name = kmalloc(count + 1, GFP_KERNEL); + if (!ctx_name) { + kfree(kbuf); + return -ENOMEM; + } + + /* Trim white space */ + if (sscanf(kbuf, "%s", ctx_name) != 1) { + ret = -EINVAL; + goto out; + } + + mutex_lock(&damon_dbgfs_lock); + ret = dbgfs_mk_context(ctx_name); + if (!ret) + ret = count; + mutex_unlock(&damon_dbgfs_lock); + +out: + kfree(kbuf); + kfree(ctx_name); + return ret; +} + +/* + * Remove a context of @name and its debugfs directory. + * + * This function should be called while holding damon_dbgfs_lock. + * + * Return 0 on success, negative error code otherwise. 
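+ * + * This is triggered by writing the context's name to the 'rm_contexts' file, + * e.g. with debugfs mounted at the usual place: + * + *	# echo foo > /sys/kernel/debug/damon/rm_contexts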
+ */ +static int dbgfs_rm_context(char *name) +{ + struct dentry *root, *dir, **new_dirs; + struct damon_ctx **new_ctxs; + int i, j; + + if (damon_nr_running_ctxs()) + return -EBUSY; + + root = dbgfs_dirs[0]; + if (!root) + return -ENOENT; + + dir = debugfs_lookup(name, root); + if (!dir) + return -ENOENT; + + new_dirs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_dirs), + GFP_KERNEL); + if (!new_dirs) + return -ENOMEM; + + new_ctxs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_ctxs), + GFP_KERNEL); + if (!new_ctxs) { + kfree(new_dirs); + return -ENOMEM; + } + + for (i = 0, j = 0; i < dbgfs_nr_ctxs; i++) { + if (dbgfs_dirs[i] == dir) { + debugfs_remove(dbgfs_dirs[i]); + dbgfs_destroy_ctx(dbgfs_ctxs[i]); + continue; + } + new_dirs[j] = dbgfs_dirs[i]; + new_ctxs[j++] = dbgfs_ctxs[i]; + } + + kfree(dbgfs_dirs); + kfree(dbgfs_ctxs); + + dbgfs_dirs = new_dirs; + dbgfs_ctxs = new_ctxs; + dbgfs_nr_ctxs--; + + return 0; +} + +static ssize_t dbgfs_rm_context_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + char *kbuf; + ssize_t ret; + char *ctx_name; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + ctx_name = kmalloc(count + 1, GFP_KERNEL); + if (!ctx_name) { + kfree(kbuf); + return -ENOMEM; + } + + /* Trim white space */ + if (sscanf(kbuf, "%s", ctx_name) != 1) { + ret = -EINVAL; + goto out; + } + + mutex_lock(&damon_dbgfs_lock); + ret = dbgfs_rm_context(ctx_name); + if (!ret) + ret = count; + mutex_unlock(&damon_dbgfs_lock); + +out: + kfree(kbuf); + kfree(ctx_name); + return ret; +} + +static ssize_t dbgfs_monitor_on_read(struct file *file, + char __user *buf, size_t count, loff_t *ppos) +{ + char monitor_on_buf[5]; + bool monitor_on = damon_nr_running_ctxs() != 0; + int len; + + len = scnprintf(monitor_on_buf, 5, monitor_on ? 
"on\n" : "off\n"); + + return simple_read_from_buffer(buf, count, ppos, monitor_on_buf, len); +} + +static ssize_t dbgfs_monitor_on_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + ssize_t ret; + char *kbuf; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + /* Remove white space */ + if (sscanf(kbuf, "%s", kbuf) != 1) { + kfree(kbuf); + return -EINVAL; + } + + mutex_lock(&damon_dbgfs_lock); + if (!strncmp(kbuf, "on", count)) { + int i; + + for (i = 0; i < dbgfs_nr_ctxs; i++) { + if (damon_targets_empty(dbgfs_ctxs[i])) { + kfree(kbuf); + mutex_unlock(&damon_dbgfs_lock); + return -EINVAL; + } + } + ret = damon_start(dbgfs_ctxs, dbgfs_nr_ctxs); + } else if (!strncmp(kbuf, "off", count)) { + ret = damon_stop(dbgfs_ctxs, dbgfs_nr_ctxs); + } else { + ret = -EINVAL; + } + mutex_unlock(&damon_dbgfs_lock); + + if (!ret) + ret = count; + kfree(kbuf); + return ret; +} + +static const struct file_operations mk_contexts_fops = { + .write = dbgfs_mk_context_write, +}; + +static const struct file_operations rm_contexts_fops = { + .write = dbgfs_rm_context_write, +}; + +static const struct file_operations monitor_on_fops = { + .read = dbgfs_monitor_on_read, + .write = dbgfs_monitor_on_write, +}; + +static int __init __damon_dbgfs_init(void) +{ + struct dentry *dbgfs_root; + const char * const file_names[] = {"mk_contexts", "rm_contexts", + "monitor_on"}; + const struct file_operations *fops[] = {&mk_contexts_fops, + &rm_contexts_fops, &monitor_on_fops}; + int i; + + dbgfs_root = debugfs_create_dir("damon", NULL); + + for (i = 0; i < ARRAY_SIZE(file_names); i++) + debugfs_create_file(file_names[i], 0600, dbgfs_root, NULL, + fops[i]); + dbgfs_fill_ctx_dir(dbgfs_root, dbgfs_ctxs[0]); + + dbgfs_dirs = kmalloc_array(1, sizeof(dbgfs_root), GFP_KERNEL); + if (!dbgfs_dirs) { + debugfs_remove(dbgfs_root); + return -ENOMEM; + } + dbgfs_dirs[0] = dbgfs_root; + + return 0; +} + +/* + * Functions for the initialization + */ + +static int __init damon_dbgfs_init(void) +{ + int rc = -ENOMEM; + + mutex_lock(&damon_dbgfs_lock); + dbgfs_ctxs = kmalloc(sizeof(*dbgfs_ctxs), GFP_KERNEL); + if (!dbgfs_ctxs) + goto out; + dbgfs_ctxs[0] = dbgfs_new_ctx(); + if (!dbgfs_ctxs[0]) { + kfree(dbgfs_ctxs); + goto out; + } + dbgfs_nr_ctxs = 1; + + rc = __damon_dbgfs_init(); + if (rc) { + kfree(dbgfs_ctxs[0]); + kfree(dbgfs_ctxs); + pr_err("%s: dbgfs init failed\n", __func__); + } + +out: + mutex_unlock(&damon_dbgfs_lock); + return rc; +} + +module_init(damon_dbgfs_init); + +#include "dbgfs-test.h" diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c new file mode 100644 index 000000000000..5e8244f65a1a --- /dev/null +++ b/mm/damon/paddr.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON Primitives for The Physical Address Space + * + * Author: SeongJae Park + */ + +#define pr_fmt(fmt) "damon-pa: " fmt + +#include +#include +#include +#include +#include + +#include "../internal.h" +#include "prmtv-common.h" + +static bool __damon_pa_mkold(struct page *page, struct vm_area_struct *vma, + unsigned long addr, void *arg) +{ + struct page_vma_mapped_walk pvmw = { + .page = page, + .vma = vma, + .address = addr, + }; + + while (page_vma_mapped_walk(&pvmw)) { + addr = pvmw.address; + if (pvmw.pte) + damon_ptep_mkold(pvmw.pte, vma->vm_mm, addr); + else + damon_pmdp_mkold(pvmw.pmd, vma->vm_mm, addr); + } + return true; +} + +static void damon_pa_mkold(unsigned long paddr) +{ + struct page *page = damon_get_page(PHYS_PFN(paddr)); + struct 
rmap_walk_control rwc = { + .rmap_one = __damon_pa_mkold, + .anon_lock = page_lock_anon_vma_read, + }; + bool need_lock; + + if (!page) + return; + + if (!page_mapped(page) || !page_rmapping(page)) { + set_page_idle(page); + goto out; + } + + need_lock = !PageAnon(page) || PageKsm(page); + if (need_lock && !trylock_page(page)) + goto out; + + rmap_walk(page, &rwc); + + if (need_lock) + unlock_page(page); + +out: + put_page(page); +} + +static void __damon_pa_prepare_access_check(struct damon_ctx *ctx, + struct damon_region *r) +{ + r->sampling_addr = damon_rand(r->ar.start, r->ar.end); + + damon_pa_mkold(r->sampling_addr); +} + +static void damon_pa_prepare_access_checks(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct damon_region *r; + + damon_for_each_target(t, ctx) { + damon_for_each_region(r, t) + __damon_pa_prepare_access_check(ctx, r); + } +} + +struct damon_pa_access_chk_result { + unsigned long page_sz; + bool accessed; +}; + +static bool __damon_pa_young(struct page *page, struct vm_area_struct *vma, + unsigned long addr, void *arg) +{ + struct damon_pa_access_chk_result *result = arg; + struct page_vma_mapped_walk pvmw = { + .page = page, + .vma = vma, + .address = addr, + }; + + result->accessed = false; + result->page_sz = PAGE_SIZE; + while (page_vma_mapped_walk(&pvmw)) { + addr = pvmw.address; + if (pvmw.pte) { + result->accessed = pte_young(*pvmw.pte) || + !page_is_idle(page) || + mmu_notifier_test_young(vma->vm_mm, addr); + } else { +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + result->accessed = pmd_young(*pvmw.pmd) || + !page_is_idle(page) || + mmu_notifier_test_young(vma->vm_mm, addr); + result->page_sz = ((1UL) << HPAGE_PMD_SHIFT); +#else + WARN_ON_ONCE(1); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + } + if (result->accessed) { + page_vma_mapped_walk_done(&pvmw); + break; + } + } + + /* If accessed, stop walking */ + return !result->accessed; +} + +static bool damon_pa_young(unsigned long paddr, unsigned long *page_sz) +{ + struct page *page = damon_get_page(PHYS_PFN(paddr)); + struct damon_pa_access_chk_result result = { + .page_sz = PAGE_SIZE, + .accessed = false, + }; + struct rmap_walk_control rwc = { + .arg = &result, + .rmap_one = __damon_pa_young, + .anon_lock = page_lock_anon_vma_read, + }; + bool need_lock; + + if (!page) + return false; + + if (!page_mapped(page) || !page_rmapping(page)) { + if (page_is_idle(page)) + result.accessed = false; + else + result.accessed = true; + put_page(page); + goto out; + } + + need_lock = !PageAnon(page) || PageKsm(page); + if (need_lock && !trylock_page(page)) { + put_page(page); + return false; + } + + rmap_walk(page, &rwc); + + if (need_lock) + unlock_page(page); + put_page(page); + +out: + *page_sz = result.page_sz; + return result.accessed; +} + +static void __damon_pa_check_access(struct damon_ctx *ctx, + struct damon_region *r) +{ + static unsigned long last_addr; + static unsigned long last_page_sz = PAGE_SIZE; + static bool last_accessed; + + /* If the region is in the last checked page, reuse the result */ + if (ALIGN_DOWN(last_addr, last_page_sz) == + ALIGN_DOWN(r->sampling_addr, last_page_sz)) { + if (last_accessed) + r->nr_accesses++; + return; + } + + last_accessed = damon_pa_young(r->sampling_addr, &last_page_sz); + if (last_accessed) + r->nr_accesses++; + + last_addr = r->sampling_addr; +} + +static unsigned int damon_pa_check_accesses(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct damon_region *r; + unsigned int max_nr_accesses = 0; + + damon_for_each_target(t, ctx) {
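+		/* sample every region and track the highest nr_accesses seen */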
damon_for_each_region(r, t) { + __damon_pa_check_access(ctx, r); + max_nr_accesses = max(r->nr_accesses, max_nr_accesses); + } + } + + return max_nr_accesses; +} + +bool damon_pa_target_valid(void *t) +{ + return true; +} + +static unsigned long damon_pa_apply_scheme(struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + struct damos *scheme) +{ + unsigned long addr, applied; + LIST_HEAD(page_list); + + if (scheme->action != DAMOS_PAGEOUT) + return 0; + + for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) { + struct page *page = damon_get_page(PHYS_PFN(addr)); + + if (!page) + continue; + + ClearPageReferenced(page); + test_and_clear_page_young(page); + if (isolate_lru_page(page)) { + put_page(page); + continue; + } + if (PageUnevictable(page)) { + putback_lru_page(page); + } else { + list_add(&page->lru, &page_list); + put_page(page); + } + } + applied = reclaim_pages(&page_list); + cond_resched(); + return applied * PAGE_SIZE; +} + +static int damon_pa_scheme_score(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme) +{ + switch (scheme->action) { + case DAMOS_PAGEOUT: + return damon_pageout_score(context, r, scheme); + default: + break; + } + + return DAMOS_MAX_SCORE; +} + +void damon_pa_set_primitives(struct damon_ctx *ctx) +{ + ctx->primitive.init = NULL; + ctx->primitive.update = NULL; + ctx->primitive.prepare_access_checks = damon_pa_prepare_access_checks; + ctx->primitive.check_accesses = damon_pa_check_accesses; + ctx->primitive.reset_aggregated = NULL; + ctx->primitive.target_valid = damon_pa_target_valid; + ctx->primitive.cleanup = NULL; + ctx->primitive.apply_scheme = damon_pa_apply_scheme; + ctx->primitive.get_scheme_score = damon_pa_scheme_score; +} diff --git a/mm/damon/prmtv-common.c b/mm/damon/prmtv-common.c new file mode 100644 index 000000000000..92a04f5831d6 --- /dev/null +++ b/mm/damon/prmtv-common.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common Primitives for Data Access Monitoring + * + * Author: SeongJae Park + */ + +#include <linux/mmu_notifier.h> +#include <linux/page_idle.h> +#include <linux/pagemap.h> +#include <linux/rmap.h> + +#include "prmtv-common.h" + +/* + * Get an online page for a pfn if it's in the LRU list. Otherwise, returns + * NULL. + * + * The body of this function is stolen from the 'page_idle_get_page()'. We + * steal rather than reuse it because the code is quite simple.
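+ * + * The returned page holds an extra reference; callers are expected to drop + * it with put_page() when they are done, as every user in these files does.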
+ */ +struct page *damon_get_page(unsigned long pfn) +{ + struct page *page = pfn_to_online_page(pfn); + + if (!page || !PageLRU(page) || !get_page_unless_zero(page)) + return NULL; + + if (unlikely(!PageLRU(page))) { + put_page(page); + page = NULL; + } + return page; +} + +void damon_ptep_mkold(pte_t *pte, struct mm_struct *mm, unsigned long addr) +{ + bool referenced = false; + struct page *page = damon_get_page(pte_pfn(*pte)); + + if (!page) + return; + + if (pte_young(*pte)) { + referenced = true; + *pte = pte_mkold(*pte); + } + +#ifdef CONFIG_MMU_NOTIFIER + if (mmu_notifier_clear_young(mm, addr, addr + PAGE_SIZE)) + referenced = true; +#endif /* CONFIG_MMU_NOTIFIER */ + + if (referenced) + set_page_young(page); + + set_page_idle(page); + put_page(page); +} + +void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm, unsigned long addr) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + bool referenced = false; + struct page *page = damon_get_page(pmd_pfn(*pmd)); + + if (!page) + return; + + if (pmd_young(*pmd)) { + referenced = true; + *pmd = pmd_mkold(*pmd); + } + +#ifdef CONFIG_MMU_NOTIFIER + if (mmu_notifier_clear_young(mm, addr, + addr + ((1UL) << HPAGE_PMD_SHIFT))) + referenced = true; +#endif /* CONFIG_MMU_NOTIFIER */ + + if (referenced) + set_page_young(page); + + set_page_idle(page); + put_page(page); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +} + +#define DAMON_MAX_SUBSCORE (100) +#define DAMON_MAX_AGE_IN_LOG (32) + +int damon_pageout_score(struct damon_ctx *c, struct damon_region *r, + struct damos *s) +{ + unsigned int max_nr_accesses; + int freq_subscore; + unsigned int age_in_sec; + int age_in_log, age_subscore; + unsigned int freq_weight = s->quota.weight_nr_accesses; + unsigned int age_weight = s->quota.weight_age; + int hotness; + + max_nr_accesses = c->aggr_interval / c->sample_interval; + freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE / max_nr_accesses; + + age_in_sec = (unsigned long)r->age * c->aggr_interval / 1000000; + for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec; + age_in_log++, age_in_sec >>= 1) + ; + + /* If frequency is 0, higher age means it's colder */ + if (freq_subscore == 0) + age_in_log *= -1; + + /* + * Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG]. + * Scale it to be in [0, 100] and set it as age subscore. 
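+ * + * For instance, a region with freq_subscore 0 that stayed cold for 16 + * seconds gets age_in_log 5; the negation and offset turn that into + * 32 - 5 = 27, so age_subscore becomes 27 * 100 / 32 / 2 = 42.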
+ */ + age_in_log += DAMON_MAX_AGE_IN_LOG; + age_subscore = age_in_log * DAMON_MAX_SUBSCORE / + DAMON_MAX_AGE_IN_LOG / 2; + + hotness = (freq_weight * freq_subscore + age_weight * age_subscore); + if (freq_weight + age_weight) + hotness /= freq_weight + age_weight; + /* + * Transform it to fit in [0, DAMOS_MAX_SCORE] + */ + hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE; + + /* Return coldness of the region */ + return DAMOS_MAX_SCORE - hotness; +} diff --git a/mm/damon/prmtv-common.h b/mm/damon/prmtv-common.h new file mode 100644 index 000000000000..e790cb5f8fe0 --- /dev/null +++ b/mm/damon/prmtv-common.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common Primitives for Data Access Monitoring + * + * Author: SeongJae Park + */ + +#include <linux/damon.h> + +struct page *damon_get_page(unsigned long pfn); + +void damon_ptep_mkold(pte_t *pte, struct mm_struct *mm, unsigned long addr); +void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm, unsigned long addr); + +int damon_pageout_score(struct damon_ctx *c, struct damon_region *r, + struct damos *s); diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c new file mode 100644 index 000000000000..183d4f3b4fde --- /dev/null +++ b/mm/damon/reclaim.c @@ -0,0 +1,425 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON-based page reclamation + * + * Author: SeongJae Park + */ + +#define pr_fmt(fmt) "damon-reclaim: " fmt + +#include <linux/damon.h> +#include <linux/ioport.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/workqueue.h> + +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "damon_reclaim." + +/* + * Enable or disable DAMON_RECLAIM. + * + * You can enable DAMON_RECLAIM by setting the value of this parameter as ``Y``. + * Setting it as ``N`` disables DAMON_RECLAIM. Note that DAMON_RECLAIM could + * do no real monitoring and reclamation due to the watermarks-based activation + * condition. Refer to the descriptions of the watermarks parameters below for + * details. + */ +static bool enabled __read_mostly; + +/* + * Time threshold for cold memory regions identification in microseconds. + * + * If a memory region is not accessed for this or longer time, DAMON_RECLAIM + * identifies the region as cold, and reclaims it. 120 seconds by default. + */ +static unsigned long min_age __read_mostly = 120000000; +module_param(min_age, ulong, 0600); + +/* + * Limit of time for trying the reclamation in milliseconds. + * + * DAMON_RECLAIM tries to use only up to this time within a time window + * (quota_reset_interval_ms) for trying reclamation of cold pages. This can be + * used for limiting CPU consumption of DAMON_RECLAIM. If the value is zero, + * the limit is disabled. + * + * 10 ms by default. + */ +static unsigned long quota_ms __read_mostly = 10; +module_param(quota_ms, ulong, 0600); + +/* + * Limit of size of memory for the reclamation in bytes. + * + * DAMON_RECLAIM charges the amount of memory which it tried to reclaim within + * a time window (quota_reset_interval_ms) and ensures that no more than this + * limit is tried. This can be used for limiting consumption of CPU and IO. + * If this value is zero, the limit is disabled. + * + * 128 MiB by default. + */ +static unsigned long quota_sz __read_mostly = 128 * 1024 * 1024; +module_param(quota_sz, ulong, 0600); + +/* + * The time/size quota charge reset interval in milliseconds. + * + * The charge reset interval for the quota of time (quota_ms) and size + * (quota_sz).
That is, DAMON_RECLAIM does not try reclamation for more than + * quota_ms milliseconds or quota_sz bytes within quota_reset_interval_ms + * milliseconds. + * + * 1 second by default. + */ +static unsigned long quota_reset_interval_ms __read_mostly = 1000; +module_param(quota_reset_interval_ms, ulong, 0600); + +/* + * The watermarks check time interval in microseconds. + * + * Minimal time to wait before checking the watermarks, when DAMON_RECLAIM is + * enabled but inactive due to its watermarks rule. 5 seconds by default. + */ +static unsigned long wmarks_interval __read_mostly = 5000000; +module_param(wmarks_interval, ulong, 0600); + +/* + * Free memory rate (per thousand) for the high watermark. + * + * If free memory of the system in bytes per thousand bytes is higher than + * this, DAMON_RECLAIM becomes inactive, so it does nothing but periodically + * checks the watermarks. 500 (50%) by default. + */ +static unsigned long wmarks_high __read_mostly = 500; +module_param(wmarks_high, ulong, 0600); + +/* + * Free memory rate (per thousand) for the middle watermark. + * + * If free memory of the system in bytes per thousand bytes is between this and + * the low watermark, DAMON_RECLAIM becomes active and starts the monitoring + * and the reclaiming. 400 (40%) by default. + */ +static unsigned long wmarks_mid __read_mostly = 400; +module_param(wmarks_mid, ulong, 0600); + +/* + * Free memory rate (per thousand) for the low watermark. + * + * If free memory of the system in bytes per thousand bytes is lower than this, + * DAMON_RECLAIM becomes inactive, so it does nothing but periodically checks + * the watermarks. In that case, the system falls back to the LRU-based page + * granularity reclamation logic. 200 (20%) by default. + */ +static unsigned long wmarks_low __read_mostly = 200; +module_param(wmarks_low, ulong, 0600); + +/* + * Sampling interval for the monitoring in microseconds. + * + * The sampling interval of DAMON for the cold memory monitoring. Please refer + * to the DAMON documentation for more detail. 5 ms by default. + */ +static unsigned long sample_interval __read_mostly = 5000; +module_param(sample_interval, ulong, 0600); + +/* + * Aggregation interval for the monitoring in microseconds. + * + * The aggregation interval of DAMON for the cold memory monitoring. Please + * refer to the DAMON documentation for more detail. 100 ms by default. + */ +static unsigned long aggr_interval __read_mostly = 100000; +module_param(aggr_interval, ulong, 0600); + +/* + * Minimum number of monitoring regions. + * + * The minimal number of monitoring regions of DAMON for the cold memory + * monitoring. This can be used to set a lower bound on the monitoring + * quality. But setting this too high could result in increased monitoring + * overhead. Please refer to the DAMON documentation for more detail. 10 by + * default. + */ +static unsigned long min_nr_regions __read_mostly = 10; +module_param(min_nr_regions, ulong, 0600); + +/* + * Maximum number of monitoring regions. + * + * The maximum number of monitoring regions of DAMON for the cold memory + * monitoring. This can be used to set an upper bound on the monitoring + * overhead. However, setting this too low could result in bad monitoring + * quality. Please refer to the DAMON documentation for more detail. 1000 by + * default. + */ +static unsigned long max_nr_regions __read_mostly = 1000; +module_param(max_nr_regions, ulong, 0600); + +/* + * Start of the target memory region in physical address.
 + * + * The start physical address of the memory region that DAMON_RECLAIM will do + * work against. By default, the biggest System RAM resource is used as the + * region. + */ +static unsigned long monitor_region_start __read_mostly; +module_param(monitor_region_start, ulong, 0600); + +/* + * End of the target memory region in physical address. + * + * The end physical address of the memory region that DAMON_RECLAIM will do + * work against. By default, the biggest System RAM resource is used as the + * region. + */ +static unsigned long monitor_region_end __read_mostly; +module_param(monitor_region_end, ulong, 0600); + +/* + * PID of the DAMON thread + * + * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread. + * Else, -1. + */ +static int kdamond_pid __read_mostly = -1; +module_param(kdamond_pid, int, 0400); + +/* + * Number of memory regions that reclamation was tried against. + */ +static unsigned long nr_reclaim_tried_regions __read_mostly; +module_param(nr_reclaim_tried_regions, ulong, 0400); + +/* + * Total bytes of the memory regions that reclamation was tried against. + */ +static unsigned long bytes_reclaim_tried_regions __read_mostly; +module_param(bytes_reclaim_tried_regions, ulong, 0400); + +/* + * Number of memory regions that were successfully reclaimed. + */ +static unsigned long nr_reclaimed_regions __read_mostly; +module_param(nr_reclaimed_regions, ulong, 0400); + +/* + * Total bytes of the memory regions that were successfully reclaimed. + */ +static unsigned long bytes_reclaimed_regions __read_mostly; +module_param(bytes_reclaimed_regions, ulong, 0400); + +/* + * Number of times that the time/space quota limits have been exceeded. + */ +static unsigned long nr_quota_exceeds __read_mostly; +module_param(nr_quota_exceeds, ulong, 0400); + +static struct damon_ctx *ctx; +static struct damon_target *target; + +struct damon_reclaim_ram_walk_arg { + unsigned long start; + unsigned long end; +}; + +static int walk_system_ram(struct resource *res, void *arg) +{ + struct damon_reclaim_ram_walk_arg *a = arg; + + if (a->end - a->start < res->end - res->start) { + a->start = res->start; + a->end = res->end; + } + return 0; +} + +/* + * Find the biggest 'System RAM' resource and store its start and end address + * in @start and @end, respectively. If no System RAM is found, returns false. + */ +static bool get_monitoring_region(unsigned long *start, unsigned long *end) +{ + struct damon_reclaim_ram_walk_arg arg = {}; + + walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram); + if (arg.end <= arg.start) + return false; + + *start = arg.start; + *end = arg.end; + return true; +} + +static struct damos *damon_reclaim_new_scheme(void) +{ + struct damos_watermarks wmarks = { + .metric = DAMOS_WMARK_FREE_MEM_RATE, + .interval = wmarks_interval, + .high = wmarks_high, + .mid = wmarks_mid, + .low = wmarks_low, + }; + struct damos_quota quota = { + /* + * Do not try reclamation for more than quota_ms milliseconds + * or quota_sz bytes within quota_reset_interval_ms. + */ + .ms = quota_ms, + .sz = quota_sz, + .reset_interval = quota_reset_interval_ms, + /* Within the quota, page out older regions first. */ + .weight_sz = 0, + .weight_nr_accesses = 0, + .weight_age = 1 + }; + struct damos *scheme = damon_new_scheme( + /* Find regions having PAGE_SIZE or larger size */ + PAGE_SIZE, ULONG_MAX, + /* and not accessed at all */ + 0, 0, + /* for min_age or more micro-seconds, and */ + min_age / aggr_interval, UINT_MAX, + /* page out those, as soon as found */ + DAMOS_PAGEOUT, + /* under the quota.
*/ + &quota, + /* (De)activate this according to the watermarks. */ + &wmarks); + + return scheme; +} + +static int damon_reclaim_turn(bool on) +{ + struct damon_region *region; + struct damos *scheme; + int err; + + if (!on) { + err = damon_stop(&ctx, 1); + if (!err) + kdamond_pid = -1; + return err; + } + + err = damon_set_attrs(ctx, sample_interval, aggr_interval, 0, + min_nr_regions, max_nr_regions); + if (err) + return err; + + if (monitor_region_start > monitor_region_end) + return -EINVAL; + if (!monitor_region_start && !monitor_region_end && + !get_monitoring_region(&monitor_region_start, + &monitor_region_end)) + return -EINVAL; + /* DAMON will free this on its own when it finishes monitoring */ + region = damon_new_region(monitor_region_start, monitor_region_end); + if (!region) + return -ENOMEM; + damon_add_region(region, target); + + /* Will be freed by 'damon_set_schemes()' below */ + scheme = damon_reclaim_new_scheme(); + if (!scheme) { + err = -ENOMEM; + goto free_region_out; + } + err = damon_set_schemes(ctx, &scheme, 1); + if (err) + goto free_scheme_out; + + err = damon_start(&ctx, 1); + if (!err) { + kdamond_pid = ctx->kdamond->pid; + return 0; + } + +free_scheme_out: + damon_destroy_scheme(scheme); +free_region_out: + damon_destroy_region(region, target); + return err; +} + +#define ENABLE_CHECK_INTERVAL_MS 1000 +static struct delayed_work damon_reclaim_timer; +static void damon_reclaim_timer_fn(struct work_struct *work) +{ + static bool last_enabled; + bool now_enabled; + + now_enabled = enabled; + if (last_enabled != now_enabled) { + if (!damon_reclaim_turn(now_enabled)) + last_enabled = now_enabled; + else + enabled = last_enabled; + } + + if (enabled) + schedule_delayed_work(&damon_reclaim_timer, + msecs_to_jiffies(ENABLE_CHECK_INTERVAL_MS)); +} +static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn); + +static int enabled_store(const char *val, + const struct kernel_param *kp) +{ + int rc = param_set_bool(val, kp); + + if (rc < 0) + return rc; + + if (enabled) + schedule_delayed_work(&damon_reclaim_timer, 0); + + return 0; +} + +static const struct kernel_param_ops enabled_param_ops = { + .set = enabled_store, + .get = param_get_bool, +}; + +module_param_cb(enabled, &enabled_param_ops, &enabled, 0600); +MODULE_PARM_DESC(enabled, + "Enable or disable DAMON_RECLAIM (default: disabled)"); + +static int damon_reclaim_after_aggregation(struct damon_ctx *c) +{ + struct damos *s; + + /* update the stat parameters */ + damon_for_each_scheme(s, c) { + nr_reclaim_tried_regions = s->stat.nr_tried; + bytes_reclaim_tried_regions = s->stat.sz_tried; + nr_reclaimed_regions = s->stat.nr_applied; + bytes_reclaimed_regions = s->stat.sz_applied; + nr_quota_exceeds = s->stat.qt_exceeds; + } + return 0; +} + +static int __init damon_reclaim_init(void) +{ + ctx = damon_new_ctx(); + if (!ctx) + return -ENOMEM; + + damon_pa_set_primitives(ctx); + ctx->callback.after_aggregation = damon_reclaim_after_aggregation; + + /* 4242 means nothing but fun */ + target = damon_new_target(4242); + if (!target) { + damon_destroy_ctx(ctx); + return -ENOMEM; + } + damon_add_target(ctx, target); + + schedule_delayed_work(&damon_reclaim_timer, 0); + return 0; +} + +module_init(damon_reclaim_init); diff --git a/mm/damon/vaddr-test.h b/mm/damon/vaddr-test.h new file mode 100644 index 000000000000..6a1b9272ea12 --- /dev/null +++ b/mm/damon/vaddr-test.h @@ -0,0 +1,324 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Data Access Monitor Unit Tests + * + * Copyright 2019 Amazon.com, Inc.
or its affiliates. All rights reserved. + * + * Author: SeongJae Park + */ + +#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST + +#ifndef _DAMON_VADDR_TEST_H +#define _DAMON_VADDR_TEST_H + +#include <kunit/test.h> + +static void __link_vmas(struct vm_area_struct *vmas, ssize_t nr_vmas) +{ + int i, j; + unsigned long largest_gap, gap; + + if (!nr_vmas) + return; + + for (i = 0; i < nr_vmas - 1; i++) { + vmas[i].vm_next = &vmas[i + 1]; + + vmas[i].vm_rb.rb_left = NULL; + vmas[i].vm_rb.rb_right = &vmas[i + 1].vm_rb; + + largest_gap = 0; + for (j = i; j < nr_vmas; j++) { + if (j == 0) + continue; + gap = vmas[j].vm_start - vmas[j - 1].vm_end; + if (gap > largest_gap) + largest_gap = gap; + } + vmas[i].rb_subtree_gap = largest_gap; + } + vmas[i].vm_next = NULL; + vmas[i].vm_rb.rb_right = NULL; + vmas[i].rb_subtree_gap = 0; +} + +/* + * Test __damon_va_three_regions() function + * + * In case of virtual memory address spaces monitoring, DAMON converts the + * complex and dynamic memory mappings of each target task to three + * discontiguous regions which cover every mapped area. However, the three + * regions should not include the two biggest unmapped areas in the original + * mapping, because the two biggest areas are normally the areas between 1) the + * heap and the mmap()-ed regions, and 2) the mmap()-ed regions and the stack. + * Because these two unmapped areas are very huge but obviously never accessed, + * covering them is just a waste. + * + * '__damon_va_three_regions()' receives an address space of a process. It + * first identifies the start of mappings, end of mappings, and the two biggest + * unmapped areas. After that, based on the information, it constructs the + * three regions and returns. For more detail, refer to the comment on the + * 'damon_init_regions_of()' function definition in the 'mm/damon.c' file. + * + * For example, suppose virtual address ranges of 10-20, 20-25, 200-210, + * 210-220, 300-305, and 307-330 (other comments represent these mappings in + * the shorter form: 10-20-25, 200-210-220, 300-305, 307-330) of a process are + * mapped. To cover every mapping, the three regions should start with 10 and + * end with 330. The process also has three unmapped areas, 25-200, 220-300, + * and 305-307. Among those, 25-200 and 220-300 are the biggest two unmapped + * areas, and thus they should be converted to three regions of 10-25, + * 200-220, and 300-330.
 + */ +static void damon_test_three_regions_in_vmas(struct kunit *test) +{ + struct damon_addr_range regions[3] = {0,}; + /* 10-20-25, 200-210-220, 300-305, 307-330 */ + struct vm_area_struct vmas[] = { + (struct vm_area_struct) {.vm_start = 10, .vm_end = 20}, + (struct vm_area_struct) {.vm_start = 20, .vm_end = 25}, + (struct vm_area_struct) {.vm_start = 200, .vm_end = 210}, + (struct vm_area_struct) {.vm_start = 210, .vm_end = 220}, + (struct vm_area_struct) {.vm_start = 300, .vm_end = 305}, + (struct vm_area_struct) {.vm_start = 307, .vm_end = 330}, + }; + + __link_vmas(vmas, 6); + + __damon_va_three_regions(&vmas[0], regions); + + KUNIT_EXPECT_EQ(test, 10ul, regions[0].start); + KUNIT_EXPECT_EQ(test, 25ul, regions[0].end); + KUNIT_EXPECT_EQ(test, 200ul, regions[1].start); + KUNIT_EXPECT_EQ(test, 220ul, regions[1].end); + KUNIT_EXPECT_EQ(test, 300ul, regions[2].start); + KUNIT_EXPECT_EQ(test, 330ul, regions[2].end); +} + +static struct damon_region *__nth_region_of(struct damon_target *t, int idx) +{ + struct damon_region *r; + unsigned int i = 0; + + damon_for_each_region(r, t) { + if (i++ == idx) + return r; + } + + return NULL; +} + +/* + * Test 'damon_va_apply_three_regions()' + * + * test kunit object + * regions an array containing start/end addresses of current + * monitoring target regions + * nr_regions the number of the addresses in 'regions' + * three_regions The three regions that need to be applied now + * expected start/end addresses of monitoring target regions after + * 'three_regions' is applied + * nr_expected the number of addresses in 'expected' + * + * The memory mapping of the target processes changes dynamically. To follow + * the change, DAMON periodically reads the mappings, simplifies them to the + * three regions, and updates the monitoring target regions to fit in the three + * regions. The update of current target regions is the role of + * 'damon_va_apply_three_regions()'. + * + * This test passes the given target regions and the new three regions that + * need to be applied to the function and checks whether it updates the regions + * as expected. + */ +static void damon_do_test_apply_three_regions(struct kunit *test, + unsigned long *regions, int nr_regions, + struct damon_addr_range *three_regions, + unsigned long *expected, int nr_expected) +{ + struct damon_target *t; + struct damon_region *r; + int i; + + t = damon_new_target(42); + for (i = 0; i < nr_regions / 2; i++) { + r = damon_new_region(regions[i * 2], regions[i * 2 + 1]); + damon_add_region(r, t); + } + + damon_va_apply_three_regions(t, three_regions); + + for (i = 0; i < nr_expected / 2; i++) { + r = __nth_region_of(t, i); + KUNIT_EXPECT_EQ(test, r->ar.start, expected[i * 2]); + KUNIT_EXPECT_EQ(test, r->ar.end, expected[i * 2 + 1]); + } +} + +/* + * This function tests the most common case, where the three big regions are + * only slightly changed. Target regions should adjust their boundaries + * (10-20-30, 50-55, 70-80, 90-100) to fit with the new big regions, or remove + * target regions (55-57, 57-59) that are now out of the three regions.
 + */ +static void damon_test_apply_three_regions1(struct kunit *test) +{ + /* 10-20-30, 50-55-57-59, 70-80-90-100 */ + unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59, + 70, 80, 80, 90, 90, 100}; + /* 5-27, 45-55, 73-104 */ + struct damon_addr_range new_three_regions[3] = { + (struct damon_addr_range){.start = 5, .end = 27}, + (struct damon_addr_range){.start = 45, .end = 55}, + (struct damon_addr_range){.start = 73, .end = 104} }; + /* 5-20-27, 45-55, 73-80-90-104 */ + unsigned long expected[] = {5, 20, 20, 27, 45, 55, + 73, 80, 80, 90, 90, 104}; + + damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions), + new_three_regions, expected, ARRAY_SIZE(expected)); +} + +/* + * Test a slightly bigger change. Similar to above, but the second big region + * now requires two target regions (50-55, 57-59) to be removed. + */ +static void damon_test_apply_three_regions2(struct kunit *test) +{ + /* 10-20-30, 50-55-57-59, 70-80-90-100 */ + unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59, + 70, 80, 80, 90, 90, 100}; + /* 5-27, 56-57, 65-104 */ + struct damon_addr_range new_three_regions[3] = { + (struct damon_addr_range){.start = 5, .end = 27}, + (struct damon_addr_range){.start = 56, .end = 57}, + (struct damon_addr_range){.start = 65, .end = 104} }; + /* 5-20-27, 56-57, 65-80-90-104 */ + unsigned long expected[] = {5, 20, 20, 27, 56, 57, + 65, 80, 80, 90, 90, 104}; + + damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions), + new_three_regions, expected, ARRAY_SIZE(expected)); +} + +/* + * Test a big change. The second big region has been totally freed and + * remapped to a different area (50-59 -> 61-63). The target regions which + * were in the old second big region (50-55-57-59) should be removed, and a new + * target region covering the new second big region (61-63) should be created. + */ +static void damon_test_apply_three_regions3(struct kunit *test) +{ + /* 10-20-30, 50-55-57-59, 70-80-90-100 */ + unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59, + 70, 80, 80, 90, 90, 100}; + /* 5-27, 61-63, 65-104 */ + struct damon_addr_range new_three_regions[3] = { + (struct damon_addr_range){.start = 5, .end = 27}, + (struct damon_addr_range){.start = 61, .end = 63}, + (struct damon_addr_range){.start = 65, .end = 104} }; + /* 5-20-27, 61-63, 65-80-90-104 */ + unsigned long expected[] = {5, 20, 20, 27, 61, 63, + 65, 80, 80, 90, 90, 104}; + + damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions), + new_three_regions, expected, ARRAY_SIZE(expected)); +} + +/* + * Test another big change. Both the second and third big regions (50-59 and + * 70-100) have been totally freed and remapped to different areas (30-32 and + * 65-68). The target regions which were in the old second and third big + * regions should now be removed, and new target regions covering the new + * second and third big regions should be created.
 + */ +static void damon_test_apply_three_regions4(struct kunit *test) +{ + /* 10-20-30, 50-55-57-59, 70-80-90-100 */ + unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59, + 70, 80, 80, 90, 90, 100}; + /* 5-7, 30-32, 65-68 */ + struct damon_addr_range new_three_regions[3] = { + (struct damon_addr_range){.start = 5, .end = 7}, + (struct damon_addr_range){.start = 30, .end = 32}, + (struct damon_addr_range){.start = 65, .end = 68} }; + /* expect 5-7, 30-32, 65-68 */ + unsigned long expected[] = {5, 7, 30, 32, 65, 68}; + + damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions), + new_three_regions, expected, ARRAY_SIZE(expected)); +} + +static void damon_test_split_evenly_fail(struct kunit *test, + unsigned long start, unsigned long end, unsigned int nr_pieces) +{ + struct damon_target *t = damon_new_target(42); + struct damon_region *r = damon_new_region(start, end); + + damon_add_region(r, t); + KUNIT_EXPECT_EQ(test, + damon_va_evenly_split_region(t, r, nr_pieces), -EINVAL); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1u); + + damon_for_each_region(r, t) { + KUNIT_EXPECT_EQ(test, r->ar.start, start); + KUNIT_EXPECT_EQ(test, r->ar.end, end); + } + + damon_free_target(t); +} + +static void damon_test_split_evenly_succ(struct kunit *test, + unsigned long start, unsigned long end, unsigned int nr_pieces) +{ + struct damon_target *t = damon_new_target(42); + struct damon_region *r = damon_new_region(start, end); + unsigned long expected_width = (end - start) / nr_pieces; + unsigned long i = 0; + + damon_add_region(r, t); + KUNIT_EXPECT_EQ(test, + damon_va_evenly_split_region(t, r, nr_pieces), 0); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), nr_pieces); + + damon_for_each_region(r, t) { + if (i == nr_pieces - 1) + break; + KUNIT_EXPECT_EQ(test, + r->ar.start, start + i++ * expected_width); + KUNIT_EXPECT_EQ(test, r->ar.end, start + i * expected_width); + } + KUNIT_EXPECT_EQ(test, r->ar.start, start + i * expected_width); + KUNIT_EXPECT_EQ(test, r->ar.end, end); + damon_free_target(t); +} + +static void damon_test_split_evenly(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(NULL, NULL, 5), + -EINVAL); + + damon_test_split_evenly_fail(test, 0, 100, 0); + damon_test_split_evenly_succ(test, 0, 100, 10); + damon_test_split_evenly_succ(test, 5, 59, 5); + damon_test_split_evenly_fail(test, 5, 6, 2); +} + +static struct kunit_case damon_test_cases[] = { + KUNIT_CASE(damon_test_three_regions_in_vmas), + KUNIT_CASE(damon_test_apply_three_regions1), + KUNIT_CASE(damon_test_apply_three_regions2), + KUNIT_CASE(damon_test_apply_three_regions3), + KUNIT_CASE(damon_test_apply_three_regions4), + KUNIT_CASE(damon_test_split_evenly), + {}, +}; + +static struct kunit_suite damon_test_suite = { + .name = "damon-primitives", + .test_cases = damon_test_cases, +}; +kunit_test_suite(damon_test_suite); + +#endif /* _DAMON_VADDR_TEST_H */ + +#endif /* CONFIG_DAMON_VADDR_KUNIT_TEST */ diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c new file mode 100644 index 000000000000..89b6468da2b9 --- /dev/null +++ b/mm/damon/vaddr.c @@ -0,0 +1,761 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON Primitives for Virtual Address Spaces + * + * Author: SeongJae Park + */ + +#define pr_fmt(fmt) "damon-va: " fmt + +#include <asm-generic/mman-common.h> +#include <linux/highmem.h> +#include <linux/hugetlb.h> +#include <linux/mmu_notifier.h> +#include <linux/page_idle.h> +#include <linux/pagewalk.h> +#include <linux/sched/mm.h> + +#include "prmtv-common.h" + +#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST +#undef DAMON_MIN_REGION +#define DAMON_MIN_REGION 1 +#endif + +/* + * 't->id' should be the pointer to the relevant 'struct pid'
with a reference + * held. Caller must put the returned task, unless it is NULL. + */ +static inline struct task_struct *damon_get_task_struct(struct damon_target *t) +{ + return get_pid_task((struct pid *)t->id, PIDTYPE_PID); +} + +/* + * Get the mm_struct of the given target + * + * Caller _must_ put the mm_struct after use, unless it is NULL. + * + * Returns the mm_struct of the target on success, NULL on failure + */ +static struct mm_struct *damon_get_mm(struct damon_target *t) +{ + struct task_struct *task; + struct mm_struct *mm; + + task = damon_get_task_struct(t); + if (!task) + return NULL; + + mm = get_task_mm(task); + put_task_struct(task); + return mm; +} + +/* + * Functions for the initial monitoring target regions construction + */ + +/* + * Size-evenly split a region into 'nr_pieces' small regions + * + * Returns 0 on success, or negative error code otherwise. + */ +static int damon_va_evenly_split_region(struct damon_target *t, + struct damon_region *r, unsigned int nr_pieces) +{ + unsigned long sz_orig, sz_piece, orig_end; + struct damon_region *n = NULL, *next; + unsigned long start; + + if (!r || !nr_pieces) + return -EINVAL; + + orig_end = r->ar.end; + sz_orig = r->ar.end - r->ar.start; + sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION); + + if (!sz_piece) + return -EINVAL; + + r->ar.end = r->ar.start + sz_piece; + next = damon_next_region(r); + for (start = r->ar.end; start + sz_piece <= orig_end; + start += sz_piece) { + n = damon_new_region(start, start + sz_piece); + if (!n) + return -ENOMEM; + damon_insert_region(n, r, next, t); + r = n; + } + /* complement last region for possible rounding error */ + if (n) + n->ar.end = orig_end; + + return 0; +} + +static unsigned long sz_range(struct damon_addr_range *r) +{ + return r->end - r->start; +} + +/* + * Find three regions separated by the two biggest unmapped regions + * + * vma the head vma of the target address space + * regions an array of three address ranges in which the results will be + * saved + * + * This function receives an address space and finds three regions in it which + * are separated by the two biggest unmapped regions in the space. Please + * refer to the comments of the '__damon_va_init_regions()' function below to + * know why this is necessary. + * + * Returns 0 on success, or negative error code otherwise.
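 + * + * For example, for the mappings exercised by the kunit tests in + * 'vaddr-test.h' above (10-20-25, 200-210-220, 300-305, 307-330), 'regions' + * is filled with 10-25, 200-220, and 300-330.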
 + */ +static int __damon_va_three_regions(struct vm_area_struct *vma, + struct damon_addr_range regions[3]) +{ + struct damon_addr_range gap = {0}, first_gap = {0}, second_gap = {0}; + struct vm_area_struct *last_vma = NULL; + unsigned long start = 0; + struct rb_root rbroot; + + /* Find the two biggest gaps so that first_gap > second_gap > others */ + for (; vma; vma = vma->vm_next) { + if (!last_vma) { + start = vma->vm_start; + goto next; + } + + if (vma->rb_subtree_gap <= sz_range(&second_gap)) { + rbroot.rb_node = &vma->vm_rb; + vma = rb_entry(rb_last(&rbroot), + struct vm_area_struct, vm_rb); + goto next; + } + + gap.start = last_vma->vm_end; + gap.end = vma->vm_start; + if (sz_range(&gap) > sz_range(&second_gap)) { + swap(gap, second_gap); + if (sz_range(&second_gap) > sz_range(&first_gap)) + swap(second_gap, first_gap); + } +next: + last_vma = vma; + } + + if (!sz_range(&second_gap) || !sz_range(&first_gap)) + return -EINVAL; + + /* Sort the two biggest gaps by address */ + if (first_gap.start > second_gap.start) + swap(first_gap, second_gap); + + /* Store the result */ + regions[0].start = ALIGN(start, DAMON_MIN_REGION); + regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION); + regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION); + regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION); + regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION); + regions[2].end = ALIGN(last_vma->vm_end, DAMON_MIN_REGION); + + return 0; +} + +/* + * Get the three regions in the given target (task) + * + * Returns 0 on success, negative error code otherwise. + */ +static int damon_va_three_regions(struct damon_target *t, + struct damon_addr_range regions[3]) +{ + struct mm_struct *mm; + int rc; + + mm = damon_get_mm(t); + if (!mm) + return -EINVAL; + + mmap_read_lock(mm); + rc = __damon_va_three_regions(mm->mmap, regions); + mmap_read_unlock(mm); + + mmput(mm); + return rc; +} + +/* + * Initialize the monitoring target regions for the given target (task) + * + * t the given target + * + * Because only a small number of portions of the entire address space are + * actually mapped to memory and accessed, monitoring the unmapped regions is + * wasteful. That said, because we can deal with small noises, tracking every + * mapping is not strictly required, but could even incur a high overhead if + * the mapping frequently changes or the number of mappings is high. The + * adaptive regions adjustment mechanism will further help to deal with the + * noise by simply identifying the unmapped areas as a region that has no + * access. Moreover, applying the real mappings that would have many unmapped + * areas inside would make the adaptive mechanism quite complex. That said, + * too huge unmapped areas inside the monitoring target should be removed so + * that the adaptive mechanism does not waste time on them. + * + * For this reason, we convert the complex mappings to three distinct regions + * that cover every mapped area of the address space. Also, the two gaps + * between the three regions are the two biggest unmapped areas in the given + * address space. In detail, this function first identifies the start and the + * end of the mappings and the two biggest unmapped areas of the address space.
 + * Then, it constructs the three regions as below: + * + * [mappings[0]->start, big_two_unmapped_areas[0]->start) + * [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start) + * [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end) + * + * As the usual memory map of processes is as below, the gap between the heap + * and the uppermost mmap()-ed region, and the gap between the lowermost + * mmap()-ed region and the stack will be the two biggest unmapped regions. + * Because these gaps are exceptionally huge areas in the usual address space, + * excluding these two biggest unmapped regions will be sufficient to make a + * trade-off. + * + * <heap> + * <BIG UNMAPPED REGION 1> + * <uppermost mmap()-ed region> + * (other mmap()-ed regions and small unmapped regions) + * <lowermost mmap()-ed region> + * <BIG UNMAPPED REGION 2> + * <stack> + */ +static void __damon_va_init_regions(struct damon_ctx *ctx, + struct damon_target *t) +{ + struct damon_target *ti; + struct damon_region *r; + struct damon_addr_range regions[3]; + unsigned long sz = 0, nr_pieces; + int i, tidx = 0; + + if (damon_va_three_regions(t, regions)) { + damon_for_each_target(ti, ctx) { + if (ti == t) + break; + tidx++; + } + pr_debug("Failed to get three regions of %dth target\n", tidx); + return; + } + + for (i = 0; i < 3; i++) + sz += regions[i].end - regions[i].start; + if (ctx->min_nr_regions) + sz /= ctx->min_nr_regions; + if (sz < DAMON_MIN_REGION) + sz = DAMON_MIN_REGION; + + /* Set the initial three regions of the target */ + for (i = 0; i < 3; i++) { + r = damon_new_region(regions[i].start, regions[i].end); + if (!r) { + pr_err("%d'th init region creation failed\n", i); + return; + } + damon_add_region(r, t); + + nr_pieces = (regions[i].end - regions[i].start) / sz; + damon_va_evenly_split_region(t, r, nr_pieces); + } +} + +/* Initialize '->regions_list' of every target (task) */ +static void damon_va_init(struct damon_ctx *ctx) +{ + struct damon_target *t; + + damon_for_each_target(t, ctx) { + /* the user may set the target regions as they want */ + if (!damon_nr_regions(t)) + __damon_va_init_regions(ctx, t); + } +} + +/* + * Functions for the dynamic monitoring target regions update + */ + +/* + * Check whether a region is intersecting an address range + * + * Returns true if it is.
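 + * + * The address ranges are half-open, so for example the region [10, 20) + * intersects the range [15, 30) but not [20, 30).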
+ */ +static bool damon_intersect(struct damon_region *r, + struct damon_addr_range *re) +{ + return !(r->ar.end <= re->start || re->end <= r->ar.start); +} + +/* + * Update damon regions for the three big regions of the given target + * + * t the given target + * bregions the three big regions of the target + */ +static void damon_va_apply_three_regions(struct damon_target *t, + struct damon_addr_range bregions[3]) +{ + struct damon_region *r, *next; + unsigned int i; + + /* Remove regions which are not in the three big regions now */ + damon_for_each_region_safe(r, next, t) { + for (i = 0; i < 3; i++) { + if (damon_intersect(r, &bregions[i])) + break; + } + if (i == 3) + damon_destroy_region(r, t); + } + + /* Adjust intersecting regions to fit with the three big regions */ + for (i = 0; i < 3; i++) { + struct damon_region *first = NULL, *last; + struct damon_region *newr; + struct damon_addr_range *br; + + br = &bregions[i]; + /* Get the first and last regions which intersects with br */ + damon_for_each_region(r, t) { + if (damon_intersect(r, br)) { + if (!first) + first = r; + last = r; + } + if (r->ar.start >= br->end) + break; + } + if (!first) { + /* no damon_region intersects with this big region */ + newr = damon_new_region( + ALIGN_DOWN(br->start, + DAMON_MIN_REGION), + ALIGN(br->end, DAMON_MIN_REGION)); + if (!newr) + continue; + damon_insert_region(newr, damon_prev_region(r), r, t); + } else { + first->ar.start = ALIGN_DOWN(br->start, + DAMON_MIN_REGION); + last->ar.end = ALIGN(br->end, DAMON_MIN_REGION); + } + } +} + +/* + * Update regions for current memory mappings + */ +static void damon_va_update(struct damon_ctx *ctx) +{ + struct damon_addr_range three_regions[3]; + struct damon_target *t; + + damon_for_each_target(t, ctx) { + if (damon_va_three_regions(t, three_regions)) + continue; + damon_va_apply_three_regions(t, three_regions); + } +} + +static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pte_t *pte; + spinlock_t *ptl; + + if (pmd_huge(*pmd)) { + ptl = pmd_lock(walk->mm, pmd); + if (pmd_huge(*pmd)) { + damon_pmdp_mkold(pmd, walk->mm, addr); + spin_unlock(ptl); + return 0; + } + spin_unlock(ptl); + } + + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + return 0; + pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (!pte_present(*pte)) + goto out; + damon_ptep_mkold(pte, walk->mm, addr); +out: + pte_unmap_unlock(pte, ptl); + return 0; +} + +#ifdef CONFIG_HUGETLB_PAGE +static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long addr) +{ + bool referenced = false; + pte_t entry = huge_ptep_get(pte); + struct page *page = pte_page(entry); + + if (!page) + return; + + get_page(page); + + if (pte_young(entry)) { + referenced = true; + entry = pte_mkold(entry); + huge_ptep_set_access_flags(vma, addr, pte, entry, + vma->vm_flags & VM_WRITE); + } + +#ifdef CONFIG_MMU_NOTIFIER + if (mmu_notifier_clear_young(mm, addr, + addr + huge_page_size(hstate_vma(vma)))) + referenced = true; +#endif /* CONFIG_MMU_NOTIFIER */ + + if (referenced) + set_page_young(page); + + set_page_idle(page); + put_page(page); +} + +static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask, + unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct hstate *h = hstate_vma(walk->vma); + spinlock_t *ptl; + pte_t entry; + + ptl = huge_pte_lock(h, walk->mm, pte); + entry = huge_ptep_get(pte); + if (!pte_present(entry)) + goto out; + + damon_hugetlb_mkold(pte, 
walk->mm, walk->vma, addr); + +out: + spin_unlock(ptl); + return 0; +} +#else +#define damon_mkold_hugetlb_entry NULL +#endif /* CONFIG_HUGETLB_PAGE */ + +static const struct mm_walk_ops damon_mkold_ops = { + .pmd_entry = damon_mkold_pmd_entry, + .hugetlb_entry = damon_mkold_hugetlb_entry, +}; + +static void damon_va_mkold(struct mm_struct *mm, unsigned long addr) +{ + mmap_read_lock(mm); + walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL); + mmap_read_unlock(mm); +} + +/* + * Functions for the access checking of the regions + */ + +static void __damon_va_prepare_access_check(struct damon_ctx *ctx, + struct mm_struct *mm, struct damon_region *r) +{ + r->sampling_addr = damon_rand(r->ar.start, r->ar.end); + + damon_va_mkold(mm, r->sampling_addr); +} + +static void damon_va_prepare_access_checks(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct mm_struct *mm; + struct damon_region *r; + + damon_for_each_target(t, ctx) { + mm = damon_get_mm(t); + if (!mm) + continue; + damon_for_each_region(r, t) + __damon_va_prepare_access_check(ctx, mm, r); + mmput(mm); + } +} + +struct damon_young_walk_private { + unsigned long *page_sz; + bool young; +}; + +static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pte_t *pte; + spinlock_t *ptl; + struct page *page; + struct damon_young_walk_private *priv = walk->private; + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (pmd_huge(*pmd)) { + ptl = pmd_lock(walk->mm, pmd); + if (!pmd_huge(*pmd)) { + spin_unlock(ptl); + goto regular_page; + } + page = damon_get_page(pmd_pfn(*pmd)); + if (!page) + goto huge_out; + if (pmd_young(*pmd) || !page_is_idle(page) || + mmu_notifier_test_young(walk->mm, + addr)) { + *priv->page_sz = ((1UL) << HPAGE_PMD_SHIFT); + priv->young = true; + } + put_page(page); +huge_out: + spin_unlock(ptl); + return 0; + } + +regular_page: +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + return -EINVAL; + pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (!pte_present(*pte)) + goto out; + page = damon_get_page(pte_pfn(*pte)); + if (!page) + goto out; + if (pte_young(*pte) || !page_is_idle(page) || + mmu_notifier_test_young(walk->mm, addr)) { + *priv->page_sz = PAGE_SIZE; + priv->young = true; + } + put_page(page); +out: + pte_unmap_unlock(pte, ptl); + return 0; +} + +#ifdef CONFIG_HUGETLB_PAGE +static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask, + unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct damon_young_walk_private *priv = walk->private; + struct hstate *h = hstate_vma(walk->vma); + struct page *page; + spinlock_t *ptl; + pte_t entry; + + ptl = huge_pte_lock(h, walk->mm, pte); + entry = huge_ptep_get(pte); + if (!pte_present(entry)) + goto out; + + page = pte_page(entry); + if (!page) + goto out; + + get_page(page); + + if (pte_young(entry) || !page_is_idle(page) || + mmu_notifier_test_young(walk->mm, addr)) { + *priv->page_sz = huge_page_size(h); + priv->young = true; + } + + put_page(page); + +out: + spin_unlock(ptl); + return 0; +} +#else +#define damon_young_hugetlb_entry NULL +#endif /* CONFIG_HUGETLB_PAGE */ + +static const struct mm_walk_ops damon_young_ops = { + .pmd_entry = damon_young_pmd_entry, + .hugetlb_entry = damon_young_hugetlb_entry, +}; + +static bool damon_va_young(struct mm_struct *mm, unsigned long addr, + unsigned long *page_sz) +{ + struct damon_young_walk_private arg = { + .page_sz = page_sz, + .young = false, + }; + + mmap_read_lock(mm); + 
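/* + * Walk just the single address: the range [addr, addr + 1) makes + * walk_page_range() visit only the mapping that covers 'addr', and the + * damon_young_ops callbacks record the result in 'arg'. + */ +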
walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg); + mmap_read_unlock(mm); + return arg.young; +} + +/* + * Check whether the region was accessed after the last preparation + * + * mm 'mm_struct' for the given virtual address space + * r the region to be checked + */ +static void __damon_va_check_access(struct damon_ctx *ctx, + struct mm_struct *mm, struct damon_region *r) +{ + static struct mm_struct *last_mm; + static unsigned long last_addr; + static unsigned long last_page_sz = PAGE_SIZE; + static bool last_accessed; + + /* If the region is in the last checked page, reuse the result */ + if (mm == last_mm && (ALIGN_DOWN(last_addr, last_page_sz) == + ALIGN_DOWN(r->sampling_addr, last_page_sz))) { + if (last_accessed) + r->nr_accesses++; + return; + } + + last_accessed = damon_va_young(mm, r->sampling_addr, &last_page_sz); + if (last_accessed) + r->nr_accesses++; + + last_mm = mm; + last_addr = r->sampling_addr; +} + +static unsigned int damon_va_check_accesses(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct mm_struct *mm; + struct damon_region *r; + unsigned int max_nr_accesses = 0; + + damon_for_each_target(t, ctx) { + mm = damon_get_mm(t); + if (!mm) + continue; + damon_for_each_region(r, t) { + __damon_va_check_access(ctx, mm, r); + max_nr_accesses = max(r->nr_accesses, max_nr_accesses); + } + mmput(mm); + } + + return max_nr_accesses; +} + +/* + * Functions for the target validity check and cleanup + */ + +bool damon_va_target_valid(void *target) +{ + struct damon_target *t = target; + struct task_struct *task; + + task = damon_get_task_struct(t); + if (task) { + put_task_struct(task); + return true; + } + + return false; +} + +#ifndef CONFIG_ADVISE_SYSCALLS +static unsigned long damos_madvise(struct damon_target *target, + struct damon_region *r, int behavior) +{ + return 0; +} +#else +static unsigned long damos_madvise(struct damon_target *target, + struct damon_region *r, int behavior) +{ + struct mm_struct *mm; + unsigned long start = PAGE_ALIGN(r->ar.start); + unsigned long len = PAGE_ALIGN(r->ar.end - r->ar.start); + unsigned long applied; + + mm = damon_get_mm(target); + if (!mm) + return 0; + + applied = do_madvise(mm, start, len, behavior) ? 
0 : len; + mmput(mm); + + return applied; +} +#endif /* CONFIG_ADVISE_SYSCALLS */ + +static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + struct damos *scheme) +{ + int madv_action; + + switch (scheme->action) { + case DAMOS_WILLNEED: + madv_action = MADV_WILLNEED; + break; + case DAMOS_COLD: + madv_action = MADV_COLD; + break; + case DAMOS_PAGEOUT: + madv_action = MADV_PAGEOUT; + break; + case DAMOS_HUGEPAGE: + madv_action = MADV_HUGEPAGE; + break; + case DAMOS_NOHUGEPAGE: + madv_action = MADV_NOHUGEPAGE; + break; + case DAMOS_STAT: + return 0; + default: + return 0; + } + + return damos_madvise(t, r, madv_action); +} + +static int damon_va_scheme_score(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme) +{ + + switch (scheme->action) { + case DAMOS_PAGEOUT: + return damon_pageout_score(context, r, scheme); + default: + break; + } + + return DAMOS_MAX_SCORE; +} + +void damon_va_set_primitives(struct damon_ctx *ctx) +{ + ctx->primitive.init = damon_va_init; + ctx->primitive.update = damon_va_update; + ctx->primitive.prepare_access_checks = damon_va_prepare_access_checks; + ctx->primitive.check_accesses = damon_va_check_accesses; + ctx->primitive.reset_aggregated = NULL; + ctx->primitive.target_valid = damon_va_target_valid; + ctx->primitive.cleanup = NULL; + ctx->primitive.apply_scheme = damon_va_apply_scheme; + ctx->primitive.get_scheme_score = damon_va_scheme_score; +} + +#include "vaddr-test.h" diff --git a/mm/page_ext.c b/mm/page_ext.c index 7e44726b3549..e5e31ff1adba 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -58,11 +58,21 @@ * can utilize this callback to initialize the state of it correctly. */ +#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) +static bool need_page_idle(void) +{ + return true; +} +struct page_ext_operations page_idle_ops = { + .need = need_page_idle, +}; +#endif + static struct page_ext_operations *page_ext_ops[] = { #ifdef CONFIG_PAGE_OWNER &page_owner_ops, #endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT) +#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) &page_idle_ops, #endif #ifdef CONFIG_PAGE_PINNER diff --git a/mm/page_idle.c b/mm/page_idle.c index 057c61df12db..144fb4ed961d 100644 --- a/mm/page_idle.c +++ b/mm/page_idle.c @@ -211,16 +211,6 @@ static const struct attribute_group page_idle_attr_group = { .name = "page_idle", }; -#ifndef CONFIG_64BIT -static bool need_page_idle(void) -{ - return true; -} -struct page_ext_operations page_idle_ops = { - .need = need_page_idle, -}; -#endif - static int __init page_idle_init(void) { int err; diff --git a/mm/page_io.c b/mm/page_io.c index f2900cf6c5b4..d5efe9558b8a 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -69,54 +69,6 @@ void end_swap_bio_write(struct bio *bio) bio_put(bio); } -static void swap_slot_free_notify(struct page *page) -{ - struct swap_info_struct *sis; - struct gendisk *disk; - swp_entry_t entry; - - /* - * There is no guarantee that the page is in swap cache - the software - * suspend code (at least) uses end_swap_bio_read() against a non- - * swapcache page. So we must check PG_swapcache before proceeding with - * this optimization. - */ - if (unlikely(!PageSwapCache(page))) - return; - - sis = page_swap_info(page); - if (data_race(!(sis->flags & SWP_BLKDEV))) - return; - - /* - * The swap subsystem performs lazy swap slot freeing, - * expecting that the page will be swapped out again. 
- * So we can avoid an unnecessary write if the page - * isn't redirtied. - * This is good for real swap storage because we can - * reduce unnecessary I/O and enhance wear-leveling - * if an SSD is used as the as swap device. - * But if in-memory swap device (eg zram) is used, - * this causes a duplicated copy between uncompressed - * data in VM-owned memory and compressed data in - * zram-owned memory. So let's free zram-owned memory - * and make the VM-owned decompressed page *dirty*, - * so the page should be swapped out somewhere again if - * we again wish to reclaim it. - */ - disk = sis->bdev->bd_disk; - entry.val = page_private(page); - if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1) { - unsigned long offset; - - offset = swp_offset(entry); - - SetPageDirty(page); - disk->fops->swap_slot_free_notify(sis->bdev, - offset); - } -} - static void end_swap_bio_read(struct bio *bio) { struct page *page = bio_first_page_all(bio); @@ -132,7 +84,6 @@ static void end_swap_bio_read(struct bio *bio) } SetPageUptodate(page); - swap_slot_free_notify(page); out: unlock_page(page); WRITE_ONCE(bio->bi_private, NULL); @@ -409,11 +360,6 @@ int swap_readpage(struct page *page, bool synchronous) if (sis->flags & SWP_SYNCHRONOUS_IO) { ret = bdev_read_page(sis->bdev, swap_page_sector(page), page); if (!ret) { - if (trylock_page(page)) { - swap_slot_free_notify(page); - unlock_page(page); - } - count_vm_event(PSWPIN); goto out; } diff --git a/mm/readahead.c b/mm/readahead.c index a6bfa987a04a..a95364c99487 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -459,6 +459,8 @@ static void ondemand_readahead(struct readahead_control *ractl, if (req_size > max_pages && bdi->io_pages > max_pages) max_pages = min(req_size, bdi->io_pages); + trace_android_vh_ra_tuning_max_page(ractl, &max_pages); + /* * start of file */ diff --git a/mm/vmscan.c b/mm/vmscan.c index 89beec62bfa2..0c47c99b4cdb 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2396,6 +2396,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, denominator = ap + fp; out: trace_android_vh_tune_scan_type((char *)(&scan_balance)); + trace_android_vh_tune_memcg_scan_type(memcg, (char *)(&scan_balance)); for_each_evictable_lru(lru) { int file = is_file_lru(lru); unsigned long lruvec_size; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index a3271ec3e162..d76af5ba5cc2 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -448,6 +448,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * struct page *page; struct sk_buff *trailer; int tailen = esp->tailen; + unsigned int allocsz; /* this is non-NULL only with TCP/UDP Encapsulation */ if (x->encap) { @@ -457,6 +458,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * return err; } + allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES); + if (allocsz > ESP_SKB_FRAG_MAXSIZE) + goto cow; + if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { nfrags = 1; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 0158b0163ca8..f5d40fabad0b 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -483,6 +483,11 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info struct page *page; struct sk_buff *trailer; int tailen = esp->tailen; + unsigned int allocsz; + + allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES); + if (allocsz > ESP_SKB_FRAG_MAXSIZE) + goto cow; if (x->encap) { int err = esp6_output_encap(x, skb, esp); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c 
index a31334b92be7..d0c95d7dd292 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2278,8 +2278,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, copy_skb = skb_get(skb); skb_head = skb->data; } - if (copy_skb) + if (copy_skb) { + memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0, + sizeof(PACKET_SKB_CB(copy_skb)->sa.ll)); skb_set_owner_r(copy_skb, sk); + } } snaplen = po->rx_ring.frame_size - macoff; if ((int)snaplen < 0) { @@ -3434,6 +3437,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + const size_t max_len = min(sizeof(skb->cb), + sizeof(struct sockaddr_storage)); int copy_len; /* If the address length field is there to be filled @@ -3456,6 +3461,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_namelen = sizeof(struct sockaddr_ll); } } + if (WARN_ON_ONCE(copy_len > max_len)) { + copy_len = max_len; + msg->msg_namelen = copy_len; + } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0890b42153f5..9baca999e533 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -475,7 +475,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { .len = IEEE80211_MAX_MESH_ID_LEN }, [NL80211_ATTR_MPATH_NEXT_HOP] = NLA_POLICY_ETH_ADDR_COMPAT, - [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 }, + /* allow 3 for NUL-termination, we used to declare this NLA_STRING */ + [NL80211_ATTR_REG_ALPHA2] = NLA_POLICY_RANGE(NLA_BINARY, 2, 3), [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED }, [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 }, diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile new file mode 100644 index 000000000000..8a3f2cd9fec0 --- /dev/null +++ b/tools/testing/selftests/damon/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for damon selftests + +TEST_FILES = _chk_dependency.sh +TEST_PROGS = debugfs_attrs.sh + +include ../lib.mk diff --git a/tools/testing/selftests/damon/_chk_dependency.sh b/tools/testing/selftests/damon/_chk_dependency.sh new file mode 100644 index 000000000000..0189db81550b --- /dev/null +++ b/tools/testing/selftests/damon/_chk_dependency.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +DBGFS=/sys/kernel/debug/damon + +if [ $EUID -ne 0 ]; +then + echo "Run as root" + exit $ksft_skip +fi + +if [ ! -d "$DBGFS" ] +then + echo "$DBGFS not found" + exit $ksft_skip +fi + +for f in attrs target_ids monitor_on +do + if [ ! -f "$DBGFS/$f" ] + then + echo "$f not found" + exit 1 + fi +done diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh new file mode 100644 index 000000000000..196b6640bf37 --- /dev/null +++ b/tools/testing/selftests/damon/debugfs_attrs.sh @@ -0,0 +1,88 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +test_write_result() { + file=$1 + content=$2 + orig_content=$3 + expect_reason=$4 + expected=$5 + + echo "$content" > "$file" + if [ $? 
-ne "$expected" ] + then + echo "writing $content to $file doesn't return $expected" + echo "expected because: $expect_reason" + echo "$orig_content" > "$file" + exit 1 + fi +} + +test_write_succ() { + test_write_result "$1" "$2" "$3" "$4" 0 +} + +test_write_fail() { + test_write_result "$1" "$2" "$3" "$4" 1 +} + +test_content() { + file=$1 + orig_content=$2 + expected=$3 + expect_reason=$4 + + content=$(cat "$file") + if [ "$content" != "$expected" ] + then + echo "reading $file expected $expected but $content" + echo "expected because: $expect_reason" + echo "$orig_content" > "$file" + exit 1 + fi +} + +source ./_chk_dependency.sh + +# Test attrs file +# =============== + +file="$DBGFS/attrs" +orig_content=$(cat "$file") + +test_write_succ "$file" "1 2 3 4 5" "$orig_content" "valid input" +test_write_fail "$file" "1 2 3 4" "$orig_content" "not enough fields" +test_write_fail "$file" "1 2 3 5 4" "$orig_content" \ + "min_nr_regions > max_nr_regions" +test_content "$file" "$orig_content" "1 2 3 4 5" "successfully written" +echo "$orig_content" > "$file" + +# Test schemes file +# ================= + +file="$DBGFS/schemes" +orig_content=$(cat "$file") + +test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3 1 100 3 2 1" \ + "$orig_content" "valid input" +test_write_fail "$file" "1 2 +3 4 5 6 3 0 0 0 1 2 3 1 100 3 2 1" "$orig_content" "multi lines" +test_write_succ "$file" "" "$orig_content" "disabling" +echo "$orig_content" > "$file" + +# Test target_ids file +# ==================== + +file="$DBGFS/target_ids" +orig_content=$(cat "$file") + +test_write_succ "$file" "1 2 3 4" "$orig_content" "valid input" +test_write_succ "$file" "1 2 abc 4" "$orig_content" "still valid input" +test_content "$file" "$orig_content" "1 2" "non-integer was there" +test_write_succ "$file" "abc 2 3" "$orig_content" "the file allows wrong input" +test_content "$file" "$orig_content" "" "wrong input written" +test_write_succ "$file" "" "$orig_content" "empty input" +test_content "$file" "$orig_content" "" "empty input written" +echo "$orig_content" > "$file" + +echo "PASS" diff --git a/usr/include/Makefile b/usr/include/Makefile index 703a255cddc6..a8ed689918ce 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -12,6 +12,9 @@ UAPI_CFLAGS := -std=c90 -Wall -Werror=implicit-function-declaration # It is here just because CONFIG_CC_CAN_LINK is tested with -m32 or -m64. UAPI_CFLAGS += $(filter -m32 -m64, $(KBUILD_CFLAGS)) + +# USERCFLAGS might contain sysroot location for CC. +UAPI_CFLAGS += $(USERCFLAGS) + override c_flags = $(UAPI_CFLAGS) -Wp,-MMD,$(depfile) -I$(objtree)/usr/include # The following are excluded for now because they fail to build.
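The USERCFLAGS hook added to usr/include/Makefile above is picked up straight from the make command line. A minimal usage sketch, with a hypothetical sysroot path and an arbitrary linker flag:

    make USERCFLAGS=--sysroot=/path/to/sysroot USERLDFLAGS=-Wl,--no-undefined

With this, the UAPI header compile tests in usr/include see the sysroot as well, per the comment in the hunk above.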