diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uvc b/Documentation/ABI/testing/configfs-usb-gadget-uvc index ac5e11af79a8..a87724ecbf65 100644 --- a/Documentation/ABI/testing/configfs-usb-gadget-uvc +++ b/Documentation/ABI/testing/configfs-usb-gadget-uvc @@ -7,6 +7,7 @@ Description: UVC function directory streaming_maxburst 0..15 (ss only) streaming_maxpacket 1..1023 (fs), 1..3072 (hs/ss) streaming_interval 1..16 + function_name string [32] =================== ============================= What: /config/usb-gadget/gadget/functions/uvc.name/control diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst index 2d1fc03d346e..ef19b9c13523 100644 --- a/Documentation/kbuild/kbuild.rst +++ b/Documentation/kbuild/kbuild.rst @@ -77,6 +77,17 @@ HOSTLDLIBS ---------- Additional libraries to link against when building host programs. +.. _userkbuildflags: + +USERCFLAGS +---------- +Additional options used for $(CC) when compiling userprogs. + +USERLDFLAGS +----------- +Additional options used for $(LD) when linking userprogs. userprogs are linked +with CC, so $(USERLDFLAGS) should include "-Wl," prefix as applicable. + KBUILD_KCONFIG -------------- Set the top-level Kconfig file to the value of this environment diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index 0d5dd5413af0..1db205dc1d0f 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -852,6 +852,8 @@ The syntax is quite similar. The difference is to use "userprogs" instead of When linking bpfilter_umh, it will be passed the extra option -static. + From command line, :ref:`USERCFLAGS and USERLDFLAGS ` will also be used. + 5.4 When userspace programs are actually built ---------------------------------------------- diff --git a/Documentation/usb/gadget-testing.rst b/Documentation/usb/gadget-testing.rst index 9d6276f82774..be1b085a0901 100644 --- a/Documentation/usb/gadget-testing.rst +++ b/Documentation/usb/gadget-testing.rst @@ -774,6 +774,7 @@ The uvc function provides these attributes in its function directory: streaming_maxpacket maximum packet size this endpoint is capable of sending or receiving when this configuration is selected + function_name name of the interface =================== ================================================ There are also "control" and "streaming" subdirectories, each of which contain diff --git a/Makefile b/Makefile index 96ca1c37ffb1..b78323dacd61 100644 --- a/Makefile +++ b/Makefile @@ -429,11 +429,12 @@ HOSTCC = gcc HOSTCXX = g++ endif -export KBUILD_USERCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \ - -O2 -fomit-frame-pointer -std=gnu89 -export KBUILD_USERLDFLAGS := +KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \ + -O2 -fomit-frame-pointer -std=gnu89 +KBUILD_USERCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(USERCFLAGS) +KBUILD_USERLDFLAGS := $(USERLDFLAGS) -KBUILD_HOSTCFLAGS := $(KBUILD_USERCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS) +KBUILD_HOSTCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS) KBUILD_HOSTCXXFLAGS := -Wall -O2 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS) KBUILD_HOSTLDFLAGS := $(HOST_LFS_LDFLAGS) $(HOSTLDFLAGS) KBUILD_HOSTLDLIBS := $(HOST_LFS_LIBS) $(HOSTLDLIBS) @@ -526,6 +527,7 @@ export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AW export PERL PYTHON PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS 
LDFLAGS_MODULE +export KBUILD_USERCFLAGS KBUILD_USERLDFLAGS export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS KBUILD_LDFLAGS export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index 18b0e2ba121b..0feea3047152 100755 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml [abi_gki_aarch64.xml hunks omitted: the XML symbol/type entries were stripped during extraction, leaving only hunk headers and bare +/- markers] diff --git a/android/abi_gki_aarch64_galaxy b/android/abi_gki_aarch64_galaxy index f35385449132..d5358ae912e2 100644 --- a/android/abi_gki_aarch64_galaxy +++ b/android/abi_gki_aarch64_galaxy @@ -2027,10 +2027,12 @@ gpiochip_generic_free gpiochip_generic_request gpiochip_get_data + gpiochip_irqchip_add_key gpiochip_line_is_valid gpiochip_lock_as_irq gpiochip_populate_parent_fwspec_fourcell gpiochip_remove + gpiochip_set_nested_irqchip gpiochip_unlock_as_irq gpiod_cansleep gpiod_count @@ -2053,9 +2055,14 @@ gro_cells_destroy gro_cells_init gro_cells_receive + gs_alloc_req + gs_free_req gserial_alloc_line gserial_connect gserial_disconnect + gserial_free_line + gserial_resume + gserial_suspend guid_gen handle_bad_irq handle_edge_irq diff --git a/android/abi_gki_aarch64_oplus b/android/abi_gki_aarch64_oplus index c01dfbccd138..72015e048a60 100644 --- a/android/abi_gki_aarch64_oplus +++ b/android/abi_gki_aarch64_oplus @@ -5,8 +5,10 @@ add_device_randomness add_memory add_memory_subsection + add_swap_extent add_taint add_timer + add_to_page_cache_lru add_uevent_var add_wait_queue adjust_managed_page_count @@ -61,6 +63,7 @@ bdput bio_add_pc_page bio_alloc_bioset + bio_associate_blkg_from_css bio_endio bio_put bio_reset @@ -81,6 +84,7 @@ bitmap_zalloc blk_alloc_queue blk_cleanup_queue + blkdev_issue_zeroout blk_execute_rq blk_execute_rq_nowait blk_get_request @@ -88,6 +92,7 @@ blk_mq_rq_cpu blk_mq_sched_mark_restart_hctx blk_mq_start_request + blk_op_str blk_put_request blk_queue_flag_clear blk_queue_flag_set @@ -148,7 +153,9 @@
class_interface_unregister __class_register class_unregister + __cleancache_get_page cleanup_srcu_struct + clear_nlink clear_page __ClearPageMovable clk_bulk_disable @@ -222,6 +229,7 @@ config_group_init_type_name config_item_get config_item_put + congestion_wait console_drivers console_printk console_stop @@ -368,6 +376,7 @@ del_gendisk del_timer del_timer_sync + dentry_path_raw desc_to_gpio destroy_workqueue dev_alloc_name @@ -586,6 +595,9 @@ devres_release dev_set_name _dev_warn + d_find_alias + d_instantiate_new + d_invalidate disable_irq disable_irq_nosync disable_percpu_irq @@ -677,6 +689,39 @@ down_write d_path dput + dqget + dqput + dquot_acquire + dquot_alloc + dquot_alloc_inode + __dquot_alloc_space + dquot_claim_space_nodirty + dquot_commit + dquot_commit_info + dquot_destroy + dquot_disable + dquot_drop + dquot_file_open + dquot_free_inode + __dquot_free_space + dquot_get_dqblk + dquot_get_next_dqblk + dquot_get_next_id + dquot_get_state + dquot_initialize + dquot_initialize_needed + dquot_load_quota_inode + dquot_mark_dquot_dirty + dquot_quota_off + dquot_quota_on + dquot_quota_on_mount + dquot_release + dquot_resume + dquot_set_dqblk + dquot_set_dqinfo + __dquot_transfer + dquot_transfer + dquot_writeback_dquots drain_workqueue driver_create_file driver_find_device @@ -816,6 +861,7 @@ drm_writeback_connector_init drm_writeback_queue_job drm_writeback_signal_completion + d_tmpfile dump_stack __dynamic_pr_debug edac_device_add_device @@ -846,6 +892,7 @@ eventfd_ctx_remove_wait_queue eventfd_signal event_triggers_call + evict_inodes extcon_get_edev_by_phandle extcon_get_edev_name extcon_get_property @@ -857,9 +904,13 @@ __fdget fd_install fget + filemap_allow_speculation + filemap_check_errors + filemap_map_pages filp_close filp_open_block find_get_pid + find_inode_nowait find_last_bit find_next_bit find_next_zero_bit @@ -890,6 +941,8 @@ free_pages_exact free_percpu free_percpu_irq + freeze_bdev + freeze_super freezing_slow_path freq_qos_add_notifier freq_qos_add_request @@ -898,11 +951,60 @@ freq_qos_update_request freq_scale fs_bio_set + fscrypt_decrypt_bio + fscrypt_dio_supported + fscrypt_drop_inode + fscrypt_encrypt_pagecache_blocks + __fscrypt_encrypt_symlink + fscrypt_file_open + fscrypt_fname_alloc_buffer + fscrypt_fname_disk_to_usr + fscrypt_fname_free_buffer + fscrypt_fname_siphash + fscrypt_free_bounce_page + fscrypt_free_inode + fscrypt_get_symlink + fscrypt_has_permitted_context + __fscrypt_inode_uses_inline_crypto + fscrypt_ioctl_add_key + fscrypt_ioctl_get_key_status + fscrypt_ioctl_get_nonce + fscrypt_ioctl_get_policy + fscrypt_ioctl_get_policy_ex + fscrypt_ioctl_remove_key + fscrypt_ioctl_remove_key_all_users + fscrypt_ioctl_set_policy + fscrypt_match_name + fscrypt_mergeable_bio + __fscrypt_prepare_link + __fscrypt_prepare_lookup + fscrypt_prepare_new_inode + __fscrypt_prepare_readdir + __fscrypt_prepare_rename + __fscrypt_prepare_setattr + fscrypt_prepare_symlink + fscrypt_put_encryption_info + fscrypt_set_bio_crypt_ctx + fscrypt_set_context + fscrypt_set_test_dummy_encryption + fscrypt_setup_filename + fscrypt_show_test_dummy_encryption + fscrypt_symlink_getattr + fscrypt_zeroout_range fsg_common_create_luns fsg_common_set_cdev fsg_common_set_inquiry_string fsg_common_set_sysfs fsg_config_from_params + fsverity_cleanup_inode + fsverity_enqueue_verify_work + fsverity_file_open + fsverity_ioctl_enable + fsverity_ioctl_measure + fsverity_ioctl_read_metadata + fsverity_prepare_setattr + fsverity_verify_bio + fsverity_verify_page fsync_bdev 
fwnode_device_is_available fwnode_find_reference @@ -921,11 +1023,17 @@ fwnode_property_read_u32_array fwnode_property_read_u64_array gcd + generate_random_uuid generic_device_group + generic_fh_to_dentry + generic_fh_to_parent generic_file_llseek + generic_file_llseek_size + __generic_file_write_iter generic_handle_irq generic_iommu_put_resv_regions generic_mii_ioctl + generic_set_encrypted_ci_d_ops genlmsg_multicast_allns genlmsg_put genl_register_family @@ -1081,6 +1189,8 @@ idr_preload idr_remove idr_replace + iget_failed + iget_locked iio_alloc_pollfunc iio_buffer_init iio_buffer_put @@ -1098,11 +1208,14 @@ iio_read_channel_processed iio_read_channel_raw iio_trigger_notify_done + ilookup import_iovec inc_node_page_state inc_zone_page_state in_egroup_p + __inet6_lookup_established inet_csk_get_port + __inet_lookup_established init_iova_domain init_net init_pseudo @@ -1114,6 +1227,7 @@ init_uts_ns init_wait_entry __init_waitqueue_head + inode_nohighmem inode_owner_or_capable inode_permission input_alloc_absinfo @@ -1135,6 +1249,7 @@ input_unregister_device input_unregister_handle input_unregister_handler + insert_inode_locked interval_tree_insert interval_tree_iter_first interval_tree_iter_next @@ -1302,6 +1417,7 @@ kobj_sysfs_ops krealloc kset_create_and_add + kset_register ksize ksoftirqd kstat @@ -1388,6 +1504,7 @@ lzorle1x_1_compress match_hex match_int + match_strdup match_string match_token mbox_chan_received_data @@ -1450,7 +1567,10 @@ __memset_io memstart_addr memunmap + migrate_page_copy + migrate_page_move_mapping migrate_pages + migrate_page_states migrate_swap mii_check_media mii_ethtool_get_link_ksettings @@ -1675,9 +1795,17 @@ on_each_cpu_cond oops_in_progress overflowuid + page_cache_ra_unbounded + page_cache_sync_ra + pagecache_write_begin + pagecache_write_end page_endio + __page_file_index + __page_file_mapping + page_get_link page_mapping __page_pinner_migration_failed + page_symlink panic panic_notifier_list panic_timeout @@ -1732,6 +1860,11 @@ pci_write_config_dword pcpu_nr_pages PDE_DATA + percpu_counter_add_batch + percpu_counter_destroy + __percpu_counter_init + percpu_counter_set + __percpu_counter_sum __per_cpu_offset per_cpu_ptr_to_phys perf_aux_output_begin @@ -1848,6 +1981,9 @@ pm_wakeup_dev_event pm_wakeup_ws_event pm_wq + posix_acl_alloc + posix_acl_chmod + posix_acl_equiv_mode power_supply_changed power_supply_get_by_name power_supply_get_drvdata @@ -1905,10 +2041,12 @@ qcom_smem_state_update_bits queue_delayed_work_on queue_work_on + radix_tree_gang_lookup radix_tree_insert radix_tree_iter_delete radix_tree_lookup radix_tree_next_chunk + radix_tree_preload ___ratelimit rational_best_approximation raw_notifier_call_chain @@ -1957,6 +2095,7 @@ rdev_get_drvdata rdev_get_id rdev_get_regmap + read_cache_page_gfp reboot_mode refcount_dec_and_lock refcount_dec_not_one @@ -2206,6 +2345,7 @@ send_sig send_sig_info seq_buf_printf + seq_escape seq_hex_dump seq_lseek seq_open @@ -2228,11 +2368,13 @@ serial8250_rpm_put serial8250_suspend_port serial8250_unregister_port + set_cached_acl set_cpus_allowed_ptr set_normalized_timespec64 set_page_dirty_lock __SetPageMovable set_task_cpu + set_task_ioprio set_user_nice sg_alloc_table sg_alloc_table_from_pages @@ -2248,6 +2390,8 @@ __sg_page_iter_start sg_scsi_ioctl show_regs + shrink_dcache_sb + shrink_slab sigprocmask si_mem_available si_meminfo @@ -2432,12 +2576,14 @@ __sw_hweight32 __sw_hweight64 __sw_hweight8 + swp_swap_info sync_file_create sync_file_get_fence synchronize_irq synchronize_net synchronize_rcu 
synchronize_srcu + sync_inodes_sb syscon_node_to_regmap syscon_regmap_lookup_by_compatible syscon_regmap_lookup_by_phandle @@ -2483,7 +2629,10 @@ __task_pid_nr_ns __task_rq_lock task_rq_lock + tcp_hashinfo tcp_parse_options + thaw_bdev + thaw_super thermal_cooling_device_register thermal_cooling_device_unregister thermal_of_cooling_device_register @@ -2599,10 +2748,10 @@ __traceiter_android_vh_binder_reply __traceiter_android_vh_binder_restore_priority __traceiter_android_vh_binder_set_priority - __traceiter_android_vh_binder_trans - __traceiter_android_vh_binder_transaction_init __traceiter_android_vh_binder_thread_read __traceiter_android_vh_binder_thread_release + __traceiter_android_vh_binder_trans + __traceiter_android_vh_binder_transaction_init __traceiter_android_vh_binder_wait_for_work __traceiter_android_vh_binder_wakeup_ilocked __traceiter_android_vh_build_sched_domains @@ -2614,6 +2763,7 @@ __traceiter_android_vh_check_uninterruptible_tasks_dn __traceiter_android_vh_clear_mask_adjust __traceiter_android_vh_clear_reserved_fmt_fields + __traceiter_android_vh_cma_drain_all_pages_bypass __traceiter_android_vh_commit_creds __traceiter_android_vh_cpufreq_acct_update_power __traceiter_android_vh_cpufreq_fast_switch @@ -2623,6 +2773,7 @@ __traceiter_android_vh_cpu_idle_exit __traceiter_android_vh_cpu_up __traceiter_android_vh_do_send_sig_info + __traceiter_android_vh_drain_all_pages_bypass __traceiter_android_vh_em_cpu_energy __traceiter_android_vh_exclude_reserved_zone __traceiter_android_vh_exit_creds @@ -2656,6 +2807,8 @@ __traceiter_android_vh_mutex_wait_finish __traceiter_android_vh_mutex_wait_start __traceiter_android_vh_override_creds + __traceiter_android_vh_page_referenced_check_bypass + __traceiter_android_vh_pcplist_add_cma_pages_bypass __traceiter_android_vh_prepare_update_load_avg_se __traceiter_android_vh_printk_hotplug __traceiter_android_vh_process_killed @@ -2686,14 +2839,13 @@ __traceiter_android_vh_show_resume_epoch_val __traceiter_android_vh_show_stack_hash __traceiter_android_vh_show_suspend_epoch_val + __traceiter_android_vh_shrink_node_memcgs __traceiter_android_vh_sync_txn_recvd __traceiter_android_vh_syscall_prctl_finished __traceiter_android_vh_timer_calc_index __traceiter_android_vh_tune_inactive_ratio __traceiter_android_vh_tune_scan_type __traceiter_android_vh_tune_swappiness - __traceiter_android_vh_page_referenced_check_bypass - __traceiter_android_vh_drain_all_pages_bypass __traceiter_android_vh_ufs_compl_command __traceiter_android_vh_ufs_send_command __traceiter_android_vh_ufs_send_tm_command @@ -2818,6 +2970,7 @@ __tracepoint_android_vh_check_uninterruptible_tasks_dn __tracepoint_android_vh_clear_mask_adjust __tracepoint_android_vh_clear_reserved_fmt_fields + __tracepoint_android_vh_cma_drain_all_pages_bypass __tracepoint_android_vh_commit_creds __tracepoint_android_vh_cpufreq_acct_update_power __tracepoint_android_vh_cpufreq_fast_switch @@ -2827,6 +2980,7 @@ __tracepoint_android_vh_cpu_idle_exit __tracepoint_android_vh_cpu_up __tracepoint_android_vh_do_send_sig_info + __tracepoint_android_vh_drain_all_pages_bypass __tracepoint_android_vh_em_cpu_energy __tracepoint_android_vh_exclude_reserved_zone __tracepoint_android_vh_exit_creds @@ -2860,6 +3014,8 @@ __tracepoint_android_vh_mutex_wait_finish __tracepoint_android_vh_mutex_wait_start __tracepoint_android_vh_override_creds + __tracepoint_android_vh_page_referenced_check_bypass + __tracepoint_android_vh_pcplist_add_cma_pages_bypass __tracepoint_android_vh_prepare_update_load_avg_se 
__tracepoint_android_vh_printk_hotplug __tracepoint_android_vh_process_killed @@ -2890,14 +3046,13 @@ __tracepoint_android_vh_show_resume_epoch_val __tracepoint_android_vh_show_stack_hash __tracepoint_android_vh_show_suspend_epoch_val + __tracepoint_android_vh_shrink_node_memcgs __tracepoint_android_vh_sync_txn_recvd __tracepoint_android_vh_syscall_prctl_finished __tracepoint_android_vh_timer_calc_index __tracepoint_android_vh_tune_inactive_ratio __tracepoint_android_vh_tune_scan_type __tracepoint_android_vh_tune_swappiness - __tracepoint_android_vh_page_referenced_check_bypass - __tracepoint_android_vh_drain_all_pages_bypass __tracepoint_android_vh_ufs_compl_command __tracepoint_android_vh_ufs_send_command __tracepoint_android_vh_ufs_send_tm_command @@ -2940,6 +3095,8 @@ trace_seq_printf trace_seq_putc tracing_off + truncate_inode_pages_range + truncate_pagecache_range try_module_get try_to_free_mem_cgroup_pages try_wait_for_completion @@ -3162,6 +3319,11 @@ usb_unregister_notify __usecs_to_jiffies usleep_range + utf8_casefold + utf8_load + utf8s_to_utf16s + utf8_strncasecmp_folded + utf8_unload uuid_null v4l2_async_notifier_add_fwnode_subdev v4l2_async_notifier_add_subdev @@ -3281,6 +3443,9 @@ verify_pkcs7_signature vfree vfs_fsync + vfs_ioc_fssetxattr_check + vfs_ioc_setflags_prepare + vfs_setpos video_devdata video_device_alloc video_device_release @@ -3310,6 +3475,7 @@ vm_map_pages vm_map_ram vm_node_stat + vm_unmap_aliases vm_unmapped_area vm_unmap_ram vm_zone_stat @@ -3324,10 +3490,13 @@ wait_for_completion wait_for_completion_interruptible wait_for_completion_interruptible_timeout + wait_for_completion_io wait_for_completion_io_timeout wait_for_completion_killable wait_for_completion_killable_timeout wait_for_completion_timeout + wait_for_stable_page + wait_on_page_writeback wait_woken __wake_up wake_up_if_idle @@ -3339,9 +3508,11 @@ wakeup_source_register wakeup_source_remove wakeup_source_unregister + walk_page_range __warn_printk watchdog_init_timeout watchdog_set_restart_priority + wbc_account_cgroup_owner wireless_nlevent_flush woken_wake_function work_busy @@ -3350,10 +3521,12 @@ ww_mutex_lock ww_mutex_unlock __xa_alloc + __xa_clear_mark xa_destroy xa_erase xa_find xa_find_after + xa_get_mark xa_load xa_store xhci_add_endpoint diff --git a/android/abi_gki_aarch64_qcom b/android/abi_gki_aarch64_qcom index bcfc4c9ef782..fe1688823b24 100644 --- a/android/abi_gki_aarch64_qcom +++ b/android/abi_gki_aarch64_qcom @@ -2603,12 +2603,14 @@ __traceiter_ipi_entry __traceiter_ipi_raise __traceiter_irq_handler_entry + __traceiter_map __traceiter_rwmmio_post_read __traceiter_rwmmio_read __traceiter_rwmmio_write __traceiter_sched_overutilized_tp __traceiter_sched_switch __traceiter_suspend_resume + __traceiter_unmap __tracepoint_android_rvh_account_irq __tracepoint_android_rvh_after_dequeue_task __tracepoint_android_rvh_after_enqueue_task @@ -2727,6 +2729,7 @@ __tracepoint_ipi_entry __tracepoint_ipi_raise __tracepoint_irq_handler_entry + __tracepoint_map tracepoint_probe_register tracepoint_probe_register_prio tracepoint_probe_unregister @@ -2736,6 +2739,7 @@ __tracepoint_sched_overutilized_tp __tracepoint_sched_switch __tracepoint_suspend_resume + __tracepoint_unmap trace_print_array_seq trace_print_flags_seq trace_print_hex_seq diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index 1ea0c5939bb7..da8ab23b9ce5 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -118,6 +118,9 @@ CONFIG_CMA_DEBUGFS=y CONFIG_CMA_SYSFS=y 
CONFIG_CMA_AREAS=16 CONFIG_READ_ONLY_THP_FOR_FS=y +CONFIG_DAMON=y +CONFIG_DAMON_PADDR=y +CONFIG_DAMON_RECLAIM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -651,6 +654,7 @@ CONFIG_CRYPTO_ADIANTUM=y CONFIG_CRYPTO_XCBC=y CONFIG_CRYPTO_BLAKE2B=y CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_DES=y CONFIG_CRYPTO_LZ4=y CONFIG_CRYPTO_ZSTD=y CONFIG_CRYPTO_ANSI_CPRNG=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index c9ce1777eb58..14ea69c0d417 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -94,6 +94,9 @@ CONFIG_CMA_DEBUGFS=y CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=16 CONFIG_READ_ONLY_THP_FOR_FS=y +CONFIG_DAMON=y +CONFIG_DAMON_PADDR=y +CONFIG_DAMON_RECLAIM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -586,6 +589,7 @@ CONFIG_CRYPTO_BLAKE2B=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_SHA256_SSSE3=y CONFIG_CRYPTO_AES_NI_INTEL=y +CONFIG_CRYPTO_DES=y CONFIG_CRYPTO_LZ4=y CONFIG_CRYPTO_ZSTD=y CONFIG_CRYPTO_ANSI_CPRNG=y diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h index 97bbb4a9083a..05b48b33baf0 100644 --- a/arch/x86/include/asm/kfence.h +++ b/arch/x86/include/asm/kfence.h @@ -56,8 +56,13 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) else set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); - /* Flush this CPU's TLB. */ + /* + * Flush this CPU's TLB, assuming whoever did the allocation/free is + * likely to continue running on this CPU. + */ + preempt_disable(); flush_tlb_one_kernel(addr); + preempt_enable(); return true; } diff --git a/build.config.common b/build.config.common index b321ae20ecea..9ec1c637a79f 100644 --- a/build.config.common +++ b/build.config.common @@ -7,6 +7,7 @@ DTC=dtc CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r416183b/bin BUILDTOOLS_PREBUILT_BIN=build/build-tools/path/linux-x86 +KCFLAGS="${KCFLAGS} -D__ANDROID_COMMON_KERNEL__" EXTRA_CMDS='' STOP_SHIP_TRACEPRINTK=1 IN_KERNEL_MODULES=1 diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig index c9636c3c6dbd..491751ab0dbf 100644 --- a/drivers/android/Kconfig +++ b/drivers/android/Kconfig @@ -74,14 +74,32 @@ config ANDROID_VENDOR_HOOKS Allow vendor modules to attach to tracepoint "hooks" defined via DECLARE_HOOK or DECLARE_RESTRICTED_HOOK. -config ANDROID_STRUCT_PADDING - bool "Android Struct Padding" +config ANDROID_KABI_RESERVE + bool "Android KABI reserve padding" + default y + help + This option enables the padding that the Android GKI kernel adds + to many different kernel structures to support an in-kernel stable ABI + over the lifespan of support for the kernel. + + Only disable this option if you have a system that needs the Android + kernel drivers, but is NOT an Android GKI kernel image. If disabled + it has the possibility to make the kernel static and runtime image + slightly smaller but will NOT be supported by the Google Android + kernel team. + + If even slightly unsure, say Y. + +config ANDROID_VENDOR_OEM_DATA + bool "Android vendor and OEM data padding" default y help This option enables the padding that the Android GKI kernel adds to many different kernel structures to support an in-kernel stable ABI over the lifespan of support for the kernel as well as OEM additional - fields that are needed by some of the Android kernel tracepoints. + fields that are needed by some of the Android kernel tracepoints. The + macros enabled by this option are used to enable padding in vendor modules + used for the above specified purposes. 
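(Illustrative aside, not part of the patch: the two Kconfig options above gate padding macros rather than add code of their own. A minimal sketch of how that padding is typically placed in a structure follows; the macro and header names - ANDROID_KABI_RESERVE() from include/linux/android_kabi.h, ANDROID_VENDOR_DATA()/ANDROID_OEM_DATA() from include/linux/android_vendor.h - and the example struct are assumptions for illustration.)

#include <linux/android_kabi.h>		/* assumed ACK header: KABI reserve slots */
#include <linux/android_vendor.h>	/* assumed ACK header: vendor/OEM data slots */

/* Hypothetical GKI structure: the reserve fields expand to u64 padding when
 * ANDROID_KABI_RESERVE / ANDROID_VENDOR_OEM_DATA are enabled and to nothing
 * when they are disabled, which is the size difference the help text describes. */
struct example_gki_state {
	int			users;
	void			*private_data;

	ANDROID_VENDOR_DATA(1);		/* slot a vendor module may use */
	ANDROID_OEM_DATA(1);		/* slot an OEM module may use */

	ANDROID_KABI_RESERVE(1);	/* spare slots for future ABI-safe fixes */
	ANDROID_KABI_RESERVE(2);
};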
Only disable this option if you have a system that needs the Android kernel drivers, but is NOT an Android GKI kernel image and you do NOT diff --git a/drivers/android/vendor_hooks.c b/drivers/android/vendor_hooks.c index 3e26777a54c2..eb50db428e8c 100644 --- a/drivers/android/vendor_hooks.c +++ b/drivers/android/vendor_hooks.c @@ -8,6 +8,7 @@ #define CREATE_TRACE_POINTS #include +#include #include #include #include @@ -112,6 +113,11 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_read_wait_start); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_read_wait_finish); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_write_wait_start); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_write_wait_finish); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_set_owner); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_set_reader_owned); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_mark_wake_readers); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_up_read_end); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_up_write_end); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_sched_show_task); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_shmem_alloc_page); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_cpu_idle_enter); @@ -235,6 +241,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_sync_txn_recvd); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_build_sched_domains); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_alter_mutex_list_add); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mutex_unlock_slowpath); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mutex_unlock_slowpath_end); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rwsem_wake_finish); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_do_undefinstr); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_do_ptrauth_fault); @@ -293,6 +300,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_tune_swappiness); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_shrink_slab_bypass); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_page_referenced_check_bypass); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_drain_all_pages_bypass); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_cma_drain_all_pages_bypass); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_pcplist_add_cma_pages_bypass); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_psi_event); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_psi_group); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_cpuset_fork); @@ -393,3 +402,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_binder_thread_release); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_binder_has_work_ilocked); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_binder_read_done); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_handle_tlb_conf); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_shrink_node_memcgs); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ra_tuning_max_page); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_tune_memcg_scan_type); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index a6ff146c3828..26d25188275a 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -833,7 +833,7 @@ static int virtblk_probe(struct virtio_device *vdev) dev_err(&vdev->dev, "virtio_blk: invalid block size: 0x%x\n", blk_size); - goto out_free_tags; + goto out_cleanup_disk; } blk_queue_logical_block_size(q, blk_size); @@ -904,6 +904,8 @@ static int virtblk_probe(struct virtio_device *vdev) device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); return 0; +out_cleanup_disk: + blk_cleanup_queue(vblk->disk->queue); out_free_tags: blk_mq_free_tag_set(&vblk->tag_set); out_put_disk: diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 1d320eec94aa..ed6902eb8d91 100644 --- a/drivers/iommu/iommu.c +++ 
b/drivers/iommu/iommu.c @@ -268,11 +268,13 @@ int iommu_probe_device(struct device *dev) * support default domains, so the return value is not yet * checked. */ + mutex_lock(&group->mutex); iommu_alloc_default_domain(group, dev); if (group->default_domain) { ret = __iommu_attach_device(group->default_domain, dev); if (ret) { + mutex_unlock(&group->mutex); iommu_group_put(group); goto err_release; } @@ -280,6 +282,7 @@ int iommu_probe_device(struct device *dev) iommu_create_device_direct_mappings(group, dev); + mutex_unlock(&group->mutex); iommu_group_put(group); if (ops->probe_finalize) diff --git a/drivers/md/dm-bow.c b/drivers/md/dm-bow.c index e666ca01d220..2e95d6149692 100644 --- a/drivers/md/dm-bow.c +++ b/drivers/md/dm-bow.c @@ -236,7 +236,6 @@ static void set_type(struct bow_context *bc, struct bow_range **br, int type) (*br)->type = type; - mutex_lock(&bc->ranges_lock); if (next->type == type) { if (type == TRIMMED) list_del(&next->trimmed_list); @@ -250,7 +249,6 @@ static void set_type(struct bow_context *bc, struct bow_range **br, int type) rb_erase(&(*br)->node, &bc->ranges); kfree(*br); } - mutex_unlock(&bc->ranges_lock); *br = NULL; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ec64f80ea408..2cf6beb280fb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -608,18 +608,17 @@ static void start_io_acct(struct dm_io *io) false, 0, &io->stats_aux); } -static void end_io_acct(struct dm_io *io) +static void end_io_acct(struct mapped_device *md, struct bio *bio, + unsigned long start_time, struct dm_stats_aux *stats_aux) { - struct mapped_device *md = io->md; - struct bio *bio = io->orig_bio; - unsigned long duration = jiffies - io->start_time; + unsigned long duration = jiffies - start_time; - bio_end_io_acct(bio, io->start_time); + bio_end_io_acct(bio, start_time); if (unlikely(dm_stats_used(&md->stats))) dm_stats_account_io(&md->stats, bio_data_dir(bio), bio->bi_iter.bi_sector, bio_sectors(bio), - true, duration, &io->stats_aux); + true, duration, stats_aux); /* nudge anyone waiting on suspend queue */ if (unlikely(wq_has_sleeper(&md->wait))) @@ -904,6 +903,8 @@ static void dec_pending(struct dm_io *io, blk_status_t error) blk_status_t io_error; struct bio *bio; struct mapped_device *md = io->md; + unsigned long start_time = 0; + struct dm_stats_aux stats_aux; /* Push-back supersedes any I/O errors */ if (unlikely(error)) { @@ -930,8 +931,10 @@ static void dec_pending(struct dm_io *io, blk_status_t error) io_error = io->status; bio = io->orig_bio; - end_io_acct(io); + start_time = io->start_time; + stats_aux = io->stats_aux; free_io(md, io); + end_io_acct(md, bio, start_time, &stats_aux); if (io_error == BLK_STS_DM_REQUEUE) return; diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index f9612eeb714e..8dd8b9138789 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -59,6 +59,7 @@ static int rproc_release_carveout(struct rproc *rproc, /* Unique indices for remoteproc devices */ static DEFINE_IDA(rproc_dev_index); +static struct workqueue_struct *rproc_recovery_wq; static const char * const rproc_crash_names[] = { [RPROC_MMUFAULT] = "mmufault", @@ -2488,8 +2489,11 @@ void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type) dev_err(&rproc->dev, "crash detected in %s: type %s\n", rproc->name, rproc_crash_to_string(type)); + if (rproc_recovery_wq) + queue_work(rproc_recovery_wq, &rproc->crash_handler); + else /* Have a worker handle the error; ensure system is not suspended */ - 
queue_work(system_freezable_wq, &rproc->crash_handler); + queue_work(system_freezable_wq, &rproc->crash_handler); } EXPORT_SYMBOL(rproc_report_crash); @@ -2534,6 +2538,11 @@ static void __exit rproc_exit_panic(void) static int __init remoteproc_init(void) { + rproc_recovery_wq = alloc_workqueue("rproc_recovery_wq", + WQ_UNBOUND | WQ_FREEZABLE, 0); + if (!rproc_recovery_wq) + pr_err("remoteproc: creation of rproc_recovery_wq failed\n"); + rproc_init_sysfs(); rproc_init_debugfs(); rproc_init_cdev(); @@ -2550,6 +2559,8 @@ static void __exit remoteproc_exit(void) rproc_exit_panic(); rproc_exit_debugfs(); rproc_exit_sysfs(); + if (rproc_recovery_wq) + destroy_workqueue(rproc_recovery_wq); } module_exit(remoteproc_exit); diff --git a/drivers/staging/android/ion/ion_buffer.c b/drivers/staging/android/ion/ion_buffer.c index 9baca1a472b6..9c7a353a4220 100644 --- a/drivers/staging/android/ion/ion_buffer.c +++ b/drivers/staging/android/ion/ion_buffer.c @@ -249,6 +249,9 @@ void *ion_buffer_kmap_get(struct ion_buffer *buffer) void *vaddr; if (buffer->kmap_cnt) { + if (buffer->kmap_cnt == INT_MAX) + return ERR_PTR(-EOVERFLOW); + buffer->kmap_cnt++; return buffer->vaddr; } diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index cb514a3bd449..ca0966badf42 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -114,8 +114,6 @@ void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode) dwc->current_dr_role = mode; } -static int dwc3_core_soft_reset(struct dwc3 *dwc); - static void __dwc3_set_mode(struct work_struct *work) { struct dwc3 *dwc = work_to_dwc(work); @@ -260,7 +258,7 @@ u32 dwc3_core_fifo_space(struct dwc3_ep *dep, u8 type) * dwc3_core_soft_reset - Issues core soft reset and PHY reset * @dwc: pointer to our context structure */ -static int dwc3_core_soft_reset(struct dwc3 *dwc) +int dwc3_core_soft_reset(struct dwc3 *dwc) { u32 reg; int retries = 1000; diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 44f1364d1682..28df4e8b9359 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -729,6 +729,7 @@ struct dwc3_ep { #define DWC3_EP_FIRST_STREAM_PRIMED BIT(10) #define DWC3_EP_PENDING_CLEAR_STALL BIT(11) #define DWC3_EP_TXFIFO_RESIZED BIT(12) +#define DWC3_EP_DELAY_STOP BIT(13) /* This last one is specific to EP0 */ #define DWC3_EP0_DIR_IN BIT(31) @@ -1539,6 +1540,8 @@ bool dwc3_has_imod(struct dwc3 *dwc); int dwc3_event_buffers_setup(struct dwc3 *dwc); void dwc3_event_buffers_cleanup(struct dwc3 *dwc); +int dwc3_core_soft_reset(struct dwc3 *dwc); + #if IS_ENABLED(CONFIG_USB_DWC3_HOST) || IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE) int dwc3_host_init(struct dwc3 *dwc); void dwc3_host_exit(struct dwc3 *dwc); diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 658739410992..1064be5518f6 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -271,6 +271,7 @@ void dwc3_ep0_out_start(struct dwc3 *dwc) { struct dwc3_ep *dep; int ret; + int i; complete(&dwc->ep0_in_setup); @@ -279,6 +280,19 @@ void dwc3_ep0_out_start(struct dwc3 *dwc) DWC3_TRBCTL_CONTROL_SETUP, false); ret = dwc3_ep0_start_trans(dep); WARN_ON(ret < 0); + for (i = 2; i < DWC3_ENDPOINTS_NUM; i++) { + struct dwc3_ep *dwc3_ep; + + dwc3_ep = dwc->eps[i]; + if (!dwc3_ep) + continue; + + if (!(dwc3_ep->flags & DWC3_EP_DELAY_STOP)) + continue; + + dwc3_ep->flags &= ~DWC3_EP_DELAY_STOP; + dwc3_stop_active_transfer(dwc3_ep, true, true); + } } static struct dwc3_ep *dwc3_wIndex_to_dep(struct dwc3 *dwc, __le16 wIndex_le) diff --git a/drivers/usb/dwc3/gadget.c 
b/drivers/usb/dwc3/gadget.c index 2c87cee44020..62a578831acc 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -769,7 +769,8 @@ static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep) num_fifos = 3; if (dep->endpoint.maxburst > 6 && - usb_endpoint_xfer_bulk(dep->endpoint.desc) && DWC3_IP_IS(DWC31)) + (usb_endpoint_xfer_bulk(dep->endpoint.desc) || + usb_endpoint_xfer_isoc(dep->endpoint.desc)) && DWC3_IP_IS(DWC31)) num_fifos = dwc->tx_fifo_resize_max_num; /* FIFO size for a single buffer */ @@ -1805,7 +1806,13 @@ static int __dwc3_gadget_start_isoc(struct dwc3_ep *dep) } for (i = 0; i < DWC3_ISOC_MAX_RETRIES; i++) { - dep->frame_number = DWC3_ALIGN_FRAME(dep, i + 1); + int future_interval = i + 1; + + /* Give the controller at least 500us to schedule transfers */ + if (desc->bInterval < 3) + future_interval += 3 - desc->bInterval; + + dep->frame_number = DWC3_ALIGN_FRAME(dep, future_interval); ret = __dwc3_gadget_kick_transfer(dep); if (ret != -EAGAIN) @@ -1874,6 +1881,7 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) */ if ((dep->flags & DWC3_EP_END_TRANSFER_PENDING) || (dep->flags & DWC3_EP_WEDGE) || + (dep->flags & DWC3_EP_DELAY_STOP) || (dep->flags & DWC3_EP_STALL)) { dep->flags |= DWC3_EP_DELAY_START; return 0; @@ -1953,10 +1961,10 @@ static void dwc3_gadget_ep_skip_trbs(struct dwc3_ep *dep, struct dwc3_request *r static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep) { struct dwc3_request *req; - struct dwc3_request *tmp; struct dwc3 *dwc = dep->dwc; - list_for_each_entry_safe(req, tmp, &dep->cancelled_list, list) { + while (!list_empty(&dep->cancelled_list)) { + req = next_request(&dep->cancelled_list); dwc3_gadget_ep_skip_trbs(dep, req); switch (req->status) { case DWC3_REQUEST_STATUS_DISCONNECTED: @@ -1973,6 +1981,12 @@ static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep) dwc3_gadget_giveback(dep, req, -ECONNRESET); break; } + /* + * The endpoint is disabled, let the dwc3_remove_requests() + * handle the cleanup. + */ + if (!dep->endpoint.desc) + break; } } @@ -2008,6 +2022,16 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, if (r == req) { struct dwc3_request *t; + /* + * If a Setup packet is received but yet to DMA out, the controller will + * not process the End Transfer command of any endpoint. Polling of its + * DEPCMD.CmdAct may block setting up TRB for Setup packet, causing a + * timeout. Delay issuing the End Transfer command until the Setup TRB is + * prepared. + */ + if (dwc->ep0state != EP0_SETUP_PHASE && !dwc->delayed_status) + dep->flags |= DWC3_EP_DELAY_STOP; + /* wait until it is processed */ dwc3_stop_active_transfer(dep, true, true); @@ -2089,7 +2113,8 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) if (!list_empty(&dep->started_list)) dep->flags |= DWC3_EP_DELAY_START; - if (dep->flags & DWC3_EP_END_TRANSFER_PENDING) { + if (dep->flags & DWC3_EP_END_TRANSFER_PENDING || + (dep->flags & DWC3_EP_DELAY_STOP)) { dep->flags |= DWC3_EP_PENDING_CLEAR_STALL; return 0; } @@ -2520,6 +2545,17 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) dwc->ev_buf->length; } } else { + /* + * In the Synopsys DWC_usb31 1.90a programming guide section + * 4.1.9, it specifies that for a reconnect after a + * device-initiated disconnect requires a core soft reset + * (DCTL.CSftRst) before enabling the run/stop bit. 
+ */ + spin_unlock_irqrestore(&dwc->lock, flags); + dwc3_core_soft_reset(dwc); + spin_lock_irqsave(&dwc->lock, flags); + + dwc3_event_buffers_setup(dwc); __dwc3_gadget_start(dwc); } @@ -3241,15 +3277,21 @@ static void dwc3_gadget_ep_cleanup_completed_requests(struct dwc3_ep *dep, const struct dwc3_event_depevt *event, int status) { struct dwc3_request *req; - struct dwc3_request *tmp; - list_for_each_entry_safe(req, tmp, &dep->started_list, list) { + while (!list_empty(&dep->started_list)) { int ret; + req = next_request(&dep->started_list); ret = dwc3_gadget_ep_cleanup_completed_request(dep, event, req, status); if (ret) break; + /* + * The endpoint is disabled, let the dwc3_remove_requests() + * handle the cleanup. + */ + if (!dep->endpoint.desc) + break; } } @@ -3585,6 +3627,7 @@ void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, int ret; if (!(dep->flags & DWC3_EP_TRANSFER_STARTED) || + (dep->flags & DWC3_EP_DELAY_STOP) || (dep->flags & DWC3_EP_END_TRANSFER_PENDING)) return; diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h index 77df4b6d6c13..f763380e672e 100644 --- a/drivers/usb/dwc3/gadget.h +++ b/drivers/usb/dwc3/gadget.h @@ -116,6 +116,7 @@ int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request, gfp_t gfp_flags); int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol); void dwc3_ep0_send_delayed_status(struct dwc3 *dwc); +void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool interrupt); /** * dwc3_gadget_ep_get_transfer_index - Gets transfer index from HW diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 553382ce3837..9315313108c9 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -159,6 +159,8 @@ int config_ep_by_speed_and_alt(struct usb_gadget *g, int want_comp_desc = 0; struct usb_descriptor_header **d_spd; /* cursor for speed desc */ + struct usb_composite_dev *cdev; + bool incomplete_desc = false; if (!g || !f || !_ep) return -EIO; @@ -167,28 +169,43 @@ int config_ep_by_speed_and_alt(struct usb_gadget *g, switch (g->speed) { case USB_SPEED_SUPER_PLUS: if (gadget_is_superspeed_plus(g)) { - speed_desc = f->ssp_descriptors; - want_comp_desc = 1; - break; + if (f->ssp_descriptors) { + speed_desc = f->ssp_descriptors; + want_comp_desc = 1; + break; + } + incomplete_desc = true; } fallthrough; case USB_SPEED_SUPER: if (gadget_is_superspeed(g)) { - speed_desc = f->ss_descriptors; - want_comp_desc = 1; - break; + if (f->ss_descriptors) { + speed_desc = f->ss_descriptors; + want_comp_desc = 1; + break; + } + incomplete_desc = true; } fallthrough; case USB_SPEED_HIGH: if (gadget_is_dualspeed(g)) { - speed_desc = f->hs_descriptors; - break; + if (f->hs_descriptors) { + speed_desc = f->hs_descriptors; + break; + } + incomplete_desc = true; } fallthrough; default: speed_desc = f->fs_descriptors; } + cdev = get_gadget_data(g); + if (incomplete_desc) + WARNING(cdev, + "%s doesn't hold the descriptors for current speed\n", + f->name); + /* find correct alternate setting descriptor */ for_each_desc(speed_desc, d_spd, USB_DT_INTERFACE) { int_desc = (struct usb_interface_descriptor *)*d_spd; @@ -244,12 +261,8 @@ ep_found: _ep->maxburst = comp_desc->bMaxBurst + 1; break; default: - if (comp_desc->bMaxBurst != 0) { - struct usb_composite_dev *cdev; - - cdev = get_gadget_data(g); + if (comp_desc->bMaxBurst != 0) ERROR(cdev, "ep0 bMaxBurst must be 0\n"); - } _ep->maxburst = 1; break; } diff --git a/drivers/usb/gadget/configfs.c 
b/drivers/usb/gadget/configfs.c index 2d7b7cd6d3a6..21dd268175c6 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1530,6 +1530,8 @@ static void configfs_composite_unbind(struct usb_gadget *gadget) usb_ep_autoconfig_reset(cdev->gadget); spin_lock_irqsave(&gi->spinlock, flags); cdev->gadget = NULL; + cdev->deactivations = 0; + gadget->deactivated = false; set_gadget_data(gadget, NULL); spin_unlock_irqrestore(&gi->spinlock, flags); } diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c index 66a603f3327d..3510f6d39f0c 100644 --- a/drivers/usb/gadget/function/f_accessory.c +++ b/drivers/usb/gadget/function/f_accessory.c @@ -931,6 +931,7 @@ static const struct file_operations acc_fops = { .read = acc_read, .write = acc_write, .unlocked_ioctl = acc_ioctl, + .compat_ioctl = acc_ioctl, .open = acc_open, .release = acc_release, }; diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index f48a00e49794..875df9ef8887 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -44,7 +44,7 @@ MODULE_PARM_DESC(trace, "Trace level bitmask"); #define UVC_STRING_STREAMING_IDX 1 static struct usb_string uvc_en_us_strings[] = { - [UVC_STRING_CONTROL_IDX].s = "UVC Camera", + /* [UVC_STRING_CONTROL_IDX].s = DYNAMIC, */ [UVC_STRING_STREAMING_IDX].s = "Video Streaming", { } }; @@ -674,6 +674,7 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) uvc_hs_streaming_ep.bEndpointAddress = uvc->video.ep->address; uvc_ss_streaming_ep.bEndpointAddress = uvc->video.ep->address; + uvc_en_us_strings[UVC_STRING_CONTROL_IDX].s = opts->function_name; us = usb_gstrings_attach(cdev, uvc_function_strings, ARRAY_SIZE(uvc_en_us_strings)); if (IS_ERR(us)) { @@ -864,6 +865,7 @@ static struct usb_function_instance *uvc_alloc_inst(void) opts->streaming_interval = 1; opts->streaming_maxpacket = 1024; + snprintf(opts->function_name, sizeof(opts->function_name), "UVC Camera"); ret = uvcg_attach_configfs(opts); if (ret < 0) { @@ -887,13 +889,37 @@ static void uvc_unbind(struct usb_configuration *c, struct usb_function *f) { struct usb_composite_dev *cdev = c->cdev; struct uvc_device *uvc = to_uvc(f); + long wait_ret = 1; uvcg_info(f, "%s\n", __func__); + /* If we know we're connected via v4l2, then there should be a cleanup + * of the device from userspace either via UVC_EVENT_DISCONNECT or + * though the video device removal uevent. Allow some time for the + * application to close out before things get deleted. + */ + if (uvc->func_connected) { + uvcg_dbg(f, "waiting for clean disconnect\n"); + wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue, + uvc->func_connected == false, msecs_to_jiffies(500)); + uvcg_dbg(f, "done waiting with ret: %ld\n", wait_ret); + } + device_remove_file(&uvc->vdev.dev, &dev_attr_function_name); video_unregister_device(&uvc->vdev); v4l2_device_unregister(&uvc->v4l2_dev); + if (uvc->func_connected) { + /* Wait for the release to occur to ensure there are no longer any + * pending operations that may cause panics when resources are cleaned + * up. 
+ */ + uvcg_warn(f, "%s no clean disconnect, wait for release\n", __func__); + wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue, + uvc->func_connected == false, msecs_to_jiffies(1000)); + uvcg_dbg(f, "done waiting for release with ret: %ld\n", wait_ret); + } + usb_ep_free_request(cdev->gadget->ep0, uvc->control_req); kfree(uvc->control_buf); @@ -912,6 +938,7 @@ static struct usb_function *uvc_alloc(struct usb_function_instance *fi) mutex_init(&uvc->video.mutex); uvc->state = UVC_STATE_DISCONNECTED; + init_waitqueue_head(&uvc->func_connected_queue); opts = fi_to_f_uvc_opts(fi); mutex_lock(&opts->lock); diff --git a/drivers/usb/gadget/function/u_uvc.h b/drivers/usb/gadget/function/u_uvc.h index 9a01a7d4f17f..24b8681b0d6f 100644 --- a/drivers/usb/gadget/function/u_uvc.h +++ b/drivers/usb/gadget/function/u_uvc.h @@ -27,6 +27,7 @@ struct f_uvc_opts { unsigned int control_interface; unsigned int streaming_interface; + char function_name[32]; /* * Control descriptors array pointers for full-/high-speed and diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h index 893aaa70f81a..59c5a67b5ff3 100644 --- a/drivers/usb/gadget/function/uvc.h +++ b/drivers/usb/gadget/function/uvc.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -65,13 +66,17 @@ extern unsigned int uvc_gadget_trace_param; * Driver specific constants */ -#define UVC_NUM_REQUESTS 4 #define UVC_MAX_REQUEST_SIZE 64 #define UVC_MAX_EVENTS 4 /* ------------------------------------------------------------------------ * Structures */ +struct uvc_request { + struct usb_request *req; + u8 *req_buffer; + struct uvc_video *video; +}; struct uvc_video { struct uvc_device *uvc; @@ -87,10 +92,11 @@ struct uvc_video { unsigned int imagesize; struct mutex mutex; /* protects frame parameters */ + unsigned int uvc_num_requests; + /* Requests */ unsigned int req_size; - struct usb_request *req[UVC_NUM_REQUESTS]; - __u8 *req_buffer[UVC_NUM_REQUESTS]; + struct uvc_request *ureq; struct list_head req_free; spinlock_t req_lock; @@ -118,6 +124,7 @@ struct uvc_device { struct usb_function func; struct uvc_video video; bool func_connected; + wait_queue_head_t func_connected_queue; /* Descriptors */ struct { diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index 00fb58e50a15..9fafa8001d33 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -2430,10 +2430,51 @@ UVCG_OPTS_ATTR(streaming_maxburst, streaming_maxburst, 15); #undef UVCG_OPTS_ATTR +#define UVCG_OPTS_STRING_ATTR(cname, aname) \ +static ssize_t f_uvc_opts_string_##cname##_show(struct config_item *item,\ + char *page) \ +{ \ + struct f_uvc_opts *opts = to_f_uvc_opts(item); \ + int result; \ + \ + mutex_lock(&opts->lock); \ + result = snprintf(page, sizeof(opts->aname), "%s", opts->aname);\ + mutex_unlock(&opts->lock); \ + \ + return result; \ +} \ + \ +static ssize_t f_uvc_opts_string_##cname##_store(struct config_item *item,\ + const char *page, size_t len) \ +{ \ + struct f_uvc_opts *opts = to_f_uvc_opts(item); \ + int ret = 0; \ + \ + mutex_lock(&opts->lock); \ + if (opts->refcnt) { \ + ret = -EBUSY; \ + goto end; \ + } \ + \ + ret = snprintf(opts->aname, min(sizeof(opts->aname), len), \ + "%s", page); \ + \ +end: \ + mutex_unlock(&opts->lock); \ + return ret; \ +} \ + \ +UVC_ATTR(f_uvc_opts_string_, cname, aname) + +UVCG_OPTS_STRING_ATTR(function_name, function_name); + +#undef UVCG_OPTS_STRING_ATTR + static struct 
configfs_attribute *uvc_attrs[] = { &f_uvc_opts_attr_streaming_interval, &f_uvc_opts_attr_streaming_maxpacket, &f_uvc_opts_attr_streaming_maxburst, + &f_uvc_opts_string_attr_function_name, NULL, }; diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c index 61e2c94cc0b0..51c83eea8318 100644 --- a/drivers/usb/gadget/function/uvc_queue.c +++ b/drivers/usb/gadget/function/uvc_queue.c @@ -43,6 +43,7 @@ static int uvc_queue_setup(struct vb2_queue *vq, { struct uvc_video_queue *queue = vb2_get_drv_priv(vq); struct uvc_video *video = container_of(queue, struct uvc_video, queue); + struct usb_composite_dev *cdev = video->uvc->func.config->cdev; if (*nbuffers > UVC_MAX_VIDEO_BUFFERS) *nbuffers = UVC_MAX_VIDEO_BUFFERS; @@ -51,6 +52,11 @@ static int uvc_queue_setup(struct vb2_queue *vq, sizes[0] = video->imagesize; + if (cdev->gadget->speed < USB_SPEED_SUPER) + video->uvc_num_requests = 4; + else + video->uvc_num_requests = 64; + return 0; } @@ -163,18 +169,7 @@ int uvcg_query_buffer(struct uvc_video_queue *queue, struct v4l2_buffer *buf) int uvcg_queue_buffer(struct uvc_video_queue *queue, struct v4l2_buffer *buf) { - unsigned long flags; - int ret; - - ret = vb2_qbuf(&queue->queue, NULL, buf); - if (ret < 0) - return ret; - - spin_lock_irqsave(&queue->irqlock, flags); - ret = (queue->flags & UVC_QUEUE_PAUSED) != 0; - queue->flags &= ~UVC_QUEUE_PAUSED; - spin_unlock_irqrestore(&queue->irqlock, flags); - return ret; + return vb2_qbuf(&queue->queue, NULL, buf); } /* @@ -242,6 +237,8 @@ void uvcg_queue_cancel(struct uvc_video_queue *queue, int disconnect) buf->state = UVC_BUF_STATE_ERROR; vb2_buffer_done(&buf->buf.vb2_buf, VB2_BUF_STATE_ERROR); } + queue->buf_used = 0; + /* This must be protected by the irqlock spinlock to avoid race * conditions between uvc_queue_buffer and the disconnection event that * could result in an interruptible wait in uvc_dequeue_buffer. 
Do not @@ -340,8 +337,6 @@ struct uvc_buffer *uvcg_queue_head(struct uvc_video_queue *queue) if (!list_empty(&queue->irqqueue)) buf = list_first_entry(&queue->irqqueue, struct uvc_buffer, queue); - else - queue->flags |= UVC_QUEUE_PAUSED; return buf; } diff --git a/drivers/usb/gadget/function/uvc_queue.h b/drivers/usb/gadget/function/uvc_queue.h index 2f0fff769843..4338ff871ee6 100644 --- a/drivers/usb/gadget/function/uvc_queue.h +++ b/drivers/usb/gadget/function/uvc_queue.h @@ -40,7 +40,6 @@ struct uvc_buffer { #define UVC_QUEUE_DISCONNECTED (1 << 0) #define UVC_QUEUE_DROP_INCOMPLETE (1 << 1) -#define UVC_QUEUE_PAUSED (1 << 2) struct uvc_video_queue { struct vb2_queue queue; diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c index 197c26f7aec6..fd8f73bb726d 100644 --- a/drivers/usb/gadget/function/uvc_v4l2.c +++ b/drivers/usb/gadget/function/uvc_v4l2.c @@ -169,7 +169,8 @@ uvc_v4l2_qbuf(struct file *file, void *fh, struct v4l2_buffer *b) if (ret < 0) return ret; - schedule_work(&video->pump); + if (uvc->state == UVC_STATE_STREAMING) + schedule_work(&video->pump); return ret; } @@ -252,10 +253,11 @@ uvc_v4l2_subscribe_event(struct v4l2_fh *fh, static void uvc_v4l2_disable(struct uvc_device *uvc) { - uvc->func_connected = false; uvc_function_disconnect(uvc); uvcg_video_enable(&uvc->video, 0); uvcg_free_buffers(&uvc->video.queue); + uvc->func_connected = false; + wake_up_interruptible(&uvc->func_connected_queue); } static int diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index 633e23d58d86..e5e2c2bae3b9 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -134,9 +134,12 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req) uvcg_err(&video->uvc->func, "Failed to queue request (%d).\n", ret); - /* Isochronous endpoints can't be halted. */ - if (usb_endpoint_xfer_bulk(video->ep->desc)) - usb_ep_set_halt(video->ep); + /* If the endpoint is disabled the descriptor may be NULL. */ + if (video->ep->desc) { + /* Isochronous endpoints can't be halted. 
*/ + if (usb_endpoint_xfer_bulk(video->ep->desc)) + usb_ep_set_halt(video->ep); + } } return ret; @@ -145,8 +148,10 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req) static void uvc_video_complete(struct usb_ep *ep, struct usb_request *req) { - struct uvc_video *video = req->context; + struct uvc_request *ureq = req->context; + struct uvc_video *video = ureq->video; struct uvc_video_queue *queue = &video->queue; + struct uvc_device *uvc = video->uvc; unsigned long flags; switch (req->status) { @@ -169,7 +174,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req) list_add_tail(&req->list, &video->req_free); spin_unlock_irqrestore(&video->req_lock, flags); - schedule_work(&video->pump); + if (uvc->state == UVC_STATE_STREAMING) + schedule_work(&video->pump); } static int @@ -177,16 +183,21 @@ uvc_video_free_requests(struct uvc_video *video) { unsigned int i; - for (i = 0; i < UVC_NUM_REQUESTS; ++i) { - if (video->req[i]) { - usb_ep_free_request(video->ep, video->req[i]); - video->req[i] = NULL; + if (video->ureq) { + for (i = 0; i < video->uvc_num_requests; ++i) { + if (video->ureq[i].req) { + usb_ep_free_request(video->ep, video->ureq[i].req); + video->ureq[i].req = NULL; + } + + if (video->ureq[i].req_buffer) { + kfree(video->ureq[i].req_buffer); + video->ureq[i].req_buffer = NULL; + } } - if (video->req_buffer[i]) { - kfree(video->req_buffer[i]); - video->req_buffer[i] = NULL; - } + kfree(video->ureq); + video->ureq = NULL; } INIT_LIST_HEAD(&video->req_free); @@ -207,21 +218,26 @@ uvc_video_alloc_requests(struct uvc_video *video) * max_t(unsigned int, video->ep->maxburst, 1) * (video->ep->mult); - for (i = 0; i < UVC_NUM_REQUESTS; ++i) { - video->req_buffer[i] = kmalloc(req_size, GFP_KERNEL); - if (video->req_buffer[i] == NULL) + video->ureq = kcalloc(video->uvc_num_requests, sizeof(struct uvc_request), GFP_KERNEL); + if (video->ureq == NULL) + return -ENOMEM; + + for (i = 0; i < video->uvc_num_requests; ++i) { + video->ureq[i].req_buffer = kmalloc(req_size, GFP_KERNEL); + if (video->ureq[i].req_buffer == NULL) goto error; - video->req[i] = usb_ep_alloc_request(video->ep, GFP_KERNEL); - if (video->req[i] == NULL) + video->ureq[i].req = usb_ep_alloc_request(video->ep, GFP_KERNEL); + if (video->ureq[i].req == NULL) goto error; - video->req[i]->buf = video->req_buffer[i]; - video->req[i]->length = 0; - video->req[i]->complete = uvc_video_complete; - video->req[i]->context = video; + video->ureq[i].req->buf = video->ureq[i].req_buffer; + video->ureq[i].req->length = 0; + video->ureq[i].req->complete = uvc_video_complete; + video->ureq[i].req->context = &video->ureq[i]; + video->ureq[i].video = video; - list_add_tail(&video->req[i]->list, &video->req_free); + list_add_tail(&video->ureq[i].req->list, &video->req_free); } video->req_size = req_size; @@ -247,12 +263,12 @@ static void uvcg_video_pump(struct work_struct *work) { struct uvc_video *video = container_of(work, struct uvc_video, pump); struct uvc_video_queue *queue = &video->queue; - struct usb_request *req; + struct usb_request *req = NULL; struct uvc_buffer *buf; unsigned long flags; int ret; - while (1) { + while (video->ep->enabled) { /* Retrieve the first available USB request, protected by the * request lock. 
*/ @@ -288,6 +304,9 @@ static void uvcg_video_pump(struct work_struct *work) } } + if (!req) + return; + spin_lock_irqsave(&video->req_lock, flags); list_add_tail(&req->list, &video->req_free); spin_unlock_irqrestore(&video->req_lock, flags); @@ -312,9 +331,9 @@ int uvcg_video_enable(struct uvc_video *video, int enable) cancel_work_sync(&video->pump); uvcg_queue_cancel(&video->queue, 0); - for (i = 0; i < UVC_NUM_REQUESTS; ++i) - if (video->req[i]) - usb_ep_dequeue(video->ep, video->req[i]); + for (i = 0; i < video->uvc_num_requests; ++i) + if (video->ureq && video->ureq[i].req) + usb_ep_dequeue(video->ep, video->ureq[i].req); uvc_video_free_requests(video); uvcg_queue_enable(&video->queue, 0); diff --git a/fs/crypto/Makefile b/fs/crypto/Makefile index 652c7180ec6d..d39077502e06 100644 --- a/fs/crypto/Makefile +++ b/fs/crypto/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_FS_ENCRYPTION) += fscrypto.o +ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=ANDROID_GKI_VFS_EXPORT_ONLY + fscrypto-y := crypto.o \ fname.o \ hkdf.o \ diff --git a/fs/fs_types.c b/fs/fs_types.c index 78365e5dc08c..a11a1d8c7811 100644 --- a/fs/fs_types.c +++ b/fs/fs_types.c @@ -41,7 +41,7 @@ unsigned char fs_ftype_to_dtype(unsigned int filetype) return fs_dtype_by_ftype[filetype]; } -EXPORT_SYMBOL_GPL(fs_ftype_to_dtype); +EXPORT_SYMBOL_NS_GPL(fs_ftype_to_dtype, ANDROID_GKI_VFS_EXPORT_ONLY); /* * dirent file type to fs on-disk file type conversion diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile index 79e08e05ef84..9121382cf16d 100644 --- a/fs/fscache/Makefile +++ b/fs/fscache/Makefile @@ -3,6 +3,8 @@ # Makefile for general filesystem caching code # +ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=ANDROID_GKI_VFS_EXPORT_ONLY + fscache-y := \ cache.o \ cookie.o \ diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 84db0382765d..2227913674a4 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -131,8 +131,14 @@ struct mount_info { struct path mi_backing_dir_path; struct dentry *mi_index_dir; + /* For stacking mounts, if true, this indicates if the index dir needs + * to be freed for this SB otherwise it was created by lower level SB */ + bool mi_index_free; struct dentry *mi_incomplete_dir; + /* For stacking mounts, if true, this indicates if the incomplete dir + * needs to be freed for this SB. Similar to mi_index_free */ + bool mi_incomplete_free; const struct cred *mi_owner; diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index ea7866fbfd6e..776640451f6f 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -437,7 +437,8 @@ static int incfs_init_dentry(struct dentry *dentry, struct path *path) } static struct dentry *open_or_create_special_dir(struct dentry *backing_dir, - const char *name) + const char *name, + bool *created) { struct dentry *index_dentry; struct inode *backing_inode = d_inode(backing_dir); @@ -450,6 +451,7 @@ static struct dentry *open_or_create_special_dir(struct dentry *backing_dir, return index_dentry; } else if (d_really_is_positive(index_dentry)) { /* Index already exists. 
*/ + *created = false; return index_dentry; } @@ -469,6 +471,7 @@ static struct dentry *open_or_create_special_dir(struct dentry *backing_dir, return ERR_PTR(-EINVAL); } + *created = true; return index_dentry; } @@ -1747,6 +1750,7 @@ struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, struct super_block *src_fs_sb = NULL; struct inode *root_inode = NULL; struct super_block *sb = sget(type, NULL, set_anon_super, flags, NULL); + bool dir_created = false; int error = 0; if (IS_ERR(sb)) @@ -1763,17 +1767,23 @@ struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, BUILD_BUG_ON(PAGE_SIZE != INCFS_DATA_FILE_BLOCK_SIZE); + if (!dev_name) { + pr_err("incfs: Backing dir is not set, filesystem can't be mounted.\n"); + error = -ENOENT; + goto err_deactivate; + } + error = parse_options(&options, (char *)data); if (error != 0) { pr_err("incfs: Options parsing error. %d\n", error); - goto err; + goto err_deactivate; } sb->s_bdi->ra_pages = options.readahead_pages; if (!dev_name) { pr_err("incfs: Backing dir is not set, filesystem can't be mounted.\n"); error = -ENOENT; - goto err; + goto err_free_opts; } error = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, @@ -1782,69 +1792,80 @@ struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, !d_really_is_positive(backing_dir_path.dentry)) { pr_err("incfs: Error accessing: %s.\n", dev_name); - goto err; + goto err_free_opts; } src_fs_sb = backing_dir_path.dentry->d_sb; sb->s_maxbytes = src_fs_sb->s_maxbytes; + sb->s_stack_depth = src_fs_sb->s_stack_depth + 1; + + if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { + error = -EINVAL; + goto err_put_path; + } mi = incfs_alloc_mount_info(sb, &options, &backing_dir_path); - if (IS_ERR_OR_NULL(mi)) { error = PTR_ERR(mi); pr_err("incfs: Error allocating mount info. 
%d\n", error); - mi = NULL; - goto err; + goto err_put_path; } + sb->s_fs_info = mi; + mi->mi_backing_dir_path = backing_dir_path; index_dir = open_or_create_special_dir(backing_dir_path.dentry, - INCFS_INDEX_NAME); + INCFS_INDEX_NAME, &dir_created); if (IS_ERR_OR_NULL(index_dir)) { error = PTR_ERR(index_dir); pr_err("incfs: Can't find or create .index dir in %s\n", dev_name); /* No need to null index_dir since we don't put it */ - goto err; + goto err_put_path; } + mi->mi_index_dir = index_dir; + mi->mi_index_free = dir_created; incomplete_dir = open_or_create_special_dir(backing_dir_path.dentry, - INCFS_INCOMPLETE_NAME); + INCFS_INCOMPLETE_NAME, + &dir_created); if (IS_ERR_OR_NULL(incomplete_dir)) { error = PTR_ERR(incomplete_dir); pr_err("incfs: Can't find or create .incomplete dir in %s\n", dev_name); /* No need to null incomplete_dir since we don't put it */ - goto err; + goto err_put_path; } mi->mi_incomplete_dir = incomplete_dir; + mi->mi_incomplete_free = dir_created; - sb->s_fs_info = mi; root_inode = fetch_regular_inode(sb, backing_dir_path.dentry); if (IS_ERR(root_inode)) { error = PTR_ERR(root_inode); - goto err; + goto err_put_path; } sb->s_root = d_make_root(root_inode); if (!sb->s_root) { error = -ENOMEM; - goto err; + goto err_put_path; } error = incfs_init_dentry(sb->s_root, &backing_dir_path); if (error) - goto err; + goto err_put_path; path_put(&backing_dir_path); sb->s_flags |= SB_ACTIVE; pr_debug("incfs: mount\n"); return dget(sb->s_root); -err: - sb->s_fs_info = NULL; + +err_put_path: path_put(&backing_dir_path); - incfs_free_mount_info(mi); - deactivate_locked_super(sb); +err_free_opts: free_options(&options); +err_deactivate: + deactivate_locked_super(sb); + pr_err("incfs: mount failed %d\n", error); return ERR_PTR(error); } @@ -1879,10 +1900,26 @@ out: void incfs_kill_sb(struct super_block *sb) { struct mount_info *mi = sb->s_fs_info; + struct inode *dinode = NULL; pr_debug("incfs: unmount\n"); - generic_shutdown_super(sb); - incfs_free_mount_info(mi); + + if (mi) { + if (mi->mi_backing_dir_path.dentry) + dinode = d_inode(mi->mi_backing_dir_path.dentry); + + if (dinode) { + if (mi->mi_index_dir && mi->mi_index_free) + vfs_rmdir(dinode, mi->mi_index_dir); + + if (mi->mi_incomplete_dir && mi->mi_incomplete_free) + vfs_rmdir(dinode, mi->mi_incomplete_dir); + } + + incfs_free_mount_info(mi); + sb->s_fs_info = NULL; + } + kill_anon_super(sb); } static int show_options(struct seq_file *m, struct dentry *root) diff --git a/fs/jbd2/Makefile b/fs/jbd2/Makefile index 126b4da6c7de..b64f93331643 100644 --- a/fs/jbd2/Makefile +++ b/fs/jbd2/Makefile @@ -3,6 +3,8 @@ # Makefile for the linux journaling routines. # +ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=ANDROID_GKI_VFS_EXPORT_ONLY + obj-$(CONFIG_JBD2) += jbd2.o jbd2-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o diff --git a/fs/verity/Makefile b/fs/verity/Makefile index 435559a4fa9e..4b8323450f90 100644 --- a/fs/verity/Makefile +++ b/fs/verity/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 +ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=ANDROID_GKI_VFS_EXPORT_ONLY + obj-$(CONFIG_FS_VERITY) += enable.o \ hash_algs.o \ init.o \ diff --git a/include/linux/android_kabi.h b/include/linux/android_kabi.h index dc0da1ab45d6..f6dd7f00b386 100644 --- a/include/linux/android_kabi.h +++ b/include/linux/android_kabi.h @@ -83,7 +83,7 @@ * number: the "number" of the padding variable in the structure. Start with * 1 and go up. 
*/ -#ifdef CONFIG_ANDROID_STRUCT_PADDING +#ifdef CONFIG_ANDROID_KABI_RESERVE #define ANDROID_KABI_RESERVE(number) _ANDROID_KABI_RESERVE(number) #else #define ANDROID_KABI_RESERVE(number) diff --git a/include/linux/android_vendor.h b/include/linux/android_vendor.h index ab3dca23966a..af3014ccc82e 100644 --- a/include/linux/android_vendor.h +++ b/include/linux/android_vendor.h @@ -26,7 +26,7 @@ * Same as ANDROID_VENDOR_DATA but allocates an array of u64 with * the specified size */ -#ifdef CONFIG_ANDROID_STRUCT_PADDING +#ifdef CONFIG_ANDROID_VENDOR_OEM_DATA #define ANDROID_VENDOR_DATA(n) u64 android_vendor_data##n #define ANDROID_VENDOR_DATA_ARRAY(n, s) u64 android_vendor_data##n[s] diff --git a/include/linux/damon.h b/include/linux/damon.h new file mode 100644 index 000000000000..5e1e3a128b77 --- /dev/null +++ b/include/linux/damon.h @@ -0,0 +1,511 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * DAMON api + * + * Author: SeongJae Park + */ + +#ifndef _DAMON_H_ +#define _DAMON_H_ + +#include +#include +#include +#include + +/* Minimal region size. Every damon_region is aligned by this. */ +#define DAMON_MIN_REGION PAGE_SIZE +/* Max priority score for DAMON-based operation schemes */ +#define DAMOS_MAX_SCORE (99) + +/* Get a random number in [l, r) */ +static inline unsigned long damon_rand(unsigned long l, unsigned long r) +{ + return l + prandom_u32_max(r - l); +} + +/** + * struct damon_addr_range - Represents an address region of [@start, @end). + * @start: Start address of the region (inclusive). + * @end: End address of the region (exclusive). + */ +struct damon_addr_range { + unsigned long start; + unsigned long end; +}; + +/** + * struct damon_region - Represents a monitoring target region. + * @ar: The address range of the region. + * @sampling_addr: Address of the sample for the next access check. + * @nr_accesses: Access frequency of this region. + * @list: List head for siblings. + * @age: Age of this region. + * + * @age is initially zero, increased for each aggregation interval, and reset + * to zero again if the access frequency is significantly changed. If two + * regions are merged into a new region, both @nr_accesses and @age of the new + * region are set as region size-weighted average of those of the two regions. + */ +struct damon_region { + struct damon_addr_range ar; + unsigned long sampling_addr; + unsigned int nr_accesses; + struct list_head list; + + unsigned int age; +/* private: Internal value for age calculation. */ + unsigned int last_nr_accesses; +}; + +/** + * struct damon_target - Represents a monitoring target. + * @id: Unique identifier for this target. + * @nr_regions: Number of monitoring target regions of this target. + * @regions_list: Head of the monitoring target regions of this target. + * @list: List head for siblings. + * + * Each monitoring context could have multiple targets. For example, a context + * for virtual memory address spaces could have multiple target processes. The + * @id of each target should be unique among the targets of the context. For + * example, in the virtual address monitoring context, it could be a pidfd or + * an address of an mm_struct. + */ +struct damon_target { + unsigned long id; + unsigned int nr_regions; + struct list_head regions_list; + struct list_head list; +}; + +/** + * enum damos_action - Represents an action of a Data Access Monitoring-based + * Operation Scheme. + * + * @DAMOS_WILLNEED: Call ``madvise()`` for the region with MADV_WILLNEED. 
+ * @DAMOS_COLD: Call ``madvise()`` for the region with MADV_COLD. + * @DAMOS_PAGEOUT: Call ``madvise()`` for the region with MADV_PAGEOUT. + * @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE. + * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE. + * @DAMOS_STAT: Do nothing but count the stat. + */ +enum damos_action { + DAMOS_WILLNEED, + DAMOS_COLD, + DAMOS_PAGEOUT, + DAMOS_HUGEPAGE, + DAMOS_NOHUGEPAGE, + DAMOS_STAT, /* Do nothing but only record the stat */ +}; + +/** + * struct damos_quota - Controls the aggressiveness of the given scheme. + * @ms: Maximum milliseconds that the scheme can use. + * @sz: Maximum bytes of memory that the action can be applied. + * @reset_interval: Charge reset interval in milliseconds. + * + * @weight_sz: Weight of the region's size for prioritization. + * @weight_nr_accesses: Weight of the region's nr_accesses for prioritization. + * @weight_age: Weight of the region's age for prioritization. + * + * To avoid consuming too much CPU time or IO resources for applying the + * &struct damos->action to large memory, DAMON allows users to set time and/or + * size quotas. The quotas can be set by writing non-zero values to &ms and + * &sz, respectively. If the time quota is set, DAMON tries to use only up to + * &ms milliseconds within &reset_interval for applying the action. If the + * size quota is set, DAMON tries to apply the action only up to &sz bytes + * within &reset_interval. + * + * Internally, the time quota is transformed to a size quota using estimated + * throughput of the scheme's action. DAMON then compares it against &sz and + * uses smaller one as the effective quota. + * + * For selecting regions within the quota, DAMON prioritizes current scheme's + * target memory regions using the &struct damon_primitive->get_scheme_score. + * You could customize the prioritization logic by setting &weight_sz, + * &weight_nr_accesses, and &weight_age, because monitoring primitives are + * encouraged to respect those. + */ +struct damos_quota { + unsigned long ms; + unsigned long sz; + unsigned long reset_interval; + + unsigned int weight_sz; + unsigned int weight_nr_accesses; + unsigned int weight_age; + +/* private: */ + /* For throughput estimation */ + unsigned long total_charged_sz; + unsigned long total_charged_ns; + + unsigned long esz; /* Effective size quota in bytes */ + + /* For charging the quota */ + unsigned long charged_sz; + unsigned long charged_from; + struct damon_target *charge_target_from; + unsigned long charge_addr_from; + + /* For prioritization */ + unsigned long histogram[DAMOS_MAX_SCORE + 1]; + unsigned int min_score; +}; + +/** + * enum damos_wmark_metric - Represents the watermark metric. + * + * @DAMOS_WMARK_NONE: Ignore the watermarks of the given scheme. + * @DAMOS_WMARK_FREE_MEM_RATE: Free memory rate of the system in [0,1000]. + */ +enum damos_wmark_metric { + DAMOS_WMARK_NONE, + DAMOS_WMARK_FREE_MEM_RATE, +}; + +/** + * struct damos_watermarks - Controls when a given scheme should be activated. + * @metric: Metric for the watermarks. + * @interval: Watermarks check time interval in microseconds. + * @high: High watermark. + * @mid: Middle watermark. + * @low: Low watermark. + * + * If &metric is &DAMOS_WMARK_NONE, the scheme is always active. Being active + * means DAMON does monitoring and applying the action of the scheme to + * appropriate memory regions. Else, DAMON checks &metric of the system for at + * least every &interval microseconds and works as below. 
+ * + * If &metric is higher than &high, the scheme is inactivated. If &metric is + * between &mid and &low, the scheme is activated. If &metric is lower than + * &low, the scheme is inactivated. + */ +struct damos_watermarks { + enum damos_wmark_metric metric; + unsigned long interval; + unsigned long high; + unsigned long mid; + unsigned long low; + +/* private: */ + bool activated; +}; + +/** + * struct damos_stat - Statistics on a given scheme. + * @nr_tried: Total number of regions that the scheme is tried to be applied. + * @sz_tried: Total size of regions that the scheme is tried to be applied. + * @nr_applied: Total number of regions that the scheme is applied. + * @sz_applied: Total size of regions that the scheme is applied. + * @qt_exceeds: Total number of times the quota of the scheme has exceeded. + */ +struct damos_stat { + unsigned long nr_tried; + unsigned long sz_tried; + unsigned long nr_applied; + unsigned long sz_applied; + unsigned long qt_exceeds; +}; + +/** + * struct damos - Represents a Data Access Monitoring-based Operation Scheme. + * @min_sz_region: Minimum size of target regions. + * @max_sz_region: Maximum size of target regions. + * @min_nr_accesses: Minimum ``->nr_accesses`` of target regions. + * @max_nr_accesses: Maximum ``->nr_accesses`` of target regions. + * @min_age_region: Minimum age of target regions. + * @max_age_region: Maximum age of target regions. + * @action: &damo_action to be applied to the target regions. + * @quota: Control the aggressiveness of this scheme. + * @wmarks: Watermarks for automated (in)activation of this scheme. + * @stat: Statistics of this scheme. + * @list: List head for siblings. + * + * For each aggregation interval, DAMON finds regions which fit in the + * condition (&min_sz_region, &max_sz_region, &min_nr_accesses, + * &max_nr_accesses, &min_age_region, &max_age_region) and applies &action to + * those. To avoid consuming too much CPU time or IO resources for the + * &action, &quota is used. + * + * To do the work only when needed, schemes can be activated for specific + * system situations using &wmarks. If all schemes that registered to the + * monitoring context are inactive, DAMON stops monitoring either, and just + * repeatedly checks the watermarks. + * + * If all schemes that registered to a &struct damon_ctx are inactive, DAMON + * stops monitoring and just repeatedly checks the watermarks. + * + * After applying the &action to each region, &stat_count and &stat_sz is + * updated to reflect the number of regions and total size of regions that the + * &action is applied. + */ +struct damos { + unsigned long min_sz_region; + unsigned long max_sz_region; + unsigned int min_nr_accesses; + unsigned int max_nr_accesses; + unsigned int min_age_region; + unsigned int max_age_region; + enum damos_action action; + struct damos_quota quota; + struct damos_watermarks wmarks; + struct damos_stat stat; + struct list_head list; +}; + +struct damon_ctx; + +/** + * struct damon_primitive - Monitoring primitives for given use cases. + * + * @init: Initialize primitive-internal data structures. + * @update: Update primitive-internal data structures. + * @prepare_access_checks: Prepare next access check of target regions. + * @check_accesses: Check the accesses to target regions. + * @reset_aggregated: Reset aggregated accesses monitoring results. + * @get_scheme_score: Get the score of a region for a scheme. + * @apply_scheme: Apply a DAMON-based operation scheme. + * @target_valid: Determine if the target is valid. 
+ * @cleanup: Clean up the context. + * + * DAMON can be extended for various address spaces and usages. For this, + * users should register the low level primitives for their target address + * space and usecase via the &damon_ctx.primitive. Then, the monitoring thread + * (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting + * the monitoring, @update after each &damon_ctx.primitive_update_interval, and + * @check_accesses, @target_valid and @prepare_access_checks after each + * &damon_ctx.sample_interval. Finally, @reset_aggregated is called after each + * &damon_ctx.aggr_interval. + * + * @init should initialize primitive-internal data structures. For example, + * this could be used to construct proper monitoring target regions and link + * those to @damon_ctx.adaptive_targets. + * @update should update the primitive-internal data structures. For example, + * this could be used to update monitoring target regions for current status. + * @prepare_access_checks should manipulate the monitoring regions to be + * prepared for the next access check. + * @check_accesses should check the accesses to each region that made after the + * last preparation and update the number of observed accesses of each region. + * It should also return max number of observed accesses that made as a result + * of its update. The value will be used for regions adjustment threshold. + * @reset_aggregated should reset the access monitoring results that aggregated + * by @check_accesses. + * @get_scheme_score should return the priority score of a region for a scheme + * as an integer in [0, &DAMOS_MAX_SCORE]. + * @apply_scheme is called from @kdamond when a region for user provided + * DAMON-based operation scheme is found. It should apply the scheme's action + * to the region and return bytes of the region that the action is successfully + * applied. + * @target_valid should check whether the target is still valid for the + * monitoring. + * @cleanup is called from @kdamond just before its termination. + */ +struct damon_primitive { + void (*init)(struct damon_ctx *context); + void (*update)(struct damon_ctx *context); + void (*prepare_access_checks)(struct damon_ctx *context); + unsigned int (*check_accesses)(struct damon_ctx *context); + void (*reset_aggregated)(struct damon_ctx *context); + int (*get_scheme_score)(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme); + unsigned long (*apply_scheme)(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme); + bool (*target_valid)(void *target); + void (*cleanup)(struct damon_ctx *context); +}; + +/** + * struct damon_callback - Monitoring events notification callbacks. + * + * @before_start: Called before starting the monitoring. + * @after_sampling: Called after each sampling. + * @after_aggregation: Called after each aggregation. + * @before_terminate: Called before terminating the monitoring. + * @private: User private data. + * + * The monitoring thread (&damon_ctx.kdamond) calls @before_start and + * @before_terminate just before starting and finishing the monitoring, + * respectively. Therefore, those are good places for installing and cleaning + * @private. + * + * The monitoring thread calls @after_sampling and @after_aggregation for each + * of the sampling intervals and aggregation intervals, respectively. + * Therefore, users can safely access the monitoring results without additional + * protection. 
For the reason, users are recommended to use these callback for + * the accesses to the results. + * + * If any callback returns non-zero, monitoring stops. + */ +struct damon_callback { + void *private; + + int (*before_start)(struct damon_ctx *context); + int (*after_sampling)(struct damon_ctx *context); + int (*after_aggregation)(struct damon_ctx *context); + void (*before_terminate)(struct damon_ctx *context); +}; + +/** + * struct damon_ctx - Represents a context for each monitoring. This is the + * main interface that allows users to set the attributes and get the results + * of the monitoring. + * + * @sample_interval: The time between access samplings. + * @aggr_interval: The time between monitor results aggregations. + * @primitive_update_interval: The time between monitoring primitive updates. + * + * For each @sample_interval, DAMON checks whether each region is accessed or + * not. It aggregates and keeps the access information (number of accesses to + * each region) for @aggr_interval time. DAMON also checks whether the target + * memory regions need update (e.g., by ``mmap()`` calls from the application, + * in case of virtual memory monitoring) and applies the changes for each + * @primitive_update_interval. All time intervals are in micro-seconds. + * Please refer to &struct damon_primitive and &struct damon_callback for more + * detail. + * + * @kdamond: Kernel thread who does the monitoring. + * @kdamond_stop: Notifies whether kdamond should stop. + * @kdamond_lock: Mutex for the synchronizations with @kdamond. + * + * For each monitoring context, one kernel thread for the monitoring is + * created. The pointer to the thread is stored in @kdamond. + * + * Once started, the monitoring thread runs until explicitly required to be + * terminated or every monitoring target is invalid. The validity of the + * targets is checked via the &damon_primitive.target_valid of @primitive. The + * termination can also be explicitly requested by writing non-zero to + * @kdamond_stop. The thread sets @kdamond to NULL when it terminates. + * Therefore, users can know whether the monitoring is ongoing or terminated by + * reading @kdamond. Reads and writes to @kdamond and @kdamond_stop from + * outside of the monitoring thread must be protected by @kdamond_lock. + * + * Note that the monitoring thread protects only @kdamond and @kdamond_stop via + * @kdamond_lock. Accesses to other fields must be protected by themselves. + * + * @primitive: Set of monitoring primitives for given use cases. + * @callback: Set of callbacks for monitoring events notifications. + * + * @min_nr_regions: The minimum number of adaptive monitoring regions. + * @max_nr_regions: The maximum number of adaptive monitoring regions. + * @adaptive_targets: Head of monitoring targets (&damon_target) list. + * @schemes: Head of schemes (&damos) list. 
+ */ +struct damon_ctx { + unsigned long sample_interval; + unsigned long aggr_interval; + unsigned long primitive_update_interval; + +/* private: internal use only */ + struct timespec64 last_aggregation; + struct timespec64 last_primitive_update; + +/* public: */ + struct task_struct *kdamond; + struct mutex kdamond_lock; + + struct damon_primitive primitive; + struct damon_callback callback; + + unsigned long min_nr_regions; + unsigned long max_nr_regions; + struct list_head adaptive_targets; + struct list_head schemes; +}; + +static inline struct damon_region *damon_next_region(struct damon_region *r) +{ + return container_of(r->list.next, struct damon_region, list); +} + +static inline struct damon_region *damon_prev_region(struct damon_region *r) +{ + return container_of(r->list.prev, struct damon_region, list); +} + +static inline struct damon_region *damon_last_region(struct damon_target *t) +{ + return list_last_entry(&t->regions_list, struct damon_region, list); +} + +#define damon_for_each_region(r, t) \ + list_for_each_entry(r, &t->regions_list, list) + +#define damon_for_each_region_safe(r, next, t) \ + list_for_each_entry_safe(r, next, &t->regions_list, list) + +#define damon_for_each_target(t, ctx) \ + list_for_each_entry(t, &(ctx)->adaptive_targets, list) + +#define damon_for_each_target_safe(t, next, ctx) \ + list_for_each_entry_safe(t, next, &(ctx)->adaptive_targets, list) + +#define damon_for_each_scheme(s, ctx) \ + list_for_each_entry(s, &(ctx)->schemes, list) + +#define damon_for_each_scheme_safe(s, next, ctx) \ + list_for_each_entry_safe(s, next, &(ctx)->schemes, list) + +#ifdef CONFIG_DAMON + +struct damon_region *damon_new_region(unsigned long start, unsigned long end); + +/* + * Add a region between two other regions + */ +static inline void damon_insert_region(struct damon_region *r, + struct damon_region *prev, struct damon_region *next, + struct damon_target *t) +{ + __list_add(&r->list, &prev->list, &next->list); + t->nr_regions++; +} + +void damon_add_region(struct damon_region *r, struct damon_target *t); +void damon_destroy_region(struct damon_region *r, struct damon_target *t); + +struct damos *damon_new_scheme( + unsigned long min_sz_region, unsigned long max_sz_region, + unsigned int min_nr_accesses, unsigned int max_nr_accesses, + unsigned int min_age_region, unsigned int max_age_region, + enum damos_action action, struct damos_quota *quota, + struct damos_watermarks *wmarks); +void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); +void damon_destroy_scheme(struct damos *s); + +struct damon_target *damon_new_target(unsigned long id); +void damon_add_target(struct damon_ctx *ctx, struct damon_target *t); +bool damon_targets_empty(struct damon_ctx *ctx); +void damon_free_target(struct damon_target *t); +void damon_destroy_target(struct damon_target *t); +unsigned int damon_nr_regions(struct damon_target *t); + +struct damon_ctx *damon_new_ctx(void); +void damon_destroy_ctx(struct damon_ctx *ctx); +int damon_set_targets(struct damon_ctx *ctx, + unsigned long *ids, ssize_t nr_ids); +int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, + unsigned long aggr_int, unsigned long primitive_upd_int, + unsigned long min_nr_reg, unsigned long max_nr_reg); +int damon_set_schemes(struct damon_ctx *ctx, + struct damos **schemes, ssize_t nr_schemes); +int damon_nr_running_ctxs(void); + +int damon_start(struct damon_ctx **ctxs, int nr_ctxs); +int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); + +#endif /* CONFIG_DAMON */ + +#ifdef 
CONFIG_DAMON_VADDR +bool damon_va_target_valid(void *t); +void damon_va_set_primitives(struct damon_ctx *ctx); +#endif /* CONFIG_DAMON_VADDR */ + +#ifdef CONFIG_DAMON_PADDR +bool damon_pa_target_valid(void *t); +void damon_pa_set_primitives(struct damon_ctx *ctx); +#endif /* CONFIG_DAMON_PADDR */ + +#endif /* _DAMON_H */ diff --git a/include/linux/delay.h b/include/linux/delay.h index 1d0e2ce6b6d9..abecbccab6e4 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -20,6 +20,7 @@ */ #include +#include extern unsigned long loops_per_jiffy; @@ -58,8 +59,15 @@ void calibrate_delay(void); void __attribute__((weak)) calibration_delay_done(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); +void usleep_range_state(unsigned long min, unsigned long max, + unsigned int state); void usleep_range(unsigned long min, unsigned long max); +static inline void usleep_idle_range(unsigned long min, unsigned long max) +{ + usleep_range_state(min, max, TASK_IDLE); +} + static inline void ssleep(unsigned int seconds) { msleep(seconds * 1000); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a2738944340c..dc636f162194 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -132,7 +132,7 @@ enum pageflags { #ifdef CONFIG_MEMORY_FAILURE PG_hwpoison, /* hardware poisoned page. Don't touch */ #endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) +#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) PG_young, PG_idle, #endif @@ -440,7 +440,7 @@ PAGEFLAG_FALSE(HWPoison) #define __PG_HWPOISON 0 #endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) +#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) TESTPAGEFLAG(Young, young, PF_ANY) SETPAGEFLAG(Young, young, PF_ANY) TESTCLEARFLAG(Young, young, PF_ANY) diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index cd45c1927d90..8e58a1b73771 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -25,7 +25,7 @@ enum page_ext_flags { /* page migration failed */ PAGE_EXT_PINNER_MIGRATION_FAILED, #endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT) +#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) PAGE_EXT_YOUNG, PAGE_EXT_IDLE, #endif diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index 1e894d34bdce..d8a6aecf99cb 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -6,7 +6,7 @@ #include #include -#ifdef CONFIG_IDLE_PAGE_TRACKING +#ifdef CONFIG_PAGE_IDLE_FLAG #ifdef CONFIG_64BIT static inline bool page_is_young(struct page *page) @@ -106,7 +106,7 @@ static inline void clear_page_idle(struct page *page) } #endif /* CONFIG_64BIT */ -#else /* !CONFIG_IDLE_PAGE_TRACKING */ +#else /* !CONFIG_PAGE_IDLE_FLAG */ static inline bool page_is_young(struct page *page) { @@ -135,6 +135,6 @@ static inline void clear_page_idle(struct page *page) { } -#endif /* CONFIG_IDLE_PAGE_TRACKING */ +#endif /* CONFIG_PAGE_IDLE_FLAG */ #endif /* _LINUX_MM_PAGE_IDLE_H */ diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h new file mode 100644 index 000000000000..c79f1d4c39af --- /dev/null +++ b/include/trace/events/damon.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM damon + +#if !defined(_TRACE_DAMON_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DAMON_H + +#include +#include +#include + +TRACE_EVENT(damon_aggregated, + + TP_PROTO(struct damon_target *t, unsigned int 
target_id, + struct damon_region *r, unsigned int nr_regions), + + TP_ARGS(t, target_id, r, nr_regions), + + TP_STRUCT__entry( + __field(unsigned long, target_id) + __field(unsigned int, nr_regions) + __field(unsigned long, start) + __field(unsigned long, end) + __field(unsigned int, nr_accesses) + __field(unsigned int, age) + ), + + TP_fast_assign( + __entry->target_id = target_id; + __entry->nr_regions = nr_regions; + __entry->start = r->ar.start; + __entry->end = r->ar.end; + __entry->nr_accesses = r->nr_accesses; + __entry->age = r->age; + ), + + TP_printk("target_id=%lu nr_regions=%u %lu-%lu: %u %u", + __entry->target_id, __entry->nr_regions, + __entry->start, __entry->end, + __entry->nr_accesses, __entry->age) +); + +#endif /* _TRACE_DAMON_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 390270e00a1d..d428f0137c49 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -73,7 +73,7 @@ #define IF_HAVE_PG_HWPOISON(flag,string) #endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) +#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) #define IF_HAVE_PG_IDLE(flag,string) ,{1UL << flag, string} #else #define IF_HAVE_PG_IDLE(flag,string) diff --git a/include/trace/hooks/dtask.h b/include/trace/hooks/dtask.h index 3c49af0e6560..675634238fbd 100644 --- a/include/trace/hooks/dtask.h +++ b/include/trace/hooks/dtask.h @@ -54,6 +54,9 @@ DECLARE_HOOK(android_vh_alter_mutex_list_add, DECLARE_HOOK(android_vh_mutex_unlock_slowpath, TP_PROTO(struct mutex *lock), TP_ARGS(lock)); +DECLARE_HOOK(android_vh_mutex_unlock_slowpath_end, + TP_PROTO(struct mutex *lock, struct task_struct *next), + TP_ARGS(lock, next)); /* macro versions of hooks are no longer required */ diff --git a/include/trace/hooks/mm.h b/include/trace/hooks/mm.h index cc00dcecfd71..4af456345aa4 100644 --- a/include/trace/hooks/mm.h +++ b/include/trace/hooks/mm.h @@ -122,10 +122,20 @@ DECLARE_HOOK(android_vh_drain_all_pages_bypass, int migratetype, unsigned long did_some_progress, bool *bypass), TP_ARGS(gfp_mask, order, alloc_flags, migratetype, did_some_progress, bypass)); +DECLARE_HOOK(android_vh_cma_drain_all_pages_bypass, + TP_PROTO(unsigned int migratetype, bool *bypass), + TP_ARGS(migratetype, bypass)); +DECLARE_HOOK(android_vh_pcplist_add_cma_pages_bypass, + TP_PROTO(int migratetype, bool *bypass), + TP_ARGS(migratetype, bypass)); struct device; DECLARE_HOOK(android_vh_subpage_dma_contig_alloc, TP_PROTO(bool *allow_subpage_alloc, struct device *dev, size_t *size), TP_ARGS(allow_subpage_alloc, dev, size)); +struct readahead_control; +DECLARE_HOOK(android_vh_ra_tuning_max_page, + TP_PROTO(struct readahead_control *ractl, unsigned long *max_page), + TP_ARGS(ractl, max_page)); /* macro versions of hooks are no longer required */ #endif /* _TRACE_HOOK_MM_H */ diff --git a/include/trace/hooks/rwsem.h b/include/trace/hooks/rwsem.h index d644a6e21259..c8e445216910 100644 --- a/include/trace/hooks/rwsem.h +++ b/include/trace/hooks/rwsem.h @@ -29,7 +29,21 @@ DECLARE_HOOK(android_vh_alter_rwsem_list_add, DECLARE_HOOK(android_vh_rwsem_wake_finish, TP_PROTO(struct rw_semaphore *sem), TP_ARGS(sem)); - +DECLARE_HOOK(android_vh_rwsem_set_owner, + TP_PROTO(struct rw_semaphore *sem), + TP_ARGS(sem)); +DECLARE_HOOK(android_vh_rwsem_set_reader_owned, + TP_PROTO(struct rw_semaphore *sem), + TP_ARGS(sem)); +DECLARE_HOOK(android_vh_rwsem_up_write_end, + TP_PROTO(struct rw_semaphore *sem), + TP_ARGS(sem)); 
+DECLARE_HOOK(android_vh_rwsem_up_read_end, + TP_PROTO(struct rw_semaphore *sem), + TP_ARGS(sem)); +DECLARE_HOOK(android_vh_rwsem_mark_wake_readers, + TP_PROTO(struct rw_semaphore *sem, struct rwsem_waiter *waiter), + TP_ARGS(sem, waiter)); /* macro versions of hooks are no longer required */ #endif /* _TRACE_HOOK_RWSEM_H */ diff --git a/include/trace/hooks/vendor_hooks.h b/include/trace/hooks/vendor_hooks.h index 87fe9ccdc331..2c2d57b6cf2a 100644 --- a/include/trace/hooks/vendor_hooks.h +++ b/include/trace/hooks/vendor_hooks.h @@ -7,7 +7,9 @@ * will override the DECLARE_RESTRICTED_HOOK and break the second include. */ +#ifndef __GENKSYMS__ #include +#endif #if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_ANDROID_VENDOR_HOOKS) diff --git a/include/trace/hooks/vmscan.h b/include/trace/hooks/vmscan.h index 95fb17af29e8..7b4d222301af 100644 --- a/include/trace/hooks/vmscan.h +++ b/include/trace/hooks/vmscan.h @@ -28,7 +28,12 @@ DECLARE_RESTRICTED_HOOK(android_rvh_set_balance_anon_file_reclaim, DECLARE_HOOK(android_vh_page_referenced_check_bypass, TP_PROTO(struct page *page, unsigned long nr_to_scan, int lru, bool *bypass), TP_ARGS(page, nr_to_scan, lru, bypass)); - +DECLARE_HOOK(android_vh_shrink_node_memcgs, + TP_PROTO(struct mem_cgroup *memcg, bool *skip), + TP_ARGS(memcg, skip)); +DECLARE_HOOK(android_vh_tune_memcg_scan_type, + TP_PROTO(struct mem_cgroup *memcg, char *scan_type), + TP_ARGS(memcg, scan_type)); #endif /* _TRACE_HOOK_VMSCAN_H */ /* This part must be outside protection */ #include diff --git a/init/Kconfig b/init/Kconfig index c08acd288f9f..ee9b2396972d 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -53,13 +53,13 @@ config LLD_VERSION config CC_CAN_LINK bool - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag)) if 64BIT - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m32-flag)) + default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag)) if 64BIT + default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag)) config CC_CAN_LINK_STATIC bool - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag) -static) if 64BIT - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m32-flag) -static) + default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag) -static) if 64BIT + default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag) -static) config CC_HAS_ASM_GOTO def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC)) diff --git a/init/init_task.c b/init/init_task.c index 26e018fbbf78..9b8ad6861316 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -213,7 +213,7 @@ struct task_struct init_task #ifdef CONFIG_SECCOMP_FILTER .seccomp = { .filter_count = ATOMIC_INIT(0) }, #endif -#ifdef CONFIG_ANDROID_STRUCT_PADDING +#ifdef CONFIG_ANDROID_VENDOR_OEM_DATA .android_vendor_data1 = {0, }, .android_oem_data1 = {0, }, #endif diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 016f40c76a83..fac88b1e134e 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -1293,6 +1293,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne spin_unlock(&lock->wait_lock); wake_up_q(&wake_q); + trace_android_vh_mutex_unlock_slowpath_end(lock, next); } #ifndef CONFIG_DEBUG_LOCK_ALLOC diff --git a/kernel/locking/rwsem.c 
b/kernel/locking/rwsem.c index 30a5f91f99cf..a33e6a36dae9 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -176,6 +176,7 @@ static inline void rwsem_set_owner(struct rw_semaphore *sem) { atomic_long_set(&sem->owner, (long)current); + trace_android_vh_rwsem_set_owner(sem); } static inline void rwsem_clear_owner(struct rw_semaphore *sem) @@ -213,6 +214,7 @@ static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem, static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) { __rwsem_set_reader_owned(sem, current); + trace_android_vh_rwsem_set_reader_owned(sem); } /* @@ -496,6 +498,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem, woken++; list_move_tail(&waiter->list, &wlist); + trace_android_vh_rwsem_mark_wake_readers(sem, waiter); /* * Limit # of readers that can be woken up per wakeup call. */ @@ -1460,6 +1463,7 @@ static inline void __up_read(struct rw_semaphore *sem) clear_wr_nonspinnable(sem); rwsem_wake(sem, tmp); } + trace_android_vh_rwsem_up_read_end(sem); } /* @@ -1481,6 +1485,7 @@ static inline void __up_write(struct rw_semaphore *sem) tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count); if (unlikely(tmp & RWSEM_FLAG_WAITERS)) rwsem_wake(sem, tmp); + trace_android_vh_rwsem_up_write_end(sem); } /* diff --git a/kernel/time/timer.c b/kernel/time/timer.c index cc5d9d9d959c..f5147bde3ed0 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -2055,6 +2055,32 @@ unsigned long msleep_interruptible(unsigned int msecs) EXPORT_SYMBOL(msleep_interruptible); +/** + * usleep_range_state - Sleep for an approximate time in a given state + * @min: Minimum time in usecs to sleep + * @max: Maximum time in usecs to sleep + * @state: State of the current task that will be while sleeping + * + * In non-atomic context where the exact wakeup time is flexible, use + * usleep_range_state() instead of udelay(). The sleep improves responsiveness + * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces + * power usage by allowing hrtimers to take advantage of an already- + * scheduled interrupt instead of scheduling a new one just for this sleep. + */ +void __sched usleep_range_state(unsigned long min, unsigned long max, + unsigned int state) +{ + ktime_t exp = ktime_add_us(ktime_get(), min); + u64 delta = (u64)(max - min) * NSEC_PER_USEC; + + for (;;) { + __set_current_state(state); + /* Do not return before the requested sleep time has elapsed */ + if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) + break; + } +} + /** * usleep_range - Sleep for an approximate time * @min: Minimum time in usecs to sleep @@ -2068,14 +2094,6 @@ EXPORT_SYMBOL(msleep_interruptible); */ void __sched usleep_range(unsigned long min, unsigned long max) { - ktime_t exp = ktime_add_us(ktime_get(), min); - u64 delta = (u64)(max - min) * NSEC_PER_USEC; - - for (;;) { - __set_current_state(TASK_UNINTERRUPTIBLE); - /* Do not return before the requested sleep time has elapsed */ - if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) - break; - } + usleep_range_state(min, max, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(usleep_range); diff --git a/mm/Kconfig b/mm/Kconfig index 884ba3860ec8..6a020a0d7bc5 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -756,10 +756,18 @@ config DEFERRED_STRUCT_PAGE_INIT lifetime of the system until these kthreads finish the initialisation. +config PAGE_IDLE_FLAG + bool + select PAGE_EXTENSION if !64BIT + help + This adds PG_idle and PG_young flags to 'struct page'. 
PTE Accessed + bit writers can set the state of the bit in the flags so that PTE + Accessed bit readers may avoid disturbance. + config IDLE_PAGE_TRACKING bool "Enable idle page tracking" depends on SYSFS && MMU - select PAGE_EXTENSION if !64BIT + select PAGE_IDLE_FLAG help This feature allows to estimate the amount of user pages that have not been touched during a given period of time. This information can @@ -888,4 +896,6 @@ config ARCH_HAS_HUGEPD config MAPPING_DIRTY_HELPERS bool +source "mm/damon/Kconfig" + endmenu diff --git a/mm/Makefile b/mm/Makefile index 8de8651da069..b90583a89fb5 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -116,6 +116,7 @@ obj-$(CONFIG_USERFAULTFD) += userfaultfd.o obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o +obj-$(CONFIG_DAMON) += damon/ obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o obj-$(CONFIG_ZONE_DEVICE) += memremap.o diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig new file mode 100644 index 000000000000..5bcf05851ad0 --- /dev/null +++ b/mm/damon/Kconfig @@ -0,0 +1,88 @@ +# SPDX-License-Identifier: GPL-2.0-only + +menu "Data Access Monitoring" + +config DAMON + bool "DAMON: Data Access Monitoring Framework" + help + This builds a framework that allows kernel subsystems to monitor + access frequency of each memory region. The information can be useful + for performance-centric DRAM level memory management. + + See https://damonitor.github.io/doc/html/latest-damon/index.html for + more information. + +config DAMON_KUNIT_TEST + bool "Test for damon" if !KUNIT_ALL_TESTS + depends on DAMON && KUNIT=y + default KUNIT_ALL_TESTS + help + This builds the DAMON Kunit test suite. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation. + + If unsure, say N. + +config DAMON_VADDR + bool "Data access monitoring primitives for virtual address spaces" + depends on DAMON && MMU + select PAGE_IDLE_FLAG + help + This builds the default data access monitoring primitives for DAMON + that work for virtual address spaces. + +config DAMON_PADDR + bool "Data access monitoring primitives for the physical address space" + depends on DAMON && MMU + select PAGE_IDLE_FLAG + help + This builds the default data access monitoring primitives for DAMON + that works for the physical address space. + +config DAMON_VADDR_KUNIT_TEST + bool "Test for DAMON primitives" if !KUNIT_ALL_TESTS + depends on DAMON_VADDR && KUNIT=y + default KUNIT_ALL_TESTS + help + This builds the DAMON virtual addresses primitives Kunit test suite. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation. + + If unsure, say N. + +config DAMON_DBGFS + bool "DAMON debugfs interface" + depends on DAMON_VADDR && DAMON_PADDR && DEBUG_FS + help + This builds the debugfs interface for DAMON. The user space admins + can use the interface for arbitrary data access monitoring. + + If unsure, say N. + +config DAMON_DBGFS_KUNIT_TEST + bool "Test for damon debugfs interface" if !KUNIT_ALL_TESTS + depends on DAMON_DBGFS && KUNIT=y + default KUNIT_ALL_TESTS + help + This builds the DAMON debugfs interface Kunit test suite. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation. + + If unsure, say N. 
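The DAMON Kconfig entries above gate the in-kernel interface declared in the new include/linux/damon.h hunk earlier in this patch. As a rough, hypothetical sketch of how a built-in caller could drive that interface, assuming CONFIG_DAMON=y and CONFIG_DAMON_VADDR=y; the function name, the single-target setup, and the choice of target id value are illustrative only and not part of the patch:

/* Illustrative sketch only, not part of this patch. */
#include <linux/damon.h>

static struct damon_ctx *example_ctx;

static int example_start_vaddr_monitoring(unsigned long target_id)
{
	int err;

	example_ctx = damon_new_ctx();
	if (!example_ctx)
		return -ENOMEM;

	/* Use the virtual address space primitives added by this patch. */
	damon_va_set_primitives(example_ctx);

	/*
	 * Same values as the damon_new_ctx() defaults: 5ms sampling, 100ms
	 * aggregation, 60s primitive update, 10..1000 adaptive regions.
	 */
	err = damon_set_attrs(example_ctx, 5000, 100000, 60000000, 10, 1000);
	if (err)
		goto out;

	/* One target; damon.h suggests a pidfd or mm_struct address as the id. */
	err = damon_set_targets(example_ctx, &target_id, 1);
	if (err)
		goto out;

	return damon_start(&example_ctx, 1);

out:
	damon_destroy_ctx(example_ctx);
	example_ctx = NULL;
	return err;
}

Tearing the monitor down would symmetrically use damon_stop(&example_ctx, 1) followed by damon_destroy_ctx(), per the declarations in the damon.h hunk.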
+ +config DAMON_RECLAIM + bool "Build DAMON-based reclaim (DAMON_RECLAIM)" + depends on DAMON_PADDR + help + This builds the DAMON-based reclamation subsystem. It finds pages + that not accessed for a long time (cold) using DAMON and reclaim + those. + + This is suggested to be used as a proactive and lightweight + reclamation under light memory pressure, while the traditional page + scanning-based reclamation is used for heavy pressure. + +endmenu diff --git a/mm/damon/Makefile b/mm/damon/Makefile new file mode 100644 index 000000000000..f7d5ac377a2b --- /dev/null +++ b/mm/damon/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_DAMON) := core.o +obj-$(CONFIG_DAMON_VADDR) += prmtv-common.o vaddr.o +obj-$(CONFIG_DAMON_PADDR) += prmtv-common.o paddr.o +obj-$(CONFIG_DAMON_DBGFS) += dbgfs.o +obj-$(CONFIG_DAMON_RECLAIM) += reclaim.o diff --git a/mm/damon/core-test.h b/mm/damon/core-test.h new file mode 100644 index 000000000000..7008c3735e99 --- /dev/null +++ b/mm/damon/core-test.h @@ -0,0 +1,253 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Data Access Monitor Unit Tests + * + * Copyright 2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * + * Author: SeongJae Park + */ + +#ifdef CONFIG_DAMON_KUNIT_TEST + +#ifndef _DAMON_CORE_TEST_H +#define _DAMON_CORE_TEST_H + +#include + +static void damon_test_regions(struct kunit *test) +{ + struct damon_region *r; + struct damon_target *t; + + r = damon_new_region(1, 2); + KUNIT_EXPECT_EQ(test, 1ul, r->ar.start); + KUNIT_EXPECT_EQ(test, 2ul, r->ar.end); + KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses); + + t = damon_new_target(42); + KUNIT_EXPECT_EQ(test, 0u, damon_nr_regions(t)); + + damon_add_region(r, t); + KUNIT_EXPECT_EQ(test, 1u, damon_nr_regions(t)); + + damon_del_region(r, t); + KUNIT_EXPECT_EQ(test, 0u, damon_nr_regions(t)); + + damon_free_target(t); +} + +static unsigned int nr_damon_targets(struct damon_ctx *ctx) +{ + struct damon_target *t; + unsigned int nr_targets = 0; + + damon_for_each_target(t, ctx) + nr_targets++; + + return nr_targets; +} + +static void damon_test_target(struct kunit *test) +{ + struct damon_ctx *c = damon_new_ctx(); + struct damon_target *t; + + t = damon_new_target(42); + KUNIT_EXPECT_EQ(test, 42ul, t->id); + KUNIT_EXPECT_EQ(test, 0u, nr_damon_targets(c)); + + damon_add_target(c, t); + KUNIT_EXPECT_EQ(test, 1u, nr_damon_targets(c)); + + damon_destroy_target(t); + KUNIT_EXPECT_EQ(test, 0u, nr_damon_targets(c)); + + damon_destroy_ctx(c); +} + +/* + * Test kdamond_reset_aggregated() + * + * DAMON checks access to each region and aggregates this information as the + * access frequency of each region. In detail, it increases '->nr_accesses' of + * regions that an access has confirmed. 'kdamond_reset_aggregated()' flushes + * the aggregated information ('->nr_accesses' of each regions) to the result + * buffer. As a result of the flushing, the '->nr_accesses' of regions are + * initialized to zero. 
+ */ +static void damon_test_aggregate(struct kunit *test) +{ + struct damon_ctx *ctx = damon_new_ctx(); + unsigned long target_ids[] = {1, 2, 3}; + unsigned long saddr[][3] = {{10, 20, 30}, {5, 42, 49}, {13, 33, 55} }; + unsigned long eaddr[][3] = {{15, 27, 40}, {31, 45, 55}, {23, 44, 66} }; + unsigned long accesses[][3] = {{42, 95, 84}, {10, 20, 30}, {0, 1, 2} }; + struct damon_target *t; + struct damon_region *r; + int it, ir; + + damon_set_targets(ctx, target_ids, 3); + + it = 0; + damon_for_each_target(t, ctx) { + for (ir = 0; ir < 3; ir++) { + r = damon_new_region(saddr[it][ir], eaddr[it][ir]); + r->nr_accesses = accesses[it][ir]; + damon_add_region(r, t); + } + it++; + } + kdamond_reset_aggregated(ctx); + it = 0; + damon_for_each_target(t, ctx) { + ir = 0; + /* '->nr_accesses' should be zeroed */ + damon_for_each_region(r, t) { + KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses); + ir++; + } + /* regions should be preserved */ + KUNIT_EXPECT_EQ(test, 3, ir); + it++; + } + /* targets also should be preserved */ + KUNIT_EXPECT_EQ(test, 3, it); + + damon_destroy_ctx(ctx); +} + +static void damon_test_split_at(struct kunit *test) +{ + struct damon_ctx *c = damon_new_ctx(); + struct damon_target *t; + struct damon_region *r; + + t = damon_new_target(42); + r = damon_new_region(0, 100); + damon_add_region(r, t); + damon_split_region_at(c, t, r, 25); + KUNIT_EXPECT_EQ(test, r->ar.start, 0ul); + KUNIT_EXPECT_EQ(test, r->ar.end, 25ul); + + r = damon_next_region(r); + KUNIT_EXPECT_EQ(test, r->ar.start, 25ul); + KUNIT_EXPECT_EQ(test, r->ar.end, 100ul); + + damon_free_target(t); + damon_destroy_ctx(c); +} + +static void damon_test_merge_two(struct kunit *test) +{ + struct damon_target *t; + struct damon_region *r, *r2, *r3; + int i; + + t = damon_new_target(42); + r = damon_new_region(0, 100); + r->nr_accesses = 10; + damon_add_region(r, t); + r2 = damon_new_region(100, 300); + r2->nr_accesses = 20; + damon_add_region(r2, t); + + damon_merge_two_regions(t, r, r2); + KUNIT_EXPECT_EQ(test, r->ar.start, 0ul); + KUNIT_EXPECT_EQ(test, r->ar.end, 300ul); + KUNIT_EXPECT_EQ(test, r->nr_accesses, 16u); + + i = 0; + damon_for_each_region(r3, t) { + KUNIT_EXPECT_PTR_EQ(test, r, r3); + i++; + } + KUNIT_EXPECT_EQ(test, i, 1); + + damon_free_target(t); +} + +static struct damon_region *__nth_region_of(struct damon_target *t, int idx) +{ + struct damon_region *r; + unsigned int i = 0; + + damon_for_each_region(r, t) { + if (i++ == idx) + return r; + } + + return NULL; +} + +static void damon_test_merge_regions_of(struct kunit *test) +{ + struct damon_target *t; + struct damon_region *r; + unsigned long sa[] = {0, 100, 114, 122, 130, 156, 170, 184}; + unsigned long ea[] = {100, 112, 122, 130, 156, 170, 184, 230}; + unsigned int nrs[] = {0, 0, 10, 10, 20, 30, 1, 2}; + + unsigned long saddrs[] = {0, 114, 130, 156, 170}; + unsigned long eaddrs[] = {112, 130, 156, 170, 230}; + int i; + + t = damon_new_target(42); + for (i = 0; i < ARRAY_SIZE(sa); i++) { + r = damon_new_region(sa[i], ea[i]); + r->nr_accesses = nrs[i]; + damon_add_region(r, t); + } + + damon_merge_regions_of(t, 9, 9999); + /* 0-112, 114-130, 130-156, 156-170 */ + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 5u); + for (i = 0; i < 5; i++) { + r = __nth_region_of(t, i); + KUNIT_EXPECT_EQ(test, r->ar.start, saddrs[i]); + KUNIT_EXPECT_EQ(test, r->ar.end, eaddrs[i]); + } + damon_free_target(t); +} + +static void damon_test_split_regions_of(struct kunit *test) +{ + struct damon_ctx *c = damon_new_ctx(); + struct damon_target *t; + struct damon_region *r; + + t = 
damon_new_target(42); + r = damon_new_region(0, 22); + damon_add_region(r, t); + damon_split_regions_of(c, t, 2); + KUNIT_EXPECT_LE(test, damon_nr_regions(t), 2u); + damon_free_target(t); + + t = damon_new_target(42); + r = damon_new_region(0, 220); + damon_add_region(r, t); + damon_split_regions_of(c, t, 4); + KUNIT_EXPECT_LE(test, damon_nr_regions(t), 4u); + damon_free_target(t); + damon_destroy_ctx(c); +} + +static struct kunit_case damon_test_cases[] = { + KUNIT_CASE(damon_test_target), + KUNIT_CASE(damon_test_regions), + KUNIT_CASE(damon_test_aggregate), + KUNIT_CASE(damon_test_split_at), + KUNIT_CASE(damon_test_merge_two), + KUNIT_CASE(damon_test_merge_regions_of), + KUNIT_CASE(damon_test_split_regions_of), + {}, +}; + +static struct kunit_suite damon_test_suite = { + .name = "damon", + .test_cases = damon_test_cases, +}; +kunit_test_suite(damon_test_suite); + +#endif /* _DAMON_CORE_TEST_H */ + +#endif /* CONFIG_DAMON_KUNIT_TEST */ diff --git a/mm/damon/core.c b/mm/damon/core.c new file mode 100644 index 000000000000..1dd153c31c9e --- /dev/null +++ b/mm/damon/core.c @@ -0,0 +1,1075 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Data Access Monitor + * + * Author: SeongJae Park + */ + +#define pr_fmt(fmt) "damon: " fmt + +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +#ifdef CONFIG_DAMON_KUNIT_TEST +#undef DAMON_MIN_REGION +#define DAMON_MIN_REGION 1 +#endif + +static DEFINE_MUTEX(damon_lock); +static int nr_running_ctxs; + +/* + * Construct a damon_region struct + * + * Returns the pointer to the new struct if success, or NULL otherwise + */ +struct damon_region *damon_new_region(unsigned long start, unsigned long end) +{ + struct damon_region *region; + + region = kmalloc(sizeof(*region), GFP_KERNEL); + if (!region) + return NULL; + + region->ar.start = start; + region->ar.end = end; + region->nr_accesses = 0; + INIT_LIST_HEAD(®ion->list); + + region->age = 0; + region->last_nr_accesses = 0; + + return region; +} + +void damon_add_region(struct damon_region *r, struct damon_target *t) +{ + list_add_tail(&r->list, &t->regions_list); + t->nr_regions++; +} + +static void damon_del_region(struct damon_region *r, struct damon_target *t) +{ + list_del(&r->list); + t->nr_regions--; +} + +static void damon_free_region(struct damon_region *r) +{ + kfree(r); +} + +void damon_destroy_region(struct damon_region *r, struct damon_target *t) +{ + damon_del_region(r, t); + damon_free_region(r); +} + +struct damos *damon_new_scheme( + unsigned long min_sz_region, unsigned long max_sz_region, + unsigned int min_nr_accesses, unsigned int max_nr_accesses, + unsigned int min_age_region, unsigned int max_age_region, + enum damos_action action, struct damos_quota *quota, + struct damos_watermarks *wmarks) +{ + struct damos *scheme; + + scheme = kmalloc(sizeof(*scheme), GFP_KERNEL); + if (!scheme) + return NULL; + scheme->min_sz_region = min_sz_region; + scheme->max_sz_region = max_sz_region; + scheme->min_nr_accesses = min_nr_accesses; + scheme->max_nr_accesses = max_nr_accesses; + scheme->min_age_region = min_age_region; + scheme->max_age_region = max_age_region; + scheme->action = action; + scheme->stat = (struct damos_stat){}; + INIT_LIST_HEAD(&scheme->list); + + scheme->quota.ms = quota->ms; + scheme->quota.sz = quota->sz; + scheme->quota.reset_interval = quota->reset_interval; + scheme->quota.weight_sz = quota->weight_sz; + scheme->quota.weight_nr_accesses = quota->weight_nr_accesses; + scheme->quota.weight_age = quota->weight_age; + 
scheme->quota.total_charged_sz = 0; + scheme->quota.total_charged_ns = 0; + scheme->quota.esz = 0; + scheme->quota.charged_sz = 0; + scheme->quota.charged_from = 0; + scheme->quota.charge_target_from = NULL; + scheme->quota.charge_addr_from = 0; + + scheme->wmarks.metric = wmarks->metric; + scheme->wmarks.interval = wmarks->interval; + scheme->wmarks.high = wmarks->high; + scheme->wmarks.mid = wmarks->mid; + scheme->wmarks.low = wmarks->low; + scheme->wmarks.activated = true; + + return scheme; +} + +void damon_add_scheme(struct damon_ctx *ctx, struct damos *s) +{ + list_add_tail(&s->list, &ctx->schemes); +} + +static void damon_del_scheme(struct damos *s) +{ + list_del(&s->list); +} + +static void damon_free_scheme(struct damos *s) +{ + kfree(s); +} + +void damon_destroy_scheme(struct damos *s) +{ + damon_del_scheme(s); + damon_free_scheme(s); +} + +/* + * Construct a damon_target struct + * + * Returns the pointer to the new struct if success, or NULL otherwise + */ +struct damon_target *damon_new_target(unsigned long id) +{ + struct damon_target *t; + + t = kmalloc(sizeof(*t), GFP_KERNEL); + if (!t) + return NULL; + + t->id = id; + t->nr_regions = 0; + INIT_LIST_HEAD(&t->regions_list); + + return t; +} + +void damon_add_target(struct damon_ctx *ctx, struct damon_target *t) +{ + list_add_tail(&t->list, &ctx->adaptive_targets); +} + +bool damon_targets_empty(struct damon_ctx *ctx) +{ + return list_empty(&ctx->adaptive_targets); +} + +static void damon_del_target(struct damon_target *t) +{ + list_del(&t->list); +} + +void damon_free_target(struct damon_target *t) +{ + struct damon_region *r, *next; + + damon_for_each_region_safe(r, next, t) + damon_free_region(r); + kfree(t); +} + +void damon_destroy_target(struct damon_target *t) +{ + damon_del_target(t); + damon_free_target(t); +} + +unsigned int damon_nr_regions(struct damon_target *t) +{ + return t->nr_regions; +} + +struct damon_ctx *damon_new_ctx(void) +{ + struct damon_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + ctx->sample_interval = 5 * 1000; + ctx->aggr_interval = 100 * 1000; + ctx->primitive_update_interval = 60 * 1000 * 1000; + + ktime_get_coarse_ts64(&ctx->last_aggregation); + ctx->last_primitive_update = ctx->last_aggregation; + + mutex_init(&ctx->kdamond_lock); + + ctx->min_nr_regions = 10; + ctx->max_nr_regions = 1000; + + INIT_LIST_HEAD(&ctx->adaptive_targets); + INIT_LIST_HEAD(&ctx->schemes); + + return ctx; +} + +static void damon_destroy_targets(struct damon_ctx *ctx) +{ + struct damon_target *t, *next_t; + + if (ctx->primitive.cleanup) { + ctx->primitive.cleanup(ctx); + return; + } + + damon_for_each_target_safe(t, next_t, ctx) + damon_destroy_target(t); +} + +void damon_destroy_ctx(struct damon_ctx *ctx) +{ + struct damos *s, *next_s; + + damon_destroy_targets(ctx); + + damon_for_each_scheme_safe(s, next_s, ctx) + damon_destroy_scheme(s); + + kfree(ctx); +} + +/** + * damon_set_targets() - Set monitoring targets. + * @ctx: monitoring context + * @ids: array of target ids + * @nr_ids: number of entries in @ids + * + * This function should not be called while the kdamond is running. + * + * Return: 0 on success, negative error code otherwise. 
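+ *
+ * If constructing a target fails, every target already added to @ctx is
+ * destroyed and -ENOMEM is returned; freeing the @ids array itself is left
+ * to the caller.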
+ */ +int damon_set_targets(struct damon_ctx *ctx, + unsigned long *ids, ssize_t nr_ids) +{ + ssize_t i; + struct damon_target *t, *next; + + damon_destroy_targets(ctx); + + for (i = 0; i < nr_ids; i++) { + t = damon_new_target(ids[i]); + if (!t) { + /* The caller should do cleanup of the ids itself */ + damon_for_each_target_safe(t, next, ctx) + damon_destroy_target(t); + return -ENOMEM; + } + damon_add_target(ctx, t); + } + + return 0; +} + +/** + * damon_set_attrs() - Set attributes for the monitoring. + * @ctx: monitoring context + * @sample_int: time interval between samplings + * @aggr_int: time interval between aggregations + * @primitive_upd_int: time interval between monitoring primitive updates + * @min_nr_reg: minimal number of regions + * @max_nr_reg: maximum number of regions + * + * This function should not be called while the kdamond is running. + * Every time interval is in micro-seconds. + * + * Return: 0 on success, negative error code otherwise. + */ +int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, + unsigned long aggr_int, unsigned long primitive_upd_int, + unsigned long min_nr_reg, unsigned long max_nr_reg) +{ + if (min_nr_reg < 3) + return -EINVAL; + if (min_nr_reg > max_nr_reg) + return -EINVAL; + + ctx->sample_interval = sample_int; + ctx->aggr_interval = aggr_int; + ctx->primitive_update_interval = primitive_upd_int; + ctx->min_nr_regions = min_nr_reg; + ctx->max_nr_regions = max_nr_reg; + + return 0; +} + +/** + * damon_set_schemes() - Set data access monitoring based operation schemes. + * @ctx: monitoring context + * @schemes: array of the schemes + * @nr_schemes: number of entries in @schemes + * + * This function should not be called while the kdamond of the context is + * running. + * + * Return: 0 if success, or negative error code otherwise. + */ +int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes, + ssize_t nr_schemes) +{ + struct damos *s, *next; + ssize_t i; + + damon_for_each_scheme_safe(s, next, ctx) + damon_destroy_scheme(s); + for (i = 0; i < nr_schemes; i++) + damon_add_scheme(ctx, schemes[i]); + return 0; +} + +/** + * damon_nr_running_ctxs() - Return number of currently running contexts. + */ +int damon_nr_running_ctxs(void) +{ + int nr_ctxs; + + mutex_lock(&damon_lock); + nr_ctxs = nr_running_ctxs; + mutex_unlock(&damon_lock); + + return nr_ctxs; +} + +/* Returns the size upper limit for each monitoring region */ +static unsigned long damon_region_sz_limit(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct damon_region *r; + unsigned long sz = 0; + + damon_for_each_target(t, ctx) { + damon_for_each_region(r, t) + sz += r->ar.end - r->ar.start; + } + + if (ctx->min_nr_regions) + sz /= ctx->min_nr_regions; + if (sz < DAMON_MIN_REGION) + sz = DAMON_MIN_REGION; + + return sz; +} + +static int kdamond_fn(void *data); + +/* + * __damon_start() - Starts monitoring with given context. + * @ctx: monitoring context + * + * This function should be called while damon_lock is hold. + * + * Return: 0 on success, negative error code otherwise. + */ +static int __damon_start(struct damon_ctx *ctx) +{ + int err = -EBUSY; + + mutex_lock(&ctx->kdamond_lock); + if (!ctx->kdamond) { + err = 0; + ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d", + nr_running_ctxs); + if (IS_ERR(ctx->kdamond)) { + err = PTR_ERR(ctx->kdamond); + ctx->kdamond = NULL; + } + } + mutex_unlock(&ctx->kdamond_lock); + + return err; +} + +/** + * damon_start() - Starts the monitorings for a given group of contexts. 
+ * @ctxs: an array of the pointers for contexts to start monitoring + * @nr_ctxs: size of @ctxs + * + * This function starts a group of monitoring threads for a group of monitoring + * contexts. One thread per each context is created and run in parallel. The + * caller should handle synchronization between the threads by itself. If a + * group of threads that created by other 'damon_start()' call is currently + * running, this function does nothing but returns -EBUSY. + * + * Return: 0 on success, negative error code otherwise. + */ +int damon_start(struct damon_ctx **ctxs, int nr_ctxs) +{ + int i; + int err = 0; + + mutex_lock(&damon_lock); + if (nr_running_ctxs) { + mutex_unlock(&damon_lock); + return -EBUSY; + } + + for (i = 0; i < nr_ctxs; i++) { + err = __damon_start(ctxs[i]); + if (err) + break; + nr_running_ctxs++; + } + mutex_unlock(&damon_lock); + + return err; +} + +/* + * __damon_stop() - Stops monitoring of given context. + * @ctx: monitoring context + * + * Return: 0 on success, negative error code otherwise. + */ +static int __damon_stop(struct damon_ctx *ctx) +{ + struct task_struct *tsk; + + mutex_lock(&ctx->kdamond_lock); + tsk = ctx->kdamond; + if (tsk) { + get_task_struct(tsk); + mutex_unlock(&ctx->kdamond_lock); + kthread_stop(tsk); + put_task_struct(tsk); + return 0; + } + mutex_unlock(&ctx->kdamond_lock); + + return -EPERM; +} + +/** + * damon_stop() - Stops the monitorings for a given group of contexts. + * @ctxs: an array of the pointers for contexts to stop monitoring + * @nr_ctxs: size of @ctxs + * + * Return: 0 on success, negative error code otherwise. + */ +int damon_stop(struct damon_ctx **ctxs, int nr_ctxs) +{ + int i, err = 0; + + for (i = 0; i < nr_ctxs; i++) { + /* nr_running_ctxs is decremented in kdamond_fn */ + err = __damon_stop(ctxs[i]); + if (err) + return err; + } + + return err; +} + +/* + * damon_check_reset_time_interval() - Check if a time interval is elapsed. + * @baseline: the time to check whether the interval has elapsed since + * @interval: the time interval (microseconds) + * + * See whether the given time interval has passed since the given baseline + * time. If so, it also updates the baseline to current time for next check. + * + * Return: true if the time interval has passed, or false otherwise. + */ +static bool damon_check_reset_time_interval(struct timespec64 *baseline, + unsigned long interval) +{ + struct timespec64 now; + + ktime_get_coarse_ts64(&now); + if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) < + interval * 1000) + return false; + *baseline = now; + return true; +} + +/* + * Check whether it is time to flush the aggregated information + */ +static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx) +{ + return damon_check_reset_time_interval(&ctx->last_aggregation, + ctx->aggr_interval); +} + +/* + * Reset the aggregated monitoring results ('nr_accesses' of each region). 
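+ *
+ * Before the reset, the current '->nr_accesses' of each region is saved in
+ * '->last_nr_accesses' and reported through 'trace_damon_aggregated()'.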
+ */ +static void kdamond_reset_aggregated(struct damon_ctx *c) +{ + struct damon_target *t; + unsigned int ti = 0; /* target's index */ + + damon_for_each_target(t, c) { + struct damon_region *r; + + damon_for_each_region(r, t) { + trace_damon_aggregated(t, ti, r, damon_nr_regions(t)); + r->last_nr_accesses = r->nr_accesses; + r->nr_accesses = 0; + } + ti++; + } +} + +static void damon_split_region_at(struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + unsigned long sz_r); + +static bool __damos_valid_target(struct damon_region *r, struct damos *s) +{ + unsigned long sz; + + sz = r->ar.end - r->ar.start; + return s->min_sz_region <= sz && sz <= s->max_sz_region && + s->min_nr_accesses <= r->nr_accesses && + r->nr_accesses <= s->max_nr_accesses && + s->min_age_region <= r->age && r->age <= s->max_age_region; +} + +static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t, + struct damon_region *r, struct damos *s) +{ + bool ret = __damos_valid_target(r, s); + + if (!ret || !s->quota.esz || !c->primitive.get_scheme_score) + return ret; + + return c->primitive.get_scheme_score(c, t, r, s) >= s->quota.min_score; +} + +static void damon_do_apply_schemes(struct damon_ctx *c, + struct damon_target *t, + struct damon_region *r) +{ + struct damos *s; + + damon_for_each_scheme(s, c) { + struct damos_quota *quota = &s->quota; + unsigned long sz = r->ar.end - r->ar.start; + struct timespec64 begin, end; + unsigned long sz_applied = 0; + + if (!s->wmarks.activated) + continue; + + /* Check the quota */ + if (quota->esz && quota->charged_sz >= quota->esz) + continue; + + /* Skip previously charged regions */ + if (quota->charge_target_from) { + if (t != quota->charge_target_from) + continue; + if (r == damon_last_region(t)) { + quota->charge_target_from = NULL; + quota->charge_addr_from = 0; + continue; + } + if (quota->charge_addr_from && + r->ar.end <= quota->charge_addr_from) + continue; + + if (quota->charge_addr_from && r->ar.start < + quota->charge_addr_from) { + sz = ALIGN_DOWN(quota->charge_addr_from - + r->ar.start, DAMON_MIN_REGION); + if (!sz) { + if (r->ar.end - r->ar.start <= + DAMON_MIN_REGION) + continue; + sz = DAMON_MIN_REGION; + } + damon_split_region_at(c, t, r, sz); + r = damon_next_region(r); + sz = r->ar.end - r->ar.start; + } + quota->charge_target_from = NULL; + quota->charge_addr_from = 0; + } + + if (!damos_valid_target(c, t, r, s)) + continue; + + /* Apply the scheme */ + if (c->primitive.apply_scheme) { + if (quota->esz && + quota->charged_sz + sz > quota->esz) { + sz = ALIGN_DOWN(quota->esz - quota->charged_sz, + DAMON_MIN_REGION); + if (!sz) + goto update_stat; + damon_split_region_at(c, t, r, sz); + } + ktime_get_coarse_ts64(&begin); + sz_applied = c->primitive.apply_scheme(c, t, r, s); + ktime_get_coarse_ts64(&end); + quota->total_charged_ns += timespec64_to_ns(&end) - + timespec64_to_ns(&begin); + quota->charged_sz += sz; + if (quota->esz && quota->charged_sz >= quota->esz) { + quota->charge_target_from = t; + quota->charge_addr_from = r->ar.end + 1; + } + } + if (s->action != DAMOS_STAT) + r->age = 0; + +update_stat: + s->stat.nr_tried++; + s->stat.sz_tried += sz; + if (sz_applied) + s->stat.nr_applied++; + s->stat.sz_applied += sz_applied; + } +} + +/* Shouldn't be called if quota->ms and quota->sz are zero */ +static void damos_set_effective_quota(struct damos_quota *quota) +{ + unsigned long throughput; + unsigned long esz; + + if (!quota->ms) { + quota->esz = quota->sz; + return; + } + + if (quota->total_charged_ns) + 
throughput = quota->total_charged_sz * 1000000 / + quota->total_charged_ns; + else + throughput = PAGE_SIZE * 1024; + esz = throughput * quota->ms; + + if (quota->sz && quota->sz < esz) + esz = quota->sz; + quota->esz = esz; +} + +static void kdamond_apply_schemes(struct damon_ctx *c) +{ + struct damon_target *t; + struct damon_region *r, *next_r; + struct damos *s; + + damon_for_each_scheme(s, c) { + struct damos_quota *quota = &s->quota; + unsigned long cumulated_sz; + unsigned int score, max_score = 0; + + if (!s->wmarks.activated) + continue; + + if (!quota->ms && !quota->sz) + continue; + + /* New charge window starts */ + if (time_after_eq(jiffies, quota->charged_from + + msecs_to_jiffies( + quota->reset_interval))) { + if (quota->esz && quota->charged_sz >= quota->esz) + s->stat.qt_exceeds++; + quota->total_charged_sz += quota->charged_sz; + quota->charged_from = jiffies; + quota->charged_sz = 0; + damos_set_effective_quota(quota); + } + + if (!c->primitive.get_scheme_score) + continue; + + /* Fill up the score histogram */ + memset(quota->histogram, 0, sizeof(quota->histogram)); + damon_for_each_target(t, c) { + damon_for_each_region(r, t) { + if (!__damos_valid_target(r, s)) + continue; + score = c->primitive.get_scheme_score( + c, t, r, s); + quota->histogram[score] += + r->ar.end - r->ar.start; + if (score > max_score) + max_score = score; + } + } + + /* Set the min score limit */ + for (cumulated_sz = 0, score = max_score; ; score--) { + cumulated_sz += quota->histogram[score]; + if (cumulated_sz >= quota->esz || !score) + break; + } + quota->min_score = score; + } + + damon_for_each_target(t, c) { + damon_for_each_region_safe(r, next_r, t) + damon_do_apply_schemes(c, t, r); + } +} + +static inline unsigned long sz_damon_region(struct damon_region *r) +{ + return r->ar.end - r->ar.start; +} + +/* + * Merge two adjacent regions into one region + */ +static void damon_merge_two_regions(struct damon_target *t, + struct damon_region *l, struct damon_region *r) +{ + unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r); + + l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) / + (sz_l + sz_r); + l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r); + l->ar.end = r->ar.end; + damon_destroy_region(r, t); +} + +/* + * Merge adjacent regions having similar access frequencies + * + * t target affected by this merge operation + * thres '->nr_accesses' diff threshold for the merge + * sz_limit size upper limit of each region + */ +static void damon_merge_regions_of(struct damon_target *t, unsigned int thres, + unsigned long sz_limit) +{ + struct damon_region *r, *prev = NULL, *next; + + damon_for_each_region_safe(r, next, t) { + if (abs(r->nr_accesses - r->last_nr_accesses) > thres) + r->age = 0; + else + r->age++; + + if (prev && prev->ar.end == r->ar.start && + abs(prev->nr_accesses - r->nr_accesses) <= thres && + sz_damon_region(prev) + sz_damon_region(r) <= sz_limit) + damon_merge_two_regions(t, prev, r); + else + prev = r; + } +} + +/* + * Merge adjacent regions having similar access frequencies + * + * threshold '->nr_accesses' diff threshold for the merge + * sz_limit size upper limit of each region + * + * This function merges monitoring target regions which are adjacent and their + * access frequencies are similar. This is for minimizing the monitoring + * overhead under the dynamically changeable access pattern. If a merge was + * unnecessarily made, later 'kdamond_split_regions()' will revert it. 
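+ *
+ * A merged region covers both address ranges and gets the size-weighted
+ * average of the two '->nr_accesses' values.  Merging [0, 100) having 10
+ * accesses with [100, 300) having 20 accesses, for example, results in
+ * [0, 300) having 16 accesses, as 'damon_test_merge_two()' in core-test.h
+ * verifies.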
+ */ +static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold, + unsigned long sz_limit) +{ + struct damon_target *t; + + damon_for_each_target(t, c) + damon_merge_regions_of(t, threshold, sz_limit); +} + +/* + * Split a region in two + * + * r the region to be split + * sz_r size of the first sub-region that will be made + */ +static void damon_split_region_at(struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + unsigned long sz_r) +{ + struct damon_region *new; + + new = damon_new_region(r->ar.start + sz_r, r->ar.end); + if (!new) + return; + + r->ar.end = new->ar.start; + + new->age = r->age; + new->last_nr_accesses = r->last_nr_accesses; + + damon_insert_region(new, r, damon_next_region(r), t); +} + +/* Split every region in the given target into 'nr_subs' regions */ +static void damon_split_regions_of(struct damon_ctx *ctx, + struct damon_target *t, int nr_subs) +{ + struct damon_region *r, *next; + unsigned long sz_region, sz_sub = 0; + int i; + + damon_for_each_region_safe(r, next, t) { + sz_region = r->ar.end - r->ar.start; + + for (i = 0; i < nr_subs - 1 && + sz_region > 2 * DAMON_MIN_REGION; i++) { + /* + * Randomly select size of left sub-region to be at + * least 10 percent and at most 90% of original region + */ + sz_sub = ALIGN_DOWN(damon_rand(1, 10) * + sz_region / 10, DAMON_MIN_REGION); + /* Do not allow blank region */ + if (sz_sub == 0 || sz_sub >= sz_region) + continue; + + damon_split_region_at(ctx, t, r, sz_sub); + sz_region = sz_sub; + } + } +} + +/* + * Split every target region into randomly-sized small regions + * + * This function splits every target region into random-sized small regions if + * current total number of the regions is equal or smaller than half of the + * user-specified maximum number of regions. This is for maximizing the + * monitoring accuracy under the dynamically changeable access patterns. If a + * split was unnecessarily made, later 'kdamond_merge_regions()' will revert + * it. + */ +static void kdamond_split_regions(struct damon_ctx *ctx) +{ + struct damon_target *t; + unsigned int nr_regions = 0; + static unsigned int last_nr_regions; + int nr_subregions = 2; + + damon_for_each_target(t, ctx) + nr_regions += damon_nr_regions(t); + + if (nr_regions > ctx->max_nr_regions / 2) + return; + + /* Maybe the middle of the region has different access frequency */ + if (last_nr_regions == nr_regions && + nr_regions < ctx->max_nr_regions / 3) + nr_subregions = 3; + + damon_for_each_target(t, ctx) + damon_split_regions_of(ctx, t, nr_subregions); + + last_nr_regions = nr_regions; +} + +/* + * Check whether it is time to check and apply the target monitoring regions + * + * Returns true if it is. + */ +static bool kdamond_need_update_primitive(struct damon_ctx *ctx) +{ + return damon_check_reset_time_interval(&ctx->last_primitive_update, + ctx->primitive_update_interval); +} + +/* + * Check whether current monitoring should be stopped + * + * The monitoring is stopped when either the user requested to stop, or all + * monitoring targets are invalid. + * + * Returns true if need to stop current monitoring. 
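+ *
+ * If the monitoring primitive provides no 'target_valid' callback, this
+ * function returns true only when kthread_stop() has been requested.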
+ */ +static bool kdamond_need_stop(struct damon_ctx *ctx) +{ + struct damon_target *t; + + if (kthread_should_stop()) + return true; + + if (!ctx->primitive.target_valid) + return false; + + damon_for_each_target(t, ctx) { + if (ctx->primitive.target_valid(t)) + return false; + } + + return true; +} + +static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric) +{ + struct sysinfo i; + + switch (metric) { + case DAMOS_WMARK_FREE_MEM_RATE: + si_meminfo(&i); + return i.freeram * 1000 / i.totalram; + default: + break; + } + return -EINVAL; +} + +/* + * Returns zero if the scheme is active. Else, returns time to wait for next + * watermark check in micro-seconds. + */ +static unsigned long damos_wmark_wait_us(struct damos *scheme) +{ + unsigned long metric; + + if (scheme->wmarks.metric == DAMOS_WMARK_NONE) + return 0; + + metric = damos_wmark_metric_value(scheme->wmarks.metric); + /* higher than high watermark or lower than low watermark */ + if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) { + if (scheme->wmarks.activated) + pr_debug("deactivate a scheme (%d) for %s wmark\n", + scheme->action, + metric > scheme->wmarks.high ? + "high" : "low"); + scheme->wmarks.activated = false; + return scheme->wmarks.interval; + } + + /* inactive and higher than middle watermark */ + if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) && + !scheme->wmarks.activated) + return scheme->wmarks.interval; + + if (!scheme->wmarks.activated) + pr_debug("activate a scheme (%d)\n", scheme->action); + scheme->wmarks.activated = true; + return 0; +} + +static void kdamond_usleep(unsigned long usecs) +{ + /* See Documentation/timers/timers-howto.rst for the thresholds */ + if (usecs > 20 * USEC_PER_MSEC) + schedule_timeout_idle(usecs_to_jiffies(usecs)); + else + usleep_idle_range(usecs, usecs + 1); +} + +/* Returns negative error code if it's not activated but should return */ +static int kdamond_wait_activation(struct damon_ctx *ctx) +{ + struct damos *s; + unsigned long wait_time; + unsigned long min_wait_time = 0; + + while (!kdamond_need_stop(ctx)) { + damon_for_each_scheme(s, ctx) { + wait_time = damos_wmark_wait_us(s); + if (!min_wait_time || wait_time < min_wait_time) + min_wait_time = wait_time; + } + if (!min_wait_time) + return 0; + + kdamond_usleep(min_wait_time); + } + return -EBUSY; +} + +/* + * The monitoring daemon that runs as a kernel thread + */ +static int kdamond_fn(void *data) +{ + struct damon_ctx *ctx = (struct damon_ctx *)data; + struct damon_target *t; + struct damon_region *r, *next; + unsigned int max_nr_accesses = 0; + unsigned long sz_limit = 0; + bool done = false; + + pr_debug("kdamond (%d) starts\n", current->pid); + + if (ctx->primitive.init) + ctx->primitive.init(ctx); + if (ctx->callback.before_start && ctx->callback.before_start(ctx)) + done = true; + + sz_limit = damon_region_sz_limit(ctx); + + while (!kdamond_need_stop(ctx) && !done) { + if (kdamond_wait_activation(ctx)) + continue; + + if (ctx->primitive.prepare_access_checks) + ctx->primitive.prepare_access_checks(ctx); + if (ctx->callback.after_sampling && + ctx->callback.after_sampling(ctx)) + done = true; + + kdamond_usleep(ctx->sample_interval); + + if (ctx->primitive.check_accesses) + max_nr_accesses = ctx->primitive.check_accesses(ctx); + + if (kdamond_aggregate_interval_passed(ctx)) { + kdamond_merge_regions(ctx, + max_nr_accesses / 10, + sz_limit); + if (ctx->callback.after_aggregation && + ctx->callback.after_aggregation(ctx)) + done = true; + 
kdamond_apply_schemes(ctx); + kdamond_reset_aggregated(ctx); + kdamond_split_regions(ctx); + if (ctx->primitive.reset_aggregated) + ctx->primitive.reset_aggregated(ctx); + } + + if (kdamond_need_update_primitive(ctx)) { + if (ctx->primitive.update) + ctx->primitive.update(ctx); + sz_limit = damon_region_sz_limit(ctx); + } + } + damon_for_each_target(t, ctx) { + damon_for_each_region_safe(r, next, t) + damon_destroy_region(r, t); + } + + if (ctx->callback.before_terminate) + ctx->callback.before_terminate(ctx); + if (ctx->primitive.cleanup) + ctx->primitive.cleanup(ctx); + + pr_debug("kdamond (%d) finishes\n", current->pid); + mutex_lock(&ctx->kdamond_lock); + ctx->kdamond = NULL; + mutex_unlock(&ctx->kdamond_lock); + + mutex_lock(&damon_lock); + nr_running_ctxs--; + mutex_unlock(&damon_lock); + + return 0; +} + +#include "core-test.h" diff --git a/mm/damon/dbgfs-test.h b/mm/damon/dbgfs-test.h new file mode 100644 index 000000000000..86b9f9528231 --- /dev/null +++ b/mm/damon/dbgfs-test.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * DAMON Debugfs Interface Unit Tests + * + * Author: SeongJae Park + */ + +#ifdef CONFIG_DAMON_DBGFS_KUNIT_TEST + +#ifndef _DAMON_DBGFS_TEST_H +#define _DAMON_DBGFS_TEST_H + +#include + +static void damon_dbgfs_test_str_to_target_ids(struct kunit *test) +{ + char *question; + unsigned long *answers; + unsigned long expected[] = {12, 35, 46}; + ssize_t nr_integers = 0, i; + + question = "123"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers); + KUNIT_EXPECT_EQ(test, 123ul, answers[0]); + kfree(answers); + + question = "123abc"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers); + KUNIT_EXPECT_EQ(test, 123ul, answers[0]); + kfree(answers); + + question = "a123"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers); + kfree(answers); + + question = "12 35"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers); + for (i = 0; i < nr_integers; i++) + KUNIT_EXPECT_EQ(test, expected[i], answers[i]); + kfree(answers); + + question = "12 35 46"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)3, nr_integers); + for (i = 0; i < nr_integers; i++) + KUNIT_EXPECT_EQ(test, expected[i], answers[i]); + kfree(answers); + + question = "12 35 abc 46"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers); + for (i = 0; i < 2; i++) + KUNIT_EXPECT_EQ(test, expected[i], answers[i]); + kfree(answers); + + question = ""; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers); + kfree(answers); + + question = "\n"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers); + kfree(answers); +} + +static void damon_dbgfs_test_set_targets(struct kunit *test) +{ + struct damon_ctx *ctx = dbgfs_new_ctx(); + unsigned long ids[] = {1, 2, 3}; + char buf[64]; + + /* Make DAMON consider target id as plain number */ + ctx->primitive.target_valid = NULL; + ctx->primitive.cleanup = NULL; + + damon_set_targets(ctx, ids, 3); + sprint_target_ids(ctx, buf, 64); + KUNIT_EXPECT_STREQ(test, (char *)buf, "1 2 3\n"); + + damon_set_targets(ctx, 
NULL, 0); + sprint_target_ids(ctx, buf, 64); + KUNIT_EXPECT_STREQ(test, (char *)buf, "\n"); + + damon_set_targets(ctx, (unsigned long []){1, 2}, 2); + sprint_target_ids(ctx, buf, 64); + KUNIT_EXPECT_STREQ(test, (char *)buf, "1 2\n"); + + damon_set_targets(ctx, (unsigned long []){2}, 1); + sprint_target_ids(ctx, buf, 64); + KUNIT_EXPECT_STREQ(test, (char *)buf, "2\n"); + + damon_set_targets(ctx, NULL, 0); + sprint_target_ids(ctx, buf, 64); + KUNIT_EXPECT_STREQ(test, (char *)buf, "\n"); + + dbgfs_destroy_ctx(ctx); +} + +static void damon_dbgfs_test_set_init_regions(struct kunit *test) +{ + struct damon_ctx *ctx = damon_new_ctx(); + unsigned long ids[] = {1, 2, 3}; + /* Each line represents one region in `` `` */ + char * const valid_inputs[] = {"2 10 20\n 2 20 30\n2 35 45", + "2 10 20\n", + "2 10 20\n1 39 59\n1 70 134\n 2 20 25\n", + ""}; + /* Reading the file again will show sorted, clean output */ + char * const valid_expects[] = {"2 10 20\n2 20 30\n2 35 45\n", + "2 10 20\n", + "1 39 59\n1 70 134\n2 10 20\n2 20 25\n", + ""}; + char * const invalid_inputs[] = {"4 10 20\n", /* target not exists */ + "2 10 20\n 2 14 26\n", /* regions overlap */ + "1 10 20\n2 30 40\n 1 5 8"}; /* not sorted by address */ + char *input, *expect; + int i, rc; + char buf[256]; + + damon_set_targets(ctx, ids, 3); + + /* Put valid inputs and check the results */ + for (i = 0; i < ARRAY_SIZE(valid_inputs); i++) { + input = valid_inputs[i]; + expect = valid_expects[i]; + + rc = set_init_regions(ctx, input, strnlen(input, 256)); + KUNIT_EXPECT_EQ(test, rc, 0); + + memset(buf, 0, 256); + sprint_init_regions(ctx, buf, 256); + + KUNIT_EXPECT_STREQ(test, (char *)buf, expect); + } + /* Put invalid inputs and check the return error code */ + for (i = 0; i < ARRAY_SIZE(invalid_inputs); i++) { + input = invalid_inputs[i]; + pr_info("input: %s\n", input); + rc = set_init_regions(ctx, input, strnlen(input, 256)); + KUNIT_EXPECT_EQ(test, rc, -EINVAL); + + memset(buf, 0, 256); + sprint_init_regions(ctx, buf, 256); + + KUNIT_EXPECT_STREQ(test, (char *)buf, ""); + } + + damon_set_targets(ctx, NULL, 0); + damon_destroy_ctx(ctx); +} + +static struct kunit_case damon_test_cases[] = { + KUNIT_CASE(damon_dbgfs_test_str_to_target_ids), + KUNIT_CASE(damon_dbgfs_test_set_targets), + KUNIT_CASE(damon_dbgfs_test_set_init_regions), + {}, +}; + +static struct kunit_suite damon_test_suite = { + .name = "damon-dbgfs", + .test_cases = damon_test_cases, +}; +kunit_test_suite(damon_test_suite); + +#endif /* _DAMON_TEST_H */ + +#endif /* CONFIG_DAMON_KUNIT_TEST */ diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c new file mode 100644 index 000000000000..5b899601e56c --- /dev/null +++ b/mm/damon/dbgfs.c @@ -0,0 +1,990 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON Debugfs Interface + * + * Author: SeongJae Park + */ + +#define pr_fmt(fmt) "damon-dbgfs: " fmt + +#include +#include +#include +#include +#include +#include +#include + +static struct damon_ctx **dbgfs_ctxs; +static int dbgfs_nr_ctxs; +static struct dentry **dbgfs_dirs; +static DEFINE_MUTEX(damon_dbgfs_lock); + +/* + * Returns non-empty string on success, negative error code otherwise. 
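+ *
+ * Errors are returned as ERR_PTR() values.  The returned buffer is
+ * NUL-terminated and it is the caller's responsibility to kfree() it.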
+ */ +static char *user_input_str(const char __user *buf, size_t count, loff_t *ppos) +{ + char *kbuf; + ssize_t ret; + + /* We do not accept continuous write */ + if (*ppos) + return ERR_PTR(-EINVAL); + + kbuf = kmalloc(count + 1, GFP_KERNEL | __GFP_NOWARN); + if (!kbuf) + return ERR_PTR(-ENOMEM); + + ret = simple_write_to_buffer(kbuf, count + 1, ppos, buf, count); + if (ret != count) { + kfree(kbuf); + return ERR_PTR(-EIO); + } + kbuf[ret] = '\0'; + + return kbuf; +} + +static ssize_t dbgfs_attrs_read(struct file *file, + char __user *buf, size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char kbuf[128]; + int ret; + + mutex_lock(&ctx->kdamond_lock); + ret = scnprintf(kbuf, ARRAY_SIZE(kbuf), "%lu %lu %lu %lu %lu\n", + ctx->sample_interval, ctx->aggr_interval, + ctx->primitive_update_interval, ctx->min_nr_regions, + ctx->max_nr_regions); + mutex_unlock(&ctx->kdamond_lock); + + return simple_read_from_buffer(buf, count, ppos, kbuf, ret); +} + +static ssize_t dbgfs_attrs_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + unsigned long s, a, r, minr, maxr; + char *kbuf; + ssize_t ret; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + if (sscanf(kbuf, "%lu %lu %lu %lu %lu", + &s, &a, &r, &minr, &maxr) != 5) { + ret = -EINVAL; + goto out; + } + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + ret = -EBUSY; + goto unlock_out; + } + + ret = damon_set_attrs(ctx, s, a, r, minr, maxr); + if (!ret) + ret = count; +unlock_out: + mutex_unlock(&ctx->kdamond_lock); +out: + kfree(kbuf); + return ret; +} + +static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len) +{ + struct damos *s; + int written = 0; + int rc; + + damon_for_each_scheme(s, c) { + rc = scnprintf(&buf[written], len - written, + "%lu %lu %u %u %u %u %d %lu %lu %lu %u %u %u %d %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", + s->min_sz_region, s->max_sz_region, + s->min_nr_accesses, s->max_nr_accesses, + s->min_age_region, s->max_age_region, + s->action, + s->quota.ms, s->quota.sz, + s->quota.reset_interval, + s->quota.weight_sz, + s->quota.weight_nr_accesses, + s->quota.weight_age, + s->wmarks.metric, s->wmarks.interval, + s->wmarks.high, s->wmarks.mid, s->wmarks.low, + s->stat.nr_tried, s->stat.sz_tried, + s->stat.nr_applied, s->stat.sz_applied, + s->stat.qt_exceeds); + if (!rc) + return -ENOMEM; + + written += rc; + } + return written; +} + +static ssize_t dbgfs_schemes_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + ssize_t len; + + kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN); + if (!kbuf) + return -ENOMEM; + + mutex_lock(&ctx->kdamond_lock); + len = sprint_schemes(ctx, kbuf, count); + mutex_unlock(&ctx->kdamond_lock); + if (len < 0) + goto out; + len = simple_read_from_buffer(buf, count, ppos, kbuf, len); + +out: + kfree(kbuf); + return len; +} + +static void free_schemes_arr(struct damos **schemes, ssize_t nr_schemes) +{ + ssize_t i; + + for (i = 0; i < nr_schemes; i++) + kfree(schemes[i]); + kfree(schemes); +} + +static bool damos_action_valid(int action) +{ + switch (action) { + case DAMOS_WILLNEED: + case DAMOS_COLD: + case DAMOS_PAGEOUT: + case DAMOS_HUGEPAGE: + case DAMOS_NOHUGEPAGE: + case DAMOS_STAT: + return true; + default: + return false; + } +} + +/* + * Converts a string into an array of struct damos pointers + * + * Returns an array of struct damos pointers that 
converted if the conversion + * success, or NULL otherwise. + */ +static struct damos **str_to_schemes(const char *str, ssize_t len, + ssize_t *nr_schemes) +{ + struct damos *scheme, **schemes; + const int max_nr_schemes = 256; + int pos = 0, parsed, ret; + unsigned long min_sz, max_sz; + unsigned int min_nr_a, max_nr_a, min_age, max_age; + unsigned int action; + + schemes = kmalloc_array(max_nr_schemes, sizeof(scheme), + GFP_KERNEL); + if (!schemes) + return NULL; + + *nr_schemes = 0; + while (pos < len && *nr_schemes < max_nr_schemes) { + struct damos_quota quota = {}; + struct damos_watermarks wmarks; + + ret = sscanf(&str[pos], + "%lu %lu %u %u %u %u %u %lu %lu %lu %u %u %u %u %lu %lu %lu %lu%n", + &min_sz, &max_sz, &min_nr_a, &max_nr_a, + &min_age, &max_age, &action, "a.ms, + "a.sz, "a.reset_interval, + "a.weight_sz, "a.weight_nr_accesses, + "a.weight_age, &wmarks.metric, + &wmarks.interval, &wmarks.high, &wmarks.mid, + &wmarks.low, &parsed); + if (ret != 18) + break; + if (!damos_action_valid(action)) + goto fail; + + if (min_sz > max_sz || min_nr_a > max_nr_a || min_age > max_age) + goto fail; + + if (wmarks.high < wmarks.mid || wmarks.high < wmarks.low || + wmarks.mid < wmarks.low) + goto fail; + + pos += parsed; + scheme = damon_new_scheme(min_sz, max_sz, min_nr_a, max_nr_a, + min_age, max_age, action, "a, &wmarks); + if (!scheme) + goto fail; + + schemes[*nr_schemes] = scheme; + *nr_schemes += 1; + } + return schemes; +fail: + free_schemes_arr(schemes, *nr_schemes); + return NULL; +} + +static ssize_t dbgfs_schemes_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + struct damos **schemes; + ssize_t nr_schemes = 0, ret; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + schemes = str_to_schemes(kbuf, count, &nr_schemes); + if (!schemes) { + ret = -EINVAL; + goto out; + } + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + ret = -EBUSY; + goto unlock_out; + } + + ret = damon_set_schemes(ctx, schemes, nr_schemes); + if (!ret) { + ret = count; + nr_schemes = 0; + } + +unlock_out: + mutex_unlock(&ctx->kdamond_lock); + free_schemes_arr(schemes, nr_schemes); +out: + kfree(kbuf); + return ret; +} + +static inline bool targetid_is_pid(const struct damon_ctx *ctx) +{ + return ctx->primitive.target_valid == damon_va_target_valid; +} + +static ssize_t sprint_target_ids(struct damon_ctx *ctx, char *buf, ssize_t len) +{ + struct damon_target *t; + unsigned long id; + int written = 0; + int rc; + + damon_for_each_target(t, ctx) { + id = t->id; + if (targetid_is_pid(ctx)) + /* Show pid numbers to debugfs users */ + id = (unsigned long)pid_vnr((struct pid *)id); + + rc = scnprintf(&buf[written], len - written, "%lu ", id); + if (!rc) + return -ENOMEM; + written += rc; + } + if (written) + written -= 1; + written += scnprintf(&buf[written], len - written, "\n"); + return written; +} + +static ssize_t dbgfs_target_ids_read(struct file *file, + char __user *buf, size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + ssize_t len; + char ids_buf[320]; + + mutex_lock(&ctx->kdamond_lock); + len = sprint_target_ids(ctx, ids_buf, 320); + mutex_unlock(&ctx->kdamond_lock); + if (len < 0) + return len; + + return simple_read_from_buffer(buf, count, ppos, ids_buf, len); +} + +/* + * Converts a string into an array of unsigned long integers + * + * Returns an array of unsigned long integers if the conversion success, or + * NULL otherwise. 
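+ *
+ * At most 32 integers are parsed and parsing stops at the first token that
+ * is not an unsigned integer.  The returned array should be kfree()d by the
+ * caller.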
+ */ +static unsigned long *str_to_target_ids(const char *str, ssize_t len, + ssize_t *nr_ids) +{ + unsigned long *ids; + const int max_nr_ids = 32; + unsigned long id; + int pos = 0, parsed, ret; + + *nr_ids = 0; + ids = kmalloc_array(max_nr_ids, sizeof(id), GFP_KERNEL); + if (!ids) + return NULL; + while (*nr_ids < max_nr_ids && pos < len) { + ret = sscanf(&str[pos], "%lu%n", &id, &parsed); + pos += parsed; + if (ret != 1) + break; + ids[*nr_ids] = id; + *nr_ids += 1; + } + + return ids; +} + +static void dbgfs_put_pids(unsigned long *ids, int nr_ids) +{ + int i; + + for (i = 0; i < nr_ids; i++) + put_pid((struct pid *)ids[i]); +} + +static ssize_t dbgfs_target_ids_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + struct damon_target *t, *next_t; + bool id_is_pid = true; + char *kbuf; + unsigned long *targets; + ssize_t nr_targets; + ssize_t ret; + int i; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + if (!strncmp(kbuf, "paddr\n", count)) { + id_is_pid = false; + /* target id is meaningless here, but we set it just for fun */ + scnprintf(kbuf, count, "42 "); + } + + targets = str_to_target_ids(kbuf, count, &nr_targets); + if (!targets) { + ret = -ENOMEM; + goto out; + } + + if (id_is_pid) { + for (i = 0; i < nr_targets; i++) { + targets[i] = (unsigned long)find_get_pid( + (int)targets[i]); + if (!targets[i]) { + dbgfs_put_pids(targets, i); + ret = -EINVAL; + goto free_targets_out; + } + } + } + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + if (id_is_pid) + dbgfs_put_pids(targets, nr_targets); + ret = -EBUSY; + goto unlock_out; + } + + /* remove previously set targets */ + damon_for_each_target_safe(t, next_t, ctx) { + if (targetid_is_pid(ctx)) + put_pid((struct pid *)t->id); + damon_destroy_target(t); + } + + /* Configure the context for the address space type */ + if (id_is_pid) + damon_va_set_primitives(ctx); + else + damon_pa_set_primitives(ctx); + + ret = damon_set_targets(ctx, targets, nr_targets); + if (ret) { + if (id_is_pid) + dbgfs_put_pids(targets, nr_targets); + } else { + ret = count; + } + +unlock_out: + mutex_unlock(&ctx->kdamond_lock); +free_targets_out: + kfree(targets); +out: + kfree(kbuf); + return ret; +} + +static ssize_t sprint_init_regions(struct damon_ctx *c, char *buf, ssize_t len) +{ + struct damon_target *t; + struct damon_region *r; + int written = 0; + int rc; + + damon_for_each_target(t, c) { + damon_for_each_region(r, t) { + rc = scnprintf(&buf[written], len - written, + "%lu %lu %lu\n", + t->id, r->ar.start, r->ar.end); + if (!rc) + return -ENOMEM; + written += rc; + } + } + return written; +} + +static ssize_t dbgfs_init_regions_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + ssize_t len; + + kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN); + if (!kbuf) + return -ENOMEM; + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + mutex_unlock(&ctx->kdamond_lock); + len = -EBUSY; + goto out; + } + + len = sprint_init_regions(ctx, kbuf, count); + mutex_unlock(&ctx->kdamond_lock); + if (len < 0) + goto out; + len = simple_read_from_buffer(buf, count, ppos, kbuf, len); + +out: + kfree(kbuf); + return len; +} + +static int add_init_region(struct damon_ctx *c, + unsigned long target_id, struct damon_addr_range *ar) +{ + struct damon_target *t; + struct damon_region *r, *prev; + unsigned long id; + int rc = -EINVAL; + + if (ar->start >= 
ar->end) + return -EINVAL; + + damon_for_each_target(t, c) { + id = t->id; + if (targetid_is_pid(c)) + id = (unsigned long)pid_vnr((struct pid *)id); + if (id == target_id) { + r = damon_new_region(ar->start, ar->end); + if (!r) + return -ENOMEM; + damon_add_region(r, t); + if (damon_nr_regions(t) > 1) { + prev = damon_prev_region(r); + if (prev->ar.end > r->ar.start) { + damon_destroy_region(r, t); + return -EINVAL; + } + } + rc = 0; + } + } + return rc; +} + +static int set_init_regions(struct damon_ctx *c, const char *str, ssize_t len) +{ + struct damon_target *t; + struct damon_region *r, *next; + int pos = 0, parsed, ret; + unsigned long target_id; + struct damon_addr_range ar; + int err; + + damon_for_each_target(t, c) { + damon_for_each_region_safe(r, next, t) + damon_destroy_region(r, t); + } + + while (pos < len) { + ret = sscanf(&str[pos], "%lu %lu %lu%n", + &target_id, &ar.start, &ar.end, &parsed); + if (ret != 3) + break; + err = add_init_region(c, target_id, &ar); + if (err) + goto fail; + pos += parsed; + } + + return 0; + +fail: + damon_for_each_target(t, c) { + damon_for_each_region_safe(r, next, t) + damon_destroy_region(r, t); + } + return err; +} + +static ssize_t dbgfs_init_regions_write(struct file *file, + const char __user *buf, size_t count, + loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + ssize_t ret = count; + int err; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + ret = -EBUSY; + goto unlock_out; + } + + err = set_init_regions(ctx, kbuf, ret); + if (err) + ret = err; + +unlock_out: + mutex_unlock(&ctx->kdamond_lock); + kfree(kbuf); + return ret; +} + +static ssize_t dbgfs_kdamond_pid_read(struct file *file, + char __user *buf, size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + ssize_t len; + + kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN); + if (!kbuf) + return -ENOMEM; + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) + len = scnprintf(kbuf, count, "%d\n", ctx->kdamond->pid); + else + len = scnprintf(kbuf, count, "none\n"); + mutex_unlock(&ctx->kdamond_lock); + if (!len) + goto out; + len = simple_read_from_buffer(buf, count, ppos, kbuf, len); + +out: + kfree(kbuf); + return len; +} + +static int damon_dbgfs_open(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + + return nonseekable_open(inode, file); +} + +static const struct file_operations attrs_fops = { + .open = damon_dbgfs_open, + .read = dbgfs_attrs_read, + .write = dbgfs_attrs_write, +}; + +static const struct file_operations schemes_fops = { + .open = damon_dbgfs_open, + .read = dbgfs_schemes_read, + .write = dbgfs_schemes_write, +}; + +static const struct file_operations target_ids_fops = { + .open = damon_dbgfs_open, + .read = dbgfs_target_ids_read, + .write = dbgfs_target_ids_write, +}; + +static const struct file_operations init_regions_fops = { + .open = damon_dbgfs_open, + .read = dbgfs_init_regions_read, + .write = dbgfs_init_regions_write, +}; + +static const struct file_operations kdamond_pid_fops = { + .open = damon_dbgfs_open, + .read = dbgfs_kdamond_pid_read, +}; + +static void dbgfs_fill_ctx_dir(struct dentry *dir, struct damon_ctx *ctx) +{ + const char * const file_names[] = {"attrs", "schemes", "target_ids", + "init_regions", "kdamond_pid"}; + const struct file_operations *fops[] = {&attrs_fops, &schemes_fops, + &target_ids_fops, &init_regions_fops, &kdamond_pid_fops}; + 
int i; + + for (i = 0; i < ARRAY_SIZE(file_names); i++) + debugfs_create_file(file_names[i], 0600, dir, ctx, fops[i]); +} + +static void dbgfs_before_terminate(struct damon_ctx *ctx) +{ + struct damon_target *t, *next; + + if (!targetid_is_pid(ctx)) + return; + + mutex_lock(&ctx->kdamond_lock); + damon_for_each_target_safe(t, next, ctx) { + put_pid((struct pid *)t->id); + damon_destroy_target(t); + } + mutex_unlock(&ctx->kdamond_lock); +} + +static struct damon_ctx *dbgfs_new_ctx(void) +{ + struct damon_ctx *ctx; + + ctx = damon_new_ctx(); + if (!ctx) + return NULL; + + damon_va_set_primitives(ctx); + ctx->callback.before_terminate = dbgfs_before_terminate; + return ctx; +} + +static void dbgfs_destroy_ctx(struct damon_ctx *ctx) +{ + damon_destroy_ctx(ctx); +} + +/* + * Make a context of @name and create a debugfs directory for it. + * + * This function should be called while holding damon_dbgfs_lock. + * + * Returns 0 on success, negative error code otherwise. + */ +static int dbgfs_mk_context(char *name) +{ + struct dentry *root, **new_dirs, *new_dir; + struct damon_ctx **new_ctxs, *new_ctx; + + if (damon_nr_running_ctxs()) + return -EBUSY; + + new_ctxs = krealloc(dbgfs_ctxs, sizeof(*dbgfs_ctxs) * + (dbgfs_nr_ctxs + 1), GFP_KERNEL); + if (!new_ctxs) + return -ENOMEM; + dbgfs_ctxs = new_ctxs; + + new_dirs = krealloc(dbgfs_dirs, sizeof(*dbgfs_dirs) * + (dbgfs_nr_ctxs + 1), GFP_KERNEL); + if (!new_dirs) + return -ENOMEM; + dbgfs_dirs = new_dirs; + + root = dbgfs_dirs[0]; + if (!root) + return -ENOENT; + + new_dir = debugfs_create_dir(name, root); + dbgfs_dirs[dbgfs_nr_ctxs] = new_dir; + + new_ctx = dbgfs_new_ctx(); + if (!new_ctx) { + debugfs_remove(new_dir); + dbgfs_dirs[dbgfs_nr_ctxs] = NULL; + return -ENOMEM; + } + + dbgfs_ctxs[dbgfs_nr_ctxs] = new_ctx; + dbgfs_fill_ctx_dir(dbgfs_dirs[dbgfs_nr_ctxs], + dbgfs_ctxs[dbgfs_nr_ctxs]); + dbgfs_nr_ctxs++; + + return 0; +} + +static ssize_t dbgfs_mk_context_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + char *kbuf; + char *ctx_name; + ssize_t ret; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + ctx_name = kmalloc(count + 1, GFP_KERNEL); + if (!ctx_name) { + kfree(kbuf); + return -ENOMEM; + } + + /* Trim white space */ + if (sscanf(kbuf, "%s", ctx_name) != 1) { + ret = -EINVAL; + goto out; + } + + mutex_lock(&damon_dbgfs_lock); + ret = dbgfs_mk_context(ctx_name); + if (!ret) + ret = count; + mutex_unlock(&damon_dbgfs_lock); + +out: + kfree(kbuf); + kfree(ctx_name); + return ret; +} + +/* + * Remove a context of @name and its debugfs directory. + * + * This function should be called while holding damon_dbgfs_lock. + * + * Return 0 on success, negative error code otherwise. 
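+ *
+ * In particular, -EBUSY is returned while any kdamond is running, and
+ * -ENOENT if no debugfs directory for @name exists.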
+ */ +static int dbgfs_rm_context(char *name) +{ + struct dentry *root, *dir, **new_dirs; + struct damon_ctx **new_ctxs; + int i, j; + + if (damon_nr_running_ctxs()) + return -EBUSY; + + root = dbgfs_dirs[0]; + if (!root) + return -ENOENT; + + dir = debugfs_lookup(name, root); + if (!dir) + return -ENOENT; + + new_dirs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_dirs), + GFP_KERNEL); + if (!new_dirs) + return -ENOMEM; + + new_ctxs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_ctxs), + GFP_KERNEL); + if (!new_ctxs) { + kfree(new_dirs); + return -ENOMEM; + } + + for (i = 0, j = 0; i < dbgfs_nr_ctxs; i++) { + if (dbgfs_dirs[i] == dir) { + debugfs_remove(dbgfs_dirs[i]); + dbgfs_destroy_ctx(dbgfs_ctxs[i]); + continue; + } + new_dirs[j] = dbgfs_dirs[i]; + new_ctxs[j++] = dbgfs_ctxs[i]; + } + + kfree(dbgfs_dirs); + kfree(dbgfs_ctxs); + + dbgfs_dirs = new_dirs; + dbgfs_ctxs = new_ctxs; + dbgfs_nr_ctxs--; + + return 0; +} + +static ssize_t dbgfs_rm_context_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + char *kbuf; + ssize_t ret; + char *ctx_name; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + ctx_name = kmalloc(count + 1, GFP_KERNEL); + if (!ctx_name) { + kfree(kbuf); + return -ENOMEM; + } + + /* Trim white space */ + if (sscanf(kbuf, "%s", ctx_name) != 1) { + ret = -EINVAL; + goto out; + } + + mutex_lock(&damon_dbgfs_lock); + ret = dbgfs_rm_context(ctx_name); + if (!ret) + ret = count; + mutex_unlock(&damon_dbgfs_lock); + +out: + kfree(kbuf); + kfree(ctx_name); + return ret; +} + +static ssize_t dbgfs_monitor_on_read(struct file *file, + char __user *buf, size_t count, loff_t *ppos) +{ + char monitor_on_buf[5]; + bool monitor_on = damon_nr_running_ctxs() != 0; + int len; + + len = scnprintf(monitor_on_buf, 5, monitor_on ? 
"on\n" : "off\n"); + + return simple_read_from_buffer(buf, count, ppos, monitor_on_buf, len); +} + +static ssize_t dbgfs_monitor_on_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + ssize_t ret; + char *kbuf; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + /* Remove white space */ + if (sscanf(kbuf, "%s", kbuf) != 1) { + kfree(kbuf); + return -EINVAL; + } + + mutex_lock(&damon_dbgfs_lock); + if (!strncmp(kbuf, "on", count)) { + int i; + + for (i = 0; i < dbgfs_nr_ctxs; i++) { + if (damon_targets_empty(dbgfs_ctxs[i])) { + kfree(kbuf); + mutex_unlock(&damon_dbgfs_lock); + return -EINVAL; + } + } + ret = damon_start(dbgfs_ctxs, dbgfs_nr_ctxs); + } else if (!strncmp(kbuf, "off", count)) { + ret = damon_stop(dbgfs_ctxs, dbgfs_nr_ctxs); + } else { + ret = -EINVAL; + } + mutex_unlock(&damon_dbgfs_lock); + + if (!ret) + ret = count; + kfree(kbuf); + return ret; +} + +static const struct file_operations mk_contexts_fops = { + .write = dbgfs_mk_context_write, +}; + +static const struct file_operations rm_contexts_fops = { + .write = dbgfs_rm_context_write, +}; + +static const struct file_operations monitor_on_fops = { + .read = dbgfs_monitor_on_read, + .write = dbgfs_monitor_on_write, +}; + +static int __init __damon_dbgfs_init(void) +{ + struct dentry *dbgfs_root; + const char * const file_names[] = {"mk_contexts", "rm_contexts", + "monitor_on"}; + const struct file_operations *fops[] = {&mk_contexts_fops, + &rm_contexts_fops, &monitor_on_fops}; + int i; + + dbgfs_root = debugfs_create_dir("damon", NULL); + + for (i = 0; i < ARRAY_SIZE(file_names); i++) + debugfs_create_file(file_names[i], 0600, dbgfs_root, NULL, + fops[i]); + dbgfs_fill_ctx_dir(dbgfs_root, dbgfs_ctxs[0]); + + dbgfs_dirs = kmalloc_array(1, sizeof(dbgfs_root), GFP_KERNEL); + if (!dbgfs_dirs) { + debugfs_remove(dbgfs_root); + return -ENOMEM; + } + dbgfs_dirs[0] = dbgfs_root; + + return 0; +} + +/* + * Functions for the initialization + */ + +static int __init damon_dbgfs_init(void) +{ + int rc = -ENOMEM; + + mutex_lock(&damon_dbgfs_lock); + dbgfs_ctxs = kmalloc(sizeof(*dbgfs_ctxs), GFP_KERNEL); + if (!dbgfs_ctxs) + goto out; + dbgfs_ctxs[0] = dbgfs_new_ctx(); + if (!dbgfs_ctxs[0]) { + kfree(dbgfs_ctxs); + goto out; + } + dbgfs_nr_ctxs = 1; + + rc = __damon_dbgfs_init(); + if (rc) { + kfree(dbgfs_ctxs[0]); + kfree(dbgfs_ctxs); + pr_err("%s: dbgfs init failed\n", __func__); + } + +out: + mutex_unlock(&damon_dbgfs_lock); + return rc; +} + +module_init(damon_dbgfs_init); + +#include "dbgfs-test.h" diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c new file mode 100644 index 000000000000..5e8244f65a1a --- /dev/null +++ b/mm/damon/paddr.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON Primitives for The Physical Address Space + * + * Author: SeongJae Park + */ + +#define pr_fmt(fmt) "damon-pa: " fmt + +#include +#include +#include +#include +#include + +#include "../internal.h" +#include "prmtv-common.h" + +static bool __damon_pa_mkold(struct page *page, struct vm_area_struct *vma, + unsigned long addr, void *arg) +{ + struct page_vma_mapped_walk pvmw = { + .page = page, + .vma = vma, + .address = addr, + }; + + while (page_vma_mapped_walk(&pvmw)) { + addr = pvmw.address; + if (pvmw.pte) + damon_ptep_mkold(pvmw.pte, vma->vm_mm, addr); + else + damon_pmdp_mkold(pvmw.pmd, vma->vm_mm, addr); + } + return true; +} + +static void damon_pa_mkold(unsigned long paddr) +{ + struct page *page = damon_get_page(PHYS_PFN(paddr)); + struct 
rmap_walk_control rwc = { + .rmap_one = __damon_pa_mkold, + .anon_lock = page_lock_anon_vma_read, + }; + bool need_lock; + + if (!page) + return; + + if (!page_mapped(page) || !page_rmapping(page)) { + set_page_idle(page); + goto out; + } + + need_lock = !PageAnon(page) || PageKsm(page); + if (need_lock && !trylock_page(page)) + goto out; + + rmap_walk(page, &rwc); + + if (need_lock) + unlock_page(page); + +out: + put_page(page); +} + +static void __damon_pa_prepare_access_check(struct damon_ctx *ctx, + struct damon_region *r) +{ + r->sampling_addr = damon_rand(r->ar.start, r->ar.end); + + damon_pa_mkold(r->sampling_addr); +} + +static void damon_pa_prepare_access_checks(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct damon_region *r; + + damon_for_each_target(t, ctx) { + damon_for_each_region(r, t) + __damon_pa_prepare_access_check(ctx, r); + } +} + +struct damon_pa_access_chk_result { + unsigned long page_sz; + bool accessed; +}; + +static bool __damon_pa_young(struct page *page, struct vm_area_struct *vma, + unsigned long addr, void *arg) +{ + struct damon_pa_access_chk_result *result = arg; + struct page_vma_mapped_walk pvmw = { + .page = page, + .vma = vma, + .address = addr, + }; + + result->accessed = false; + result->page_sz = PAGE_SIZE; + while (page_vma_mapped_walk(&pvmw)) { + addr = pvmw.address; + if (pvmw.pte) { + result->accessed = pte_young(*pvmw.pte) || + !page_is_idle(page) || + mmu_notifier_test_young(vma->vm_mm, addr); + } else { +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + result->accessed = pmd_young(*pvmw.pmd) || + !page_is_idle(page) || + mmu_notifier_test_young(vma->vm_mm, addr); + result->page_sz = ((1UL) << HPAGE_PMD_SHIFT); +#else + WARN_ON_ONCE(1); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + } + if (result->accessed) { + page_vma_mapped_walk_done(&pvmw); + break; + } + } + + /* If accessed, stop walking */ + return !result->accessed; +} + +static bool damon_pa_young(unsigned long paddr, unsigned long *page_sz) +{ + struct page *page = damon_get_page(PHYS_PFN(paddr)); + struct damon_pa_access_chk_result result = { + .page_sz = PAGE_SIZE, + .accessed = false, + }; + struct rmap_walk_control rwc = { + .arg = &result, + .rmap_one = __damon_pa_young, + .anon_lock = page_lock_anon_vma_read, + }; + bool need_lock; + + if (!page) + return false; + + if (!page_mapped(page) || !page_rmapping(page)) { + if (page_is_idle(page)) + result.accessed = false; + else + result.accessed = true; + put_page(page); + goto out; + } + + need_lock = !PageAnon(page) || PageKsm(page); + if (need_lock && !trylock_page(page)) { + put_page(page); + return NULL; + } + + rmap_walk(page, &rwc); + + if (need_lock) + unlock_page(page); + put_page(page); + +out: + *page_sz = result.page_sz; + return result.accessed; +} + +static void __damon_pa_check_access(struct damon_ctx *ctx, + struct damon_region *r) +{ + static unsigned long last_addr; + static unsigned long last_page_sz = PAGE_SIZE; + static bool last_accessed; + + /* If the region is in the last checked page, reuse the result */ + if (ALIGN_DOWN(last_addr, last_page_sz) == + ALIGN_DOWN(r->sampling_addr, last_page_sz)) { + if (last_accessed) + r->nr_accesses++; + return; + } + + last_accessed = damon_pa_young(r->sampling_addr, &last_page_sz); + if (last_accessed) + r->nr_accesses++; + + last_addr = r->sampling_addr; +} + +static unsigned int damon_pa_check_accesses(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct damon_region *r; + unsigned int max_nr_accesses = 0; + + damon_for_each_target(t, ctx) { + 
damon_for_each_region(r, t) { + __damon_pa_check_access(ctx, r); + max_nr_accesses = max(r->nr_accesses, max_nr_accesses); + } + } + + return max_nr_accesses; +} + +bool damon_pa_target_valid(void *t) +{ + return true; +} + +static unsigned long damon_pa_apply_scheme(struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + struct damos *scheme) +{ + unsigned long addr, applied; + LIST_HEAD(page_list); + + if (scheme->action != DAMOS_PAGEOUT) + return 0; + + for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) { + struct page *page = damon_get_page(PHYS_PFN(addr)); + + if (!page) + continue; + + ClearPageReferenced(page); + test_and_clear_page_young(page); + if (isolate_lru_page(page)) { + put_page(page); + continue; + } + if (PageUnevictable(page)) { + putback_lru_page(page); + } else { + list_add(&page->lru, &page_list); + put_page(page); + } + } + applied = reclaim_pages(&page_list); + cond_resched(); + return applied * PAGE_SIZE; +} + +static int damon_pa_scheme_score(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme) +{ + switch (scheme->action) { + case DAMOS_PAGEOUT: + return damon_pageout_score(context, r, scheme); + default: + break; + } + + return DAMOS_MAX_SCORE; +} + +void damon_pa_set_primitives(struct damon_ctx *ctx) +{ + ctx->primitive.init = NULL; + ctx->primitive.update = NULL; + ctx->primitive.prepare_access_checks = damon_pa_prepare_access_checks; + ctx->primitive.check_accesses = damon_pa_check_accesses; + ctx->primitive.reset_aggregated = NULL; + ctx->primitive.target_valid = damon_pa_target_valid; + ctx->primitive.cleanup = NULL; + ctx->primitive.apply_scheme = damon_pa_apply_scheme; + ctx->primitive.get_scheme_score = damon_pa_scheme_score; +} diff --git a/mm/damon/prmtv-common.c b/mm/damon/prmtv-common.c new file mode 100644 index 000000000000..92a04f5831d6 --- /dev/null +++ b/mm/damon/prmtv-common.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common Primitives for Data Access Monitoring + * + * Author: SeongJae Park + */ + +#include +#include +#include +#include + +#include "prmtv-common.h" + +/* + * Get an online page for a pfn if it's in the LRU list. Otherwise, returns + * NULL. + * + * The body of this function is stolen from the 'page_idle_get_page()'. We + * steal rather than reuse it because the code is quite simple. 
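+ *
+ * Illustrative usage, a sketch of the pattern the callers below (e.g.
+ * 'damon_ptep_mkold()') follow, not an additional requirement of the API
+ * beyond dropping the reference:
+ *
+ *	struct page *page = damon_get_page(pte_pfn(*pte));
+ *
+ *	if (!page)
+ *		return;
+ *	... inspect or mark the page, then ...
+ *	put_page(page);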
+ */ +struct page *damon_get_page(unsigned long pfn) +{ + struct page *page = pfn_to_online_page(pfn); + + if (!page || !PageLRU(page) || !get_page_unless_zero(page)) + return NULL; + + if (unlikely(!PageLRU(page))) { + put_page(page); + page = NULL; + } + return page; +} + +void damon_ptep_mkold(pte_t *pte, struct mm_struct *mm, unsigned long addr) +{ + bool referenced = false; + struct page *page = damon_get_page(pte_pfn(*pte)); + + if (!page) + return; + + if (pte_young(*pte)) { + referenced = true; + *pte = pte_mkold(*pte); + } + +#ifdef CONFIG_MMU_NOTIFIER + if (mmu_notifier_clear_young(mm, addr, addr + PAGE_SIZE)) + referenced = true; +#endif /* CONFIG_MMU_NOTIFIER */ + + if (referenced) + set_page_young(page); + + set_page_idle(page); + put_page(page); +} + +void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm, unsigned long addr) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + bool referenced = false; + struct page *page = damon_get_page(pmd_pfn(*pmd)); + + if (!page) + return; + + if (pmd_young(*pmd)) { + referenced = true; + *pmd = pmd_mkold(*pmd); + } + +#ifdef CONFIG_MMU_NOTIFIER + if (mmu_notifier_clear_young(mm, addr, + addr + ((1UL) << HPAGE_PMD_SHIFT))) + referenced = true; +#endif /* CONFIG_MMU_NOTIFIER */ + + if (referenced) + set_page_young(page); + + set_page_idle(page); + put_page(page); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +} + +#define DAMON_MAX_SUBSCORE (100) +#define DAMON_MAX_AGE_IN_LOG (32) + +int damon_pageout_score(struct damon_ctx *c, struct damon_region *r, + struct damos *s) +{ + unsigned int max_nr_accesses; + int freq_subscore; + unsigned int age_in_sec; + int age_in_log, age_subscore; + unsigned int freq_weight = s->quota.weight_nr_accesses; + unsigned int age_weight = s->quota.weight_age; + int hotness; + + max_nr_accesses = c->aggr_interval / c->sample_interval; + freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE / max_nr_accesses; + + age_in_sec = (unsigned long)r->age * c->aggr_interval / 1000000; + for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec; + age_in_log++, age_in_sec >>= 1) + ; + + /* If frequency is 0, higher age means it's colder */ + if (freq_subscore == 0) + age_in_log *= -1; + + /* + * Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG]. + * Scale it to be in [0, 100] and set it as age subscore. 
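+ *
+ * A worked example with the constants above (purely illustrative): since
+ * DAMON_MAX_AGE_IN_LOG is 32 and DAMON_MAX_SUBSCORE is 100, an age_in_log
+ * of -32 becomes an age_subscore of 0, an age_in_log of 0 becomes 50, and
+ * an age_in_log of 32 becomes 100.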
+ */ + age_in_log += DAMON_MAX_AGE_IN_LOG; + age_subscore = age_in_log * DAMON_MAX_SUBSCORE / + DAMON_MAX_AGE_IN_LOG / 2; + + hotness = (freq_weight * freq_subscore + age_weight * age_subscore); + if (freq_weight + age_weight) + hotness /= freq_weight + age_weight; + /* + * Transform it to fit in [0, DAMOS_MAX_SCORE] + */ + hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE; + + /* Return coldness of the region */ + return DAMOS_MAX_SCORE - hotness; +} diff --git a/mm/damon/prmtv-common.h b/mm/damon/prmtv-common.h new file mode 100644 index 000000000000..e790cb5f8fe0 --- /dev/null +++ b/mm/damon/prmtv-common.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common Primitives for Data Access Monitoring + * + * Author: SeongJae Park + */ + +#include + +struct page *damon_get_page(unsigned long pfn); + +void damon_ptep_mkold(pte_t *pte, struct mm_struct *mm, unsigned long addr); +void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm, unsigned long addr); + +int damon_pageout_score(struct damon_ctx *c, struct damon_region *r, + struct damos *s); diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c new file mode 100644 index 000000000000..183d4f3b4fde --- /dev/null +++ b/mm/damon/reclaim.c @@ -0,0 +1,425 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON-based page reclamation + * + * Author: SeongJae Park + */ + +#define pr_fmt(fmt) "damon-reclaim: " fmt + +#include +#include +#include +#include +#include + +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "damon_reclaim." + +/* + * Enable or disable DAMON_RECLAIM. + * + * You can enable DAMON_RCLAIM by setting the value of this parameter as ``Y``. + * Setting it as ``N`` disables DAMON_RECLAIM. Note that DAMON_RECLAIM could + * do no real monitoring and reclamation due to the watermarks-based activation + * condition. Refer to below descriptions for the watermarks parameter for + * this. + */ +static bool enabled __read_mostly; + +/* + * Time threshold for cold memory regions identification in microseconds. + * + * If a memory region is not accessed for this or longer time, DAMON_RECLAIM + * identifies the region as cold, and reclaims. 120 seconds by default. + */ +static unsigned long min_age __read_mostly = 120000000; +module_param(min_age, ulong, 0600); + +/* + * Limit of time for trying the reclamation in milliseconds. + * + * DAMON_RECLAIM tries to use only up to this time within a time window + * (quota_reset_interval_ms) for trying reclamation of cold pages. This can be + * used for limiting CPU consumption of DAMON_RECLAIM. If the value is zero, + * the limit is disabled. + * + * 10 ms by default. + */ +static unsigned long quota_ms __read_mostly = 10; +module_param(quota_ms, ulong, 0600); + +/* + * Limit of size of memory for the reclamation in bytes. + * + * DAMON_RECLAIM charges amount of memory which it tried to reclaim within a + * time window (quota_reset_interval_ms) and makes no more than this limit is + * tried. This can be used for limiting consumption of CPU and IO. If this + * value is zero, the limit is disabled. + * + * 128 MiB by default. + */ +static unsigned long quota_sz __read_mostly = 128 * 1024 * 1024; +module_param(quota_sz, ulong, 0600); + +/* + * The time/size quota charge reset interval in milliseconds. + * + * The charge reset interval for the quota of time (quota_ms) and size + * (quota_sz). 
That is, DAMON_RECLAIM does not try reclamation for more than + * quota_ms milliseconds or quota_sz bytes within quota_reset_interval_ms + * milliseconds. + * + * 1 second by default. + */ +static unsigned long quota_reset_interval_ms __read_mostly = 1000; +module_param(quota_reset_interval_ms, ulong, 0600); + +/* + * The watermarks check time interval in microseconds. + * + * Minimal time to wait before checking the watermarks, when DAMON_RECLAIM is + * enabled but inactive due to its watermarks rule. 5 seconds by default. + */ +static unsigned long wmarks_interval __read_mostly = 5000000; +module_param(wmarks_interval, ulong, 0600); + +/* + * Free memory rate (per thousand) for the high watermark. + * + * If free memory of the system in bytes per thousand bytes is higher than + * this, DAMON_RECLAIM becomes inactive, so it does nothing but periodically + * checks the watermarks. 500 (50%) by default. + */ +static unsigned long wmarks_high __read_mostly = 500; +module_param(wmarks_high, ulong, 0600); + +/* + * Free memory rate (per thousand) for the middle watermark. + * + * If free memory of the system in bytes per thousand bytes is between this and + * the low watermark, DAMON_RECLAIM becomes active, so starts the monitoring + * and the reclaiming. 400 (40%) by default. + */ +static unsigned long wmarks_mid __read_mostly = 400; +module_param(wmarks_mid, ulong, 0600); + +/* + * Free memory rate (per thousand) for the low watermark. + * + * If free memory of the system in bytes per thousand bytes is lower than this, + * DAMON_RECLAIM becomes inactive, so it does nothing but periodically checks + * the watermarks. In the case, the system falls back to the LRU-based page + * granularity reclamation logic. 200 (20%) by default. + */ +static unsigned long wmarks_low __read_mostly = 200; +module_param(wmarks_low, ulong, 0600); + +/* + * Sampling interval for the monitoring in microseconds. + * + * The sampling interval of DAMON for the cold memory monitoring. Please refer + * to the DAMON documentation for more detail. 5 ms by default. + */ +static unsigned long sample_interval __read_mostly = 5000; +module_param(sample_interval, ulong, 0600); + +/* + * Aggregation interval for the monitoring in microseconds. + * + * The aggregation interval of DAMON for the cold memory monitoring. Please + * refer to the DAMON documentation for more detail. 100 ms by default. + */ +static unsigned long aggr_interval __read_mostly = 100000; +module_param(aggr_interval, ulong, 0600); + +/* + * Minimum number of monitoring regions. + * + * The minimal number of monitoring regions of DAMON for the cold memory + * monitoring. This can be used to set lower-bound of the monitoring quality. + * But, setting this too high could result in increased monitoring overhead. + * Please refer to the DAMON documentation for more detail. 10 by default. + */ +static unsigned long min_nr_regions __read_mostly = 10; +module_param(min_nr_regions, ulong, 0600); + +/* + * Maximum number of monitoring regions. + * + * The maximum number of monitoring regions of DAMON for the cold memory + * monitoring. This can be used to set upper-bound of the monitoring overhead. + * However, setting this too low could result in bad monitoring quality. + * Please refer to the DAMON documentation for more detail. 1000 by default. + */ +static unsigned long max_nr_regions __read_mostly = 1000; +module_param(max_nr_regions, ulong, 0600); + +/* + * Start of the target memory region in physical address. 
+ * + * The start physical address of memory region that DAMON_RECLAIM will do work + * against. By default, biggest System RAM is used as the region. + */ +static unsigned long monitor_region_start __read_mostly; +module_param(monitor_region_start, ulong, 0600); + +/* + * End of the target memory region in physical address. + * + * The end physical address of memory region that DAMON_RECLAIM will do work + * against. By default, biggest System RAM is used as the region. + */ +static unsigned long monitor_region_end __read_mostly; +module_param(monitor_region_end, ulong, 0600); + +/* + * PID of the DAMON thread + * + * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread. + * Else, -1. + */ +static int kdamond_pid __read_mostly = -1; +module_param(kdamond_pid, int, 0400); + +/* + * Number of memory regions that tried to be reclaimed. + */ +static unsigned long nr_reclaim_tried_regions __read_mostly; +module_param(nr_reclaim_tried_regions, ulong, 0400); + +/* + * Total bytes of memory regions that tried to be reclaimed. + */ +static unsigned long bytes_reclaim_tried_regions __read_mostly; +module_param(bytes_reclaim_tried_regions, ulong, 0400); + +/* + * Number of memory regions that successfully be reclaimed. + */ +static unsigned long nr_reclaimed_regions __read_mostly; +module_param(nr_reclaimed_regions, ulong, 0400); + +/* + * Total bytes of memory regions that successfully be reclaimed. + */ +static unsigned long bytes_reclaimed_regions __read_mostly; +module_param(bytes_reclaimed_regions, ulong, 0400); + +/* + * Number of times that the time/space quota limits have exceeded + */ +static unsigned long nr_quota_exceeds __read_mostly; +module_param(nr_quota_exceeds, ulong, 0400); + +static struct damon_ctx *ctx; +static struct damon_target *target; + +struct damon_reclaim_ram_walk_arg { + unsigned long start; + unsigned long end; +}; + +static int walk_system_ram(struct resource *res, void *arg) +{ + struct damon_reclaim_ram_walk_arg *a = arg; + + if (a->end - a->start < res->end - res->start) { + a->start = res->start; + a->end = res->end; + } + return 0; +} + +/* + * Find biggest 'System RAM' resource and store its start and end address in + * @start and @end, respectively. If no System RAM is found, returns false. + */ +static bool get_monitoring_region(unsigned long *start, unsigned long *end) +{ + struct damon_reclaim_ram_walk_arg arg = {}; + + walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram); + if (arg.end <= arg.start) + return false; + + *start = arg.start; + *end = arg.end; + return true; +} + +static struct damos *damon_reclaim_new_scheme(void) +{ + struct damos_watermarks wmarks = { + .metric = DAMOS_WMARK_FREE_MEM_RATE, + .interval = wmarks_interval, + .high = wmarks_high, + .mid = wmarks_mid, + .low = wmarks_low, + }; + struct damos_quota quota = { + /* + * Do not try reclamation for more than quota_ms milliseconds + * or quota_sz bytes within quota_reset_interval_ms. + */ + .ms = quota_ms, + .sz = quota_sz, + .reset_interval = quota_reset_interval_ms, + /* Within the quota, page out older regions first. */ + .weight_sz = 0, + .weight_nr_accesses = 0, + .weight_age = 1 + }; + struct damos *scheme = damon_new_scheme( + /* Find regions having PAGE_SIZE or larger size */ + PAGE_SIZE, ULONG_MAX, + /* and not accessed at all */ + 0, 0, + /* for min_age or more micro-seconds, and */ + min_age / aggr_interval, UINT_MAX, + /* page out those, as soon as found */ + DAMOS_PAGEOUT, + /* under the quota. 
*/ + "a, + /* (De)activate this according to the watermarks. */ + &wmarks); + + return scheme; +} + +static int damon_reclaim_turn(bool on) +{ + struct damon_region *region; + struct damos *scheme; + int err; + + if (!on) { + err = damon_stop(&ctx, 1); + if (!err) + kdamond_pid = -1; + return err; + } + + err = damon_set_attrs(ctx, sample_interval, aggr_interval, 0, + min_nr_regions, max_nr_regions); + if (err) + return err; + + if (monitor_region_start > monitor_region_end) + return -EINVAL; + if (!monitor_region_start && !monitor_region_end && + !get_monitoring_region(&monitor_region_start, + &monitor_region_end)) + return -EINVAL; + /* DAMON will free this on its own when finish monitoring */ + region = damon_new_region(monitor_region_start, monitor_region_end); + if (!region) + return -ENOMEM; + damon_add_region(region, target); + + /* Will be freed by 'damon_set_schemes()' below */ + scheme = damon_reclaim_new_scheme(); + if (!scheme) { + err = -ENOMEM; + goto free_region_out; + } + err = damon_set_schemes(ctx, &scheme, 1); + if (err) + goto free_scheme_out; + + err = damon_start(&ctx, 1); + if (!err) { + kdamond_pid = ctx->kdamond->pid; + return 0; + } + +free_scheme_out: + damon_destroy_scheme(scheme); +free_region_out: + damon_destroy_region(region, target); + return err; +} + +#define ENABLE_CHECK_INTERVAL_MS 1000 +static struct delayed_work damon_reclaim_timer; +static void damon_reclaim_timer_fn(struct work_struct *work) +{ + static bool last_enabled; + bool now_enabled; + + now_enabled = enabled; + if (last_enabled != now_enabled) { + if (!damon_reclaim_turn(now_enabled)) + last_enabled = now_enabled; + else + enabled = last_enabled; + } + + if (enabled) + schedule_delayed_work(&damon_reclaim_timer, + msecs_to_jiffies(ENABLE_CHECK_INTERVAL_MS)); +} +static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn); + +static int enabled_store(const char *val, + const struct kernel_param *kp) +{ + int rc = param_set_bool(val, kp); + + if (rc < 0) + return rc; + + if (enabled) + schedule_delayed_work(&damon_reclaim_timer, 0); + + return 0; +} + +static const struct kernel_param_ops enabled_param_ops = { + .set = enabled_store, + .get = param_get_bool, +}; + +module_param_cb(enabled, &enabled_param_ops, &enabled, 0600); +MODULE_PARM_DESC(enabled, + "Enable or disable DAMON_RECLAIM (default: disabled)"); + +static int damon_reclaim_after_aggregation(struct damon_ctx *c) +{ + struct damos *s; + + /* update the stats parameter */ + damon_for_each_scheme(s, c) { + nr_reclaim_tried_regions = s->stat.nr_tried; + bytes_reclaim_tried_regions = s->stat.sz_tried; + nr_reclaimed_regions = s->stat.nr_applied; + bytes_reclaimed_regions = s->stat.sz_applied; + nr_quota_exceeds = s->stat.qt_exceeds; + } + return 0; +} + +static int __init damon_reclaim_init(void) +{ + ctx = damon_new_ctx(); + if (!ctx) + return -ENOMEM; + + damon_pa_set_primitives(ctx); + ctx->callback.after_aggregation = damon_reclaim_after_aggregation; + + /* 4242 means nothing but fun */ + target = damon_new_target(4242); + if (!target) { + damon_destroy_ctx(ctx); + return -ENOMEM; + } + damon_add_target(ctx, target); + + schedule_delayed_work(&damon_reclaim_timer, 0); + return 0; +} + +module_init(damon_reclaim_init); diff --git a/mm/damon/vaddr-test.h b/mm/damon/vaddr-test.h new file mode 100644 index 000000000000..6a1b9272ea12 --- /dev/null +++ b/mm/damon/vaddr-test.h @@ -0,0 +1,324 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Data Access Monitor Unit Tests + * + * Copyright 2019 Amazon.com, Inc. 
or its affiliates. All rights reserved. + * + * Author: SeongJae Park + */ + +#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST + +#ifndef _DAMON_VADDR_TEST_H +#define _DAMON_VADDR_TEST_H + +#include + +static void __link_vmas(struct vm_area_struct *vmas, ssize_t nr_vmas) +{ + int i, j; + unsigned long largest_gap, gap; + + if (!nr_vmas) + return; + + for (i = 0; i < nr_vmas - 1; i++) { + vmas[i].vm_next = &vmas[i + 1]; + + vmas[i].vm_rb.rb_left = NULL; + vmas[i].vm_rb.rb_right = &vmas[i + 1].vm_rb; + + largest_gap = 0; + for (j = i; j < nr_vmas; j++) { + if (j == 0) + continue; + gap = vmas[j].vm_start - vmas[j - 1].vm_end; + if (gap > largest_gap) + largest_gap = gap; + } + vmas[i].rb_subtree_gap = largest_gap; + } + vmas[i].vm_next = NULL; + vmas[i].vm_rb.rb_right = NULL; + vmas[i].rb_subtree_gap = 0; +} + +/* + * Test __damon_va_three_regions() function + * + * In case of virtual memory address spaces monitoring, DAMON converts the + * complex and dynamic memory mappings of each target task to three + * discontiguous regions which cover every mapped areas. However, the three + * regions should not include the two biggest unmapped areas in the original + * mapping, because the two biggest areas are normally the areas between 1) + * heap and the mmap()-ed regions, and 2) the mmap()-ed regions and stack. + * Because these two unmapped areas are very huge but obviously never accessed, + * covering the region is just a waste. + * + * '__damon_va_three_regions() receives an address space of a process. It + * first identifies the start of mappings, end of mappings, and the two biggest + * unmapped areas. After that, based on the information, it constructs the + * three regions and returns. For more detail, refer to the comment of + * 'damon_init_regions_of()' function definition in 'mm/damon.c' file. + * + * For example, suppose virtual address ranges of 10-20, 20-25, 200-210, + * 210-220, 300-305, and 307-330 (Other comments represent this mappings in + * more short form: 10-20-25, 200-210-220, 300-305, 307-330) of a process are + * mapped. To cover every mappings, the three regions should start with 10, + * and end with 305. The process also has three unmapped areas, 25-200, + * 220-300, and 305-307. Among those, 25-200 and 220-300 are the biggest two + * unmapped areas, and thus it should be converted to three regions of 10-25, + * 200-220, and 300-330. 
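+ *
+ * Note that the test below builds the mapping from plain stack-allocated
+ * 'struct vm_area_struct' stubs; '__link_vmas()' above wires up just enough
+ * of each stub (the 'vm_next' chain and the rbtree gap information) for
+ * '__damon_va_three_regions()' to walk, so no real 'mm_struct' has to be
+ * constructed.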
+ */ +static void damon_test_three_regions_in_vmas(struct kunit *test) +{ + struct damon_addr_range regions[3] = {0,}; + /* 10-20-25, 200-210-220, 300-305, 307-330 */ + struct vm_area_struct vmas[] = { + (struct vm_area_struct) {.vm_start = 10, .vm_end = 20}, + (struct vm_area_struct) {.vm_start = 20, .vm_end = 25}, + (struct vm_area_struct) {.vm_start = 200, .vm_end = 210}, + (struct vm_area_struct) {.vm_start = 210, .vm_end = 220}, + (struct vm_area_struct) {.vm_start = 300, .vm_end = 305}, + (struct vm_area_struct) {.vm_start = 307, .vm_end = 330}, + }; + + __link_vmas(vmas, 6); + + __damon_va_three_regions(&vmas[0], regions); + + KUNIT_EXPECT_EQ(test, 10ul, regions[0].start); + KUNIT_EXPECT_EQ(test, 25ul, regions[0].end); + KUNIT_EXPECT_EQ(test, 200ul, regions[1].start); + KUNIT_EXPECT_EQ(test, 220ul, regions[1].end); + KUNIT_EXPECT_EQ(test, 300ul, regions[2].start); + KUNIT_EXPECT_EQ(test, 330ul, regions[2].end); +} + +static struct damon_region *__nth_region_of(struct damon_target *t, int idx) +{ + struct damon_region *r; + unsigned int i = 0; + + damon_for_each_region(r, t) { + if (i++ == idx) + return r; + } + + return NULL; +} + +/* + * Test 'damon_va_apply_three_regions()' + * + * test kunit object + * regions an array containing start/end addresses of current + * monitoring target regions + * nr_regions the number of the addresses in 'regions' + * three_regions The three regions that need to be applied now + * expected start/end addresses of monitoring target regions that + * 'three_regions' are applied + * nr_expected the number of addresses in 'expected' + * + * The memory mapping of the target processes changes dynamically. To follow + * the change, DAMON periodically reads the mappings, simplifies it to the + * three regions, and updates the monitoring target regions to fit in the three + * regions. The update of current target regions is the role of + * 'damon_va_apply_three_regions()'. + * + * This test passes the given target regions and the new three regions that + * need to be applied to the function and check whether it updates the regions + * as expected. + */ +static void damon_do_test_apply_three_regions(struct kunit *test, + unsigned long *regions, int nr_regions, + struct damon_addr_range *three_regions, + unsigned long *expected, int nr_expected) +{ + struct damon_target *t; + struct damon_region *r; + int i; + + t = damon_new_target(42); + for (i = 0; i < nr_regions / 2; i++) { + r = damon_new_region(regions[i * 2], regions[i * 2 + 1]); + damon_add_region(r, t); + } + + damon_va_apply_three_regions(t, three_regions); + + for (i = 0; i < nr_expected / 2; i++) { + r = __nth_region_of(t, i); + KUNIT_EXPECT_EQ(test, r->ar.start, expected[i * 2]); + KUNIT_EXPECT_EQ(test, r->ar.end, expected[i * 2 + 1]); + } +} + +/* + * This function test most common case where the three big regions are only + * slightly changed. Target regions should adjust their boundary (10-20-30, + * 50-55, 70-80, 90-100) to fit with the new big regions or remove target + * regions (57-79) that now out of the three regions. 
+ */ +static void damon_test_apply_three_regions1(struct kunit *test) +{ + /* 10-20-30, 50-55-57-59, 70-80-90-100 */ + unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59, + 70, 80, 80, 90, 90, 100}; + /* 5-27, 45-55, 73-104 */ + struct damon_addr_range new_three_regions[3] = { + (struct damon_addr_range){.start = 5, .end = 27}, + (struct damon_addr_range){.start = 45, .end = 55}, + (struct damon_addr_range){.start = 73, .end = 104} }; + /* 5-20-27, 45-55, 73-80-90-104 */ + unsigned long expected[] = {5, 20, 20, 27, 45, 55, + 73, 80, 80, 90, 90, 104}; + + damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions), + new_three_regions, expected, ARRAY_SIZE(expected)); +} + +/* + * Test slightly bigger change. Similar to above, but the second big region + * now require two target regions (50-55, 57-59) to be removed. + */ +static void damon_test_apply_three_regions2(struct kunit *test) +{ + /* 10-20-30, 50-55-57-59, 70-80-90-100 */ + unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59, + 70, 80, 80, 90, 90, 100}; + /* 5-27, 56-57, 65-104 */ + struct damon_addr_range new_three_regions[3] = { + (struct damon_addr_range){.start = 5, .end = 27}, + (struct damon_addr_range){.start = 56, .end = 57}, + (struct damon_addr_range){.start = 65, .end = 104} }; + /* 5-20-27, 56-57, 65-80-90-104 */ + unsigned long expected[] = {5, 20, 20, 27, 56, 57, + 65, 80, 80, 90, 90, 104}; + + damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions), + new_three_regions, expected, ARRAY_SIZE(expected)); +} + +/* + * Test a big change. The second big region has totally freed and mapped to + * different area (50-59 -> 61-63). The target regions which were in the old + * second big region (50-55-57-59) should be removed and new target region + * covering the second big region (61-63) should be created. + */ +static void damon_test_apply_three_regions3(struct kunit *test) +{ + /* 10-20-30, 50-55-57-59, 70-80-90-100 */ + unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59, + 70, 80, 80, 90, 90, 100}; + /* 5-27, 61-63, 65-104 */ + struct damon_addr_range new_three_regions[3] = { + (struct damon_addr_range){.start = 5, .end = 27}, + (struct damon_addr_range){.start = 61, .end = 63}, + (struct damon_addr_range){.start = 65, .end = 104} }; + /* 5-20-27, 61-63, 65-80-90-104 */ + unsigned long expected[] = {5, 20, 20, 27, 61, 63, + 65, 80, 80, 90, 90, 104}; + + damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions), + new_three_regions, expected, ARRAY_SIZE(expected)); +} + +/* + * Test another big change. Both of the second and third big regions (50-59 + * and 70-100) has totally freed and mapped to different area (30-32 and + * 65-68). The target regions which were in the old second and third big + * regions should now be removed and new target regions covering the new second + * and third big regions should be created. 
+ */ +static void damon_test_apply_three_regions4(struct kunit *test) +{ + /* 10-20-30, 50-55-57-59, 70-80-90-100 */ + unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59, + 70, 80, 80, 90, 90, 100}; + /* 5-7, 30-32, 65-68 */ + struct damon_addr_range new_three_regions[3] = { + (struct damon_addr_range){.start = 5, .end = 7}, + (struct damon_addr_range){.start = 30, .end = 32}, + (struct damon_addr_range){.start = 65, .end = 68} }; + /* expect 5-7, 30-32, 65-68 */ + unsigned long expected[] = {5, 7, 30, 32, 65, 68}; + + damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions), + new_three_regions, expected, ARRAY_SIZE(expected)); +} + +static void damon_test_split_evenly_fail(struct kunit *test, + unsigned long start, unsigned long end, unsigned int nr_pieces) +{ + struct damon_target *t = damon_new_target(42); + struct damon_region *r = damon_new_region(start, end); + + damon_add_region(r, t); + KUNIT_EXPECT_EQ(test, + damon_va_evenly_split_region(t, r, nr_pieces), -EINVAL); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1u); + + damon_for_each_region(r, t) { + KUNIT_EXPECT_EQ(test, r->ar.start, start); + KUNIT_EXPECT_EQ(test, r->ar.end, end); + } + + damon_free_target(t); +} + +static void damon_test_split_evenly_succ(struct kunit *test, + unsigned long start, unsigned long end, unsigned int nr_pieces) +{ + struct damon_target *t = damon_new_target(42); + struct damon_region *r = damon_new_region(start, end); + unsigned long expected_width = (end - start) / nr_pieces; + unsigned long i = 0; + + damon_add_region(r, t); + KUNIT_EXPECT_EQ(test, + damon_va_evenly_split_region(t, r, nr_pieces), 0); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), nr_pieces); + + damon_for_each_region(r, t) { + if (i == nr_pieces - 1) + break; + KUNIT_EXPECT_EQ(test, + r->ar.start, start + i++ * expected_width); + KUNIT_EXPECT_EQ(test, r->ar.end, start + i * expected_width); + } + KUNIT_EXPECT_EQ(test, r->ar.start, start + i * expected_width); + KUNIT_EXPECT_EQ(test, r->ar.end, end); + damon_free_target(t); +} + +static void damon_test_split_evenly(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(NULL, NULL, 5), + -EINVAL); + + damon_test_split_evenly_fail(test, 0, 100, 0); + damon_test_split_evenly_succ(test, 0, 100, 10); + damon_test_split_evenly_succ(test, 5, 59, 5); + damon_test_split_evenly_fail(test, 5, 6, 2); +} + +static struct kunit_case damon_test_cases[] = { + KUNIT_CASE(damon_test_three_regions_in_vmas), + KUNIT_CASE(damon_test_apply_three_regions1), + KUNIT_CASE(damon_test_apply_three_regions2), + KUNIT_CASE(damon_test_apply_three_regions3), + KUNIT_CASE(damon_test_apply_three_regions4), + KUNIT_CASE(damon_test_split_evenly), + {}, +}; + +static struct kunit_suite damon_test_suite = { + .name = "damon-primitives", + .test_cases = damon_test_cases, +}; +kunit_test_suite(damon_test_suite); + +#endif /* _DAMON_VADDR_TEST_H */ + +#endif /* CONFIG_DAMON_VADDR_KUNIT_TEST */ diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c new file mode 100644 index 000000000000..89b6468da2b9 --- /dev/null +++ b/mm/damon/vaddr.c @@ -0,0 +1,761 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON Primitives for Virtual Address Spaces + * + * Author: SeongJae Park + */ + +#define pr_fmt(fmt) "damon-va: " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include "prmtv-common.h" + +#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST +#undef DAMON_MIN_REGION +#define DAMON_MIN_REGION 1 +#endif + +/* + * 't->id' should be the pointer to the relevant 'struct pid' 
having reference + * count. Caller must put the returned task, unless it is NULL. + */ +static inline struct task_struct *damon_get_task_struct(struct damon_target *t) +{ + return get_pid_task((struct pid *)t->id, PIDTYPE_PID); +} + +/* + * Get the mm_struct of the given target + * + * Caller _must_ put the mm_struct after use, unless it is NULL. + * + * Returns the mm_struct of the target on success, NULL on failure + */ +static struct mm_struct *damon_get_mm(struct damon_target *t) +{ + struct task_struct *task; + struct mm_struct *mm; + + task = damon_get_task_struct(t); + if (!task) + return NULL; + + mm = get_task_mm(task); + put_task_struct(task); + return mm; +} + +/* + * Functions for the initial monitoring target regions construction + */ + +/* + * Size-evenly split a region into 'nr_pieces' small regions + * + * Returns 0 on success, or negative error code otherwise. + */ +static int damon_va_evenly_split_region(struct damon_target *t, + struct damon_region *r, unsigned int nr_pieces) +{ + unsigned long sz_orig, sz_piece, orig_end; + struct damon_region *n = NULL, *next; + unsigned long start; + + if (!r || !nr_pieces) + return -EINVAL; + + orig_end = r->ar.end; + sz_orig = r->ar.end - r->ar.start; + sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION); + + if (!sz_piece) + return -EINVAL; + + r->ar.end = r->ar.start + sz_piece; + next = damon_next_region(r); + for (start = r->ar.end; start + sz_piece <= orig_end; + start += sz_piece) { + n = damon_new_region(start, start + sz_piece); + if (!n) + return -ENOMEM; + damon_insert_region(n, r, next, t); + r = n; + } + /* complement last region for possible rounding error */ + if (n) + n->ar.end = orig_end; + + return 0; +} + +static unsigned long sz_range(struct damon_addr_range *r) +{ + return r->end - r->start; +} + +/* + * Find three regions separated by two biggest unmapped regions + * + * vma the head vma of the target address space + * regions an array of three address ranges that results will be saved + * + * This function receives an address space and finds three regions in it which + * separated by the two biggest unmapped regions in the space. Please refer to + * below comments of '__damon_va_init_regions()' function to know why this is + * necessary. + * + * Returns 0 if success, or negative error code otherwise. 
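+ *
+ * For example (the layout also used by the kunit test in 'vaddr-test.h'):
+ * given mappings at 10-25, 200-220, 300-305 and 307-330, the two biggest
+ * gaps are 25-200 and 220-300, so the resulting three regions are 10-25,
+ * 200-220 and 300-330 (modulo DAMON_MIN_REGION alignment).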
+ */ +static int __damon_va_three_regions(struct vm_area_struct *vma, + struct damon_addr_range regions[3]) +{ + struct damon_addr_range gap = {0}, first_gap = {0}, second_gap = {0}; + struct vm_area_struct *last_vma = NULL; + unsigned long start = 0; + struct rb_root rbroot; + + /* Find two biggest gaps so that first_gap > second_gap > others */ + for (; vma; vma = vma->vm_next) { + if (!last_vma) { + start = vma->vm_start; + goto next; + } + + if (vma->rb_subtree_gap <= sz_range(&second_gap)) { + rbroot.rb_node = &vma->vm_rb; + vma = rb_entry(rb_last(&rbroot), + struct vm_area_struct, vm_rb); + goto next; + } + + gap.start = last_vma->vm_end; + gap.end = vma->vm_start; + if (sz_range(&gap) > sz_range(&second_gap)) { + swap(gap, second_gap); + if (sz_range(&second_gap) > sz_range(&first_gap)) + swap(second_gap, first_gap); + } +next: + last_vma = vma; + } + + if (!sz_range(&second_gap) || !sz_range(&first_gap)) + return -EINVAL; + + /* Sort the two biggest gaps by address */ + if (first_gap.start > second_gap.start) + swap(first_gap, second_gap); + + /* Store the result */ + regions[0].start = ALIGN(start, DAMON_MIN_REGION); + regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION); + regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION); + regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION); + regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION); + regions[2].end = ALIGN(last_vma->vm_end, DAMON_MIN_REGION); + + return 0; +} + +/* + * Get the three regions in the given target (task) + * + * Returns 0 on success, negative error code otherwise. + */ +static int damon_va_three_regions(struct damon_target *t, + struct damon_addr_range regions[3]) +{ + struct mm_struct *mm; + int rc; + + mm = damon_get_mm(t); + if (!mm) + return -EINVAL; + + mmap_read_lock(mm); + rc = __damon_va_three_regions(mm->mmap, regions); + mmap_read_unlock(mm); + + mmput(mm); + return rc; +} + +/* + * Initialize the monitoring target regions for the given target (task) + * + * t the given target + * + * Because only a number of small portions of the entire address space + * is actually mapped to the memory and accessed, monitoring the unmapped + * regions is wasteful. That said, because we can deal with small noises, + * tracking every mapping is not strictly required but could even incur a high + * overhead if the mapping frequently changes or the number of mappings is + * high. The adaptive regions adjustment mechanism will further help to deal + * with the noise by simply identifying the unmapped areas as a region that + * has no access. Moreover, applying the real mappings that would have many + * unmapped areas inside will make the adaptive mechanism quite complex. That + * said, too huge unmapped areas inside the monitoring target should be removed + * to not take the time for the adaptive mechanism. + * + * For the reason, we convert the complex mappings to three distinct regions + * that cover every mapped area of the address space. Also the two gaps + * between the three regions are the two biggest unmapped areas in the given + * address space. In detail, this function first identifies the start and the + * end of the mappings and the two biggest unmapped areas of the address space. 
+ * Then, it constructs the three regions as below: + * + * [mappings[0]->start, big_two_unmapped_areas[0]->start) + * [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start) + * [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end) + * + * As usual memory map of processes is as below, the gap between the heap and + * the uppermost mmap()-ed region, and the gap between the lowermost mmap()-ed + * region and the stack will be two biggest unmapped regions. Because these + * gaps are exceptionally huge areas in usual address space, excluding these + * two biggest unmapped regions will be sufficient to make a trade-off. + * + * + * + * + * (other mmap()-ed regions and small unmapped regions) + * + * + * + */ +static void __damon_va_init_regions(struct damon_ctx *ctx, + struct damon_target *t) +{ + struct damon_target *ti; + struct damon_region *r; + struct damon_addr_range regions[3]; + unsigned long sz = 0, nr_pieces; + int i, tidx = 0; + + if (damon_va_three_regions(t, regions)) { + damon_for_each_target(ti, ctx) { + if (ti == t) + break; + tidx++; + } + pr_debug("Failed to get three regions of %dth target\n", tidx); + return; + } + + for (i = 0; i < 3; i++) + sz += regions[i].end - regions[i].start; + if (ctx->min_nr_regions) + sz /= ctx->min_nr_regions; + if (sz < DAMON_MIN_REGION) + sz = DAMON_MIN_REGION; + + /* Set the initial three regions of the target */ + for (i = 0; i < 3; i++) { + r = damon_new_region(regions[i].start, regions[i].end); + if (!r) { + pr_err("%d'th init region creation failed\n", i); + return; + } + damon_add_region(r, t); + + nr_pieces = (regions[i].end - regions[i].start) / sz; + damon_va_evenly_split_region(t, r, nr_pieces); + } +} + +/* Initialize '->regions_list' of every target (task) */ +static void damon_va_init(struct damon_ctx *ctx) +{ + struct damon_target *t; + + damon_for_each_target(t, ctx) { + /* the user may set the target regions as they want */ + if (!damon_nr_regions(t)) + __damon_va_init_regions(ctx, t); + } +} + +/* + * Functions for the dynamic monitoring target regions update + */ + +/* + * Check whether a region is intersecting an address range + * + * Returns true if it is. 
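+ *
+ * The check treats the end addresses as exclusive, so for example a region
+ * covering [10, 20) intersects the range [15, 30) but not [20, 30).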
+ */ +static bool damon_intersect(struct damon_region *r, + struct damon_addr_range *re) +{ + return !(r->ar.end <= re->start || re->end <= r->ar.start); +} + +/* + * Update damon regions for the three big regions of the given target + * + * t the given target + * bregions the three big regions of the target + */ +static void damon_va_apply_three_regions(struct damon_target *t, + struct damon_addr_range bregions[3]) +{ + struct damon_region *r, *next; + unsigned int i; + + /* Remove regions which are not in the three big regions now */ + damon_for_each_region_safe(r, next, t) { + for (i = 0; i < 3; i++) { + if (damon_intersect(r, &bregions[i])) + break; + } + if (i == 3) + damon_destroy_region(r, t); + } + + /* Adjust intersecting regions to fit with the three big regions */ + for (i = 0; i < 3; i++) { + struct damon_region *first = NULL, *last; + struct damon_region *newr; + struct damon_addr_range *br; + + br = &bregions[i]; + /* Get the first and last regions which intersects with br */ + damon_for_each_region(r, t) { + if (damon_intersect(r, br)) { + if (!first) + first = r; + last = r; + } + if (r->ar.start >= br->end) + break; + } + if (!first) { + /* no damon_region intersects with this big region */ + newr = damon_new_region( + ALIGN_DOWN(br->start, + DAMON_MIN_REGION), + ALIGN(br->end, DAMON_MIN_REGION)); + if (!newr) + continue; + damon_insert_region(newr, damon_prev_region(r), r, t); + } else { + first->ar.start = ALIGN_DOWN(br->start, + DAMON_MIN_REGION); + last->ar.end = ALIGN(br->end, DAMON_MIN_REGION); + } + } +} + +/* + * Update regions for current memory mappings + */ +static void damon_va_update(struct damon_ctx *ctx) +{ + struct damon_addr_range three_regions[3]; + struct damon_target *t; + + damon_for_each_target(t, ctx) { + if (damon_va_three_regions(t, three_regions)) + continue; + damon_va_apply_three_regions(t, three_regions); + } +} + +static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pte_t *pte; + spinlock_t *ptl; + + if (pmd_huge(*pmd)) { + ptl = pmd_lock(walk->mm, pmd); + if (pmd_huge(*pmd)) { + damon_pmdp_mkold(pmd, walk->mm, addr); + spin_unlock(ptl); + return 0; + } + spin_unlock(ptl); + } + + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + return 0; + pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (!pte_present(*pte)) + goto out; + damon_ptep_mkold(pte, walk->mm, addr); +out: + pte_unmap_unlock(pte, ptl); + return 0; +} + +#ifdef CONFIG_HUGETLB_PAGE +static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long addr) +{ + bool referenced = false; + pte_t entry = huge_ptep_get(pte); + struct page *page = pte_page(entry); + + if (!page) + return; + + get_page(page); + + if (pte_young(entry)) { + referenced = true; + entry = pte_mkold(entry); + huge_ptep_set_access_flags(vma, addr, pte, entry, + vma->vm_flags & VM_WRITE); + } + +#ifdef CONFIG_MMU_NOTIFIER + if (mmu_notifier_clear_young(mm, addr, + addr + huge_page_size(hstate_vma(vma)))) + referenced = true; +#endif /* CONFIG_MMU_NOTIFIER */ + + if (referenced) + set_page_young(page); + + set_page_idle(page); + put_page(page); +} + +static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask, + unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct hstate *h = hstate_vma(walk->vma); + spinlock_t *ptl; + pte_t entry; + + ptl = huge_pte_lock(h, walk->mm, pte); + entry = huge_ptep_get(pte); + if (!pte_present(entry)) + goto out; + + damon_hugetlb_mkold(pte, 
walk->mm, walk->vma, addr); + +out: + spin_unlock(ptl); + return 0; +} +#else +#define damon_mkold_hugetlb_entry NULL +#endif /* CONFIG_HUGETLB_PAGE */ + +static const struct mm_walk_ops damon_mkold_ops = { + .pmd_entry = damon_mkold_pmd_entry, + .hugetlb_entry = damon_mkold_hugetlb_entry, +}; + +static void damon_va_mkold(struct mm_struct *mm, unsigned long addr) +{ + mmap_read_lock(mm); + walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL); + mmap_read_unlock(mm); +} + +/* + * Functions for the access checking of the regions + */ + +static void __damon_va_prepare_access_check(struct damon_ctx *ctx, + struct mm_struct *mm, struct damon_region *r) +{ + r->sampling_addr = damon_rand(r->ar.start, r->ar.end); + + damon_va_mkold(mm, r->sampling_addr); +} + +static void damon_va_prepare_access_checks(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct mm_struct *mm; + struct damon_region *r; + + damon_for_each_target(t, ctx) { + mm = damon_get_mm(t); + if (!mm) + continue; + damon_for_each_region(r, t) + __damon_va_prepare_access_check(ctx, mm, r); + mmput(mm); + } +} + +struct damon_young_walk_private { + unsigned long *page_sz; + bool young; +}; + +static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pte_t *pte; + spinlock_t *ptl; + struct page *page; + struct damon_young_walk_private *priv = walk->private; + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (pmd_huge(*pmd)) { + ptl = pmd_lock(walk->mm, pmd); + if (!pmd_huge(*pmd)) { + spin_unlock(ptl); + goto regular_page; + } + page = damon_get_page(pmd_pfn(*pmd)); + if (!page) + goto huge_out; + if (pmd_young(*pmd) || !page_is_idle(page) || + mmu_notifier_test_young(walk->mm, + addr)) { + *priv->page_sz = ((1UL) << HPAGE_PMD_SHIFT); + priv->young = true; + } + put_page(page); +huge_out: + spin_unlock(ptl); + return 0; + } + +regular_page: +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + return -EINVAL; + pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (!pte_present(*pte)) + goto out; + page = damon_get_page(pte_pfn(*pte)); + if (!page) + goto out; + if (pte_young(*pte) || !page_is_idle(page) || + mmu_notifier_test_young(walk->mm, addr)) { + *priv->page_sz = PAGE_SIZE; + priv->young = true; + } + put_page(page); +out: + pte_unmap_unlock(pte, ptl); + return 0; +} + +#ifdef CONFIG_HUGETLB_PAGE +static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask, + unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct damon_young_walk_private *priv = walk->private; + struct hstate *h = hstate_vma(walk->vma); + struct page *page; + spinlock_t *ptl; + pte_t entry; + + ptl = huge_pte_lock(h, walk->mm, pte); + entry = huge_ptep_get(pte); + if (!pte_present(entry)) + goto out; + + page = pte_page(entry); + if (!page) + goto out; + + get_page(page); + + if (pte_young(entry) || !page_is_idle(page) || + mmu_notifier_test_young(walk->mm, addr)) { + *priv->page_sz = huge_page_size(h); + priv->young = true; + } + + put_page(page); + +out: + spin_unlock(ptl); + return 0; +} +#else +#define damon_young_hugetlb_entry NULL +#endif /* CONFIG_HUGETLB_PAGE */ + +static const struct mm_walk_ops damon_young_ops = { + .pmd_entry = damon_young_pmd_entry, + .hugetlb_entry = damon_young_hugetlb_entry, +}; + +static bool damon_va_young(struct mm_struct *mm, unsigned long addr, + unsigned long *page_sz) +{ + struct damon_young_walk_private arg = { + .page_sz = page_sz, + .young = false, + }; + + mmap_read_lock(mm); + 
walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg); + mmap_read_unlock(mm); + return arg.young; +} + +/* + * Check whether the region was accessed after the last preparation + * + * mm 'mm_struct' for the given virtual address space + * r the region to be checked + */ +static void __damon_va_check_access(struct damon_ctx *ctx, + struct mm_struct *mm, struct damon_region *r) +{ + static struct mm_struct *last_mm; + static unsigned long last_addr; + static unsigned long last_page_sz = PAGE_SIZE; + static bool last_accessed; + + /* If the region is in the last checked page, reuse the result */ + if (mm == last_mm && (ALIGN_DOWN(last_addr, last_page_sz) == + ALIGN_DOWN(r->sampling_addr, last_page_sz))) { + if (last_accessed) + r->nr_accesses++; + return; + } + + last_accessed = damon_va_young(mm, r->sampling_addr, &last_page_sz); + if (last_accessed) + r->nr_accesses++; + + last_mm = mm; + last_addr = r->sampling_addr; +} + +static unsigned int damon_va_check_accesses(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct mm_struct *mm; + struct damon_region *r; + unsigned int max_nr_accesses = 0; + + damon_for_each_target(t, ctx) { + mm = damon_get_mm(t); + if (!mm) + continue; + damon_for_each_region(r, t) { + __damon_va_check_access(ctx, mm, r); + max_nr_accesses = max(r->nr_accesses, max_nr_accesses); + } + mmput(mm); + } + + return max_nr_accesses; +} + +/* + * Functions for the target validity check and cleanup + */ + +bool damon_va_target_valid(void *target) +{ + struct damon_target *t = target; + struct task_struct *task; + + task = damon_get_task_struct(t); + if (task) { + put_task_struct(task); + return true; + } + + return false; +} + +#ifndef CONFIG_ADVISE_SYSCALLS +static unsigned long damos_madvise(struct damon_target *target, + struct damon_region *r, int behavior) +{ + return 0; +} +#else +static unsigned long damos_madvise(struct damon_target *target, + struct damon_region *r, int behavior) +{ + struct mm_struct *mm; + unsigned long start = PAGE_ALIGN(r->ar.start); + unsigned long len = PAGE_ALIGN(r->ar.end - r->ar.start); + unsigned long applied; + + mm = damon_get_mm(target); + if (!mm) + return 0; + + applied = do_madvise(mm, start, len, behavior) ? 
0 : len; + mmput(mm); + + return applied; +} +#endif /* CONFIG_ADVISE_SYSCALLS */ + +static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + struct damos *scheme) +{ + int madv_action; + + switch (scheme->action) { + case DAMOS_WILLNEED: + madv_action = MADV_WILLNEED; + break; + case DAMOS_COLD: + madv_action = MADV_COLD; + break; + case DAMOS_PAGEOUT: + madv_action = MADV_PAGEOUT; + break; + case DAMOS_HUGEPAGE: + madv_action = MADV_HUGEPAGE; + break; + case DAMOS_NOHUGEPAGE: + madv_action = MADV_NOHUGEPAGE; + break; + case DAMOS_STAT: + return 0; + default: + return 0; + } + + return damos_madvise(t, r, madv_action); +} + +static int damon_va_scheme_score(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme) +{ + + switch (scheme->action) { + case DAMOS_PAGEOUT: + return damon_pageout_score(context, r, scheme); + default: + break; + } + + return DAMOS_MAX_SCORE; +} + +void damon_va_set_primitives(struct damon_ctx *ctx) +{ + ctx->primitive.init = damon_va_init; + ctx->primitive.update = damon_va_update; + ctx->primitive.prepare_access_checks = damon_va_prepare_access_checks; + ctx->primitive.check_accesses = damon_va_check_accesses; + ctx->primitive.reset_aggregated = NULL; + ctx->primitive.target_valid = damon_va_target_valid; + ctx->primitive.cleanup = NULL; + ctx->primitive.apply_scheme = damon_va_apply_scheme; + ctx->primitive.get_scheme_score = damon_va_scheme_score; +} + +#include "vaddr-test.h" diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b9127b79c9eb..607ee6c4acc0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3301,6 +3301,7 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn) struct zone *zone = page_zone(page); struct per_cpu_pages *pcp; int migratetype; + bool pcp_skip_cma_pages = false; migratetype = get_pcppage_migratetype(page); __count_vm_event(PGFREE); @@ -3313,7 +3314,10 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn) * excessively into the page allocator */ if (migratetype >= MIGRATE_PCPTYPES) { - if (unlikely(is_migrate_isolate(migratetype))) { + trace_android_vh_pcplist_add_cma_pages_bypass(migratetype, + &pcp_skip_cma_pages); + if (unlikely(is_migrate_isolate(migratetype)) || + pcp_skip_cma_pages) { free_one_page(zone, page, pfn, 0, migratetype, FPI_NONE); return; @@ -8727,6 +8731,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, unsigned long outer_start, outer_end; unsigned int order; int ret = 0; + bool skip_drain_all_pages = false; struct compact_control cc = { .nr_migratepages = 0, @@ -8772,7 +8777,10 @@ int alloc_contig_range(unsigned long start, unsigned long end, return ret; } - drain_all_pages(cc.zone); + trace_android_vh_cma_drain_all_pages_bypass(migratetype, + &skip_drain_all_pages); + if (skip_drain_all_pages) + drain_all_pages(cc.zone); /* * In case of -EBUSY, we'd like to know which page causes problem. diff --git a/mm/page_ext.c b/mm/page_ext.c index 7e44726b3549..e5e31ff1adba 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -58,11 +58,21 @@ * can utilize this callback to initialize the state of it correctly. 
*/ +#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) +static bool need_page_idle(void) +{ + return true; +} +struct page_ext_operations page_idle_ops = { + .need = need_page_idle, +}; +#endif + static struct page_ext_operations *page_ext_ops[] = { #ifdef CONFIG_PAGE_OWNER &page_owner_ops, #endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT) +#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) &page_idle_ops, #endif #ifdef CONFIG_PAGE_PINNER diff --git a/mm/page_idle.c b/mm/page_idle.c index 057c61df12db..144fb4ed961d 100644 --- a/mm/page_idle.c +++ b/mm/page_idle.c @@ -211,16 +211,6 @@ static const struct attribute_group page_idle_attr_group = { .name = "page_idle", }; -#ifndef CONFIG_64BIT -static bool need_page_idle(void) -{ - return true; -} -struct page_ext_operations page_idle_ops = { - .need = need_page_idle, -}; -#endif - static int __init page_idle_init(void) { int err; diff --git a/mm/pagewalk.c b/mm/pagewalk.c index e81640d9f177..5d5d1c47894e 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -430,6 +430,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start, } while (start = next, start < end); return err; } +EXPORT_SYMBOL_GPL(walk_page_range); /* * Similar to walk_page_range() but can walk any page tables even if they are diff --git a/mm/readahead.c b/mm/readahead.c index a6bfa987a04a..a95364c99487 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -459,6 +459,8 @@ static void ondemand_readahead(struct readahead_control *ractl, if (req_size > max_pages && bdi->io_pages > max_pages) max_pages = min(req_size, bdi->io_pages); + trace_android_vh_ra_tuning_max_page(ractl, &max_pages); + /* * start of file */ diff --git a/mm/shmem.c b/mm/shmem.c index 364703bd305e..8fb9d2ddec70 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -4307,6 +4307,7 @@ EXPORT_SYMBOL_GPL(shmem_mark_page_lazyfree); int reclaim_shmem_address_space(struct address_space *mapping) { +#ifdef CONFIG_SHMEM pgoff_t start = 0; struct page *page; LIST_HEAD(page_list); @@ -4340,5 +4341,8 @@ int reclaim_shmem_address_space(struct address_space *mapping) reclaimed = reclaim_pages_from_list(&page_list); return reclaimed; +#else + return 0; +#endif } EXPORT_SYMBOL_GPL(reclaim_shmem_address_space); diff --git a/mm/swapfile.c b/mm/swapfile.c index 5f2e709f2d39..646145953eeb 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3560,6 +3560,7 @@ struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return swap_type_to_swap_info(swp_type(entry)); } +EXPORT_SYMBOL_GPL(swp_swap_info); struct swap_info_struct *page_swap_info(struct page *page) { diff --git a/mm/vmscan.c b/mm/vmscan.c index 46d8e65071d8..0c47c99b4cdb 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -670,7 +670,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, * * Returns the number of reclaimed slab objects. 
*/ -static unsigned long shrink_slab(gfp_t gfp_mask, int nid, +unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority) { @@ -722,6 +722,7 @@ out: cond_resched(); return freed; } +EXPORT_SYMBOL_GPL(shrink_slab); void drop_slab_node(int nid) { @@ -2395,6 +2396,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, denominator = ap + fp; out: trace_android_vh_tune_scan_type((char *)(&scan_balance)); + trace_android_vh_tune_memcg_scan_type(memcg, (char *)(&scan_balance)); for_each_evictable_lru(lru) { int file = is_file_lru(lru); unsigned long lruvec_size; @@ -2698,6 +2700,7 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc) struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); unsigned long reclaimed; unsigned long scanned; + bool skip = false; /* * This loop can become CPU-bound when target memcgs @@ -2707,6 +2710,10 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc) */ cond_resched(); + trace_android_vh_shrink_node_memcgs(memcg, &skip); + if (skip) + continue; + mem_cgroup_calculate_protection(target_memcg, memcg); if (mem_cgroup_below_min(memcg)) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0c3c0c9cbe23..fc36c8eddbf7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3669,7 +3669,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, unsigned int delta_len = 0; struct sk_buff *tail = NULL; struct sk_buff *nskb, *tmp; - int err; + int len_diff, err; skb_push(skb, -skb_network_offset(skb) + offset); @@ -3709,9 +3709,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, skb_push(nskb, -skb_network_offset(nskb) + offset); skb_release_head_state(nskb); + len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb); __copy_skb_header(nskb, skb); skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb)); + nskb->transport_header += len_diff; skb_copy_from_linear_data_offset(skb, -tnl_hlen, nskb->data - tnl_hlen, offset + tnl_hlen); diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile new file mode 100644 index 000000000000..8a3f2cd9fec0 --- /dev/null +++ b/tools/testing/selftests/damon/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for damon selftests + +TEST_FILES = _chk_dependency.sh +TEST_PROGS = debugfs_attrs.sh + +include ../lib.mk diff --git a/tools/testing/selftests/damon/_chk_dependency.sh b/tools/testing/selftests/damon/_chk_dependency.sh new file mode 100644 index 000000000000..0189db81550b --- /dev/null +++ b/tools/testing/selftests/damon/_chk_dependency.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +DBGFS=/sys/kernel/debug/damon + +if [ $EUID -ne 0 ]; +then + echo "Run as root" + exit $ksft_skip +fi + +if [ ! -d "$DBGFS" ] +then + echo "$DBGFS not found" + exit $ksft_skip +fi + +for f in attrs target_ids monitor_on +do + if [ ! -f "$DBGFS/$f" ] + then + echo "$f not found" + exit 1 + fi +done diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh new file mode 100644 index 000000000000..196b6640bf37 --- /dev/null +++ b/tools/testing/selftests/damon/debugfs_attrs.sh @@ -0,0 +1,88 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +test_write_result() { + file=$1 + content=$2 + orig_content=$3 + expect_reason=$4 + expected=$5 + + echo "$content" > "$file" + if [ $? 
-ne "$expected" ] + then + echo "writing $content to $file doesn't return $expected" + echo "expected because: $expect_reason" + echo "$orig_content" > "$file" + exit 1 + fi +} + +test_write_succ() { + test_write_result "$1" "$2" "$3" "$4" 0 +} + +test_write_fail() { + test_write_result "$1" "$2" "$3" "$4" 1 +} + +test_content() { + file=$1 + orig_content=$2 + expected=$3 + expect_reason=$4 + + content=$(cat "$file") + if [ "$content" != "$expected" ] + then + echo "reading $file expected $expected but $content" + echo "expected because: $expect_reason" + echo "$orig_content" > "$file" + exit 1 + fi +} + +source ./_chk_dependency.sh + +# Test attrs file +# =============== + +file="$DBGFS/attrs" +orig_content=$(cat "$file") + +test_write_succ "$file" "1 2 3 4 5" "$orig_content" "valid input" +test_write_fail "$file" "1 2 3 4" "$orig_content" "no enough fields" +test_write_fail "$file" "1 2 3 5 4" "$orig_content" \ + "min_nr_regions > max_nr_regions" +test_content "$file" "$orig_content" "1 2 3 4 5" "successfully written" +echo "$orig_content" > "$file" + +# Test schemes file +# ================= + +file="$DBGFS/schemes" +orig_content=$(cat "$file") + +test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3 1 100 3 2 1" \ + "$orig_content" "valid input" +test_write_fail "$file" "1 2 +3 4 5 6 3 0 0 0 1 2 3 1 100 3 2 1" "$orig_content" "multi lines" +test_write_succ "$file" "" "$orig_content" "disabling" +echo "$orig_content" > "$file" + +# Test target_ids file +# ==================== + +file="$DBGFS/target_ids" +orig_content=$(cat "$file") + +test_write_succ "$file" "1 2 3 4" "$orig_content" "valid input" +test_write_succ "$file" "1 2 abc 4" "$orig_content" "still valid input" +test_content "$file" "$orig_content" "1 2" "non-integer was there" +test_write_succ "$file" "abc 2 3" "$orig_content" "the file allows wrong input" +test_content "$file" "$orig_content" "" "wrong input written" +test_write_succ "$file" "" "$orig_content" "empty input" +test_content "$file" "$orig_content" "" "empty input written" +echo "$orig_content" > "$file" + +echo "PASS" diff --git a/tools/testing/selftests/filesystems/incfs/Makefile b/tools/testing/selftests/filesystems/incfs/Makefile index f3798029247a..5a2f6301c4b2 100644 --- a/tools/testing/selftests/filesystems/incfs/Makefile +++ b/tools/testing/selftests/filesystems/incfs/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -D_FILE_OFFSET_BITS=64 -Wall -Werror -I../.. -I../../../../.. -LDLIBS := -llz4 -lzstd -lcrypto -lpthread +CFLAGS += -D_FILE_OFFSET_BITS=64 -Wall -Werror -I../.. -I../../../../.. 
-fno-omit-frame-pointer -fsanitize=address -g +LDLIBS := -llz4 -lzstd -lcrypto -lpthread -fsanitize=address TEST_GEN_PROGS := incfs_test incfs_stress incfs_perf include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 2d2bc933fefc..bfd5f1ac7c7a 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -26,6 +26,7 @@ #include #include #include +#include <sys/statvfs.h> #include #include @@ -43,6 +44,7 @@ #define FS_IOC_GETFLAGS _IOR('f', 1, long) #define FS_VERITY_FL 0x00100000 /* Verity protected inode */ +#define TEST_SKIP 2 #define TEST_FAILURE 1 #define TEST_SUCCESS 0 @@ -2793,14 +2795,16 @@ failure: return TEST_FAILURE; } +#define THREE_GB (3LL * 1024 * 1024 * 1024) +#define FOUR_GB (4LL * 1024 * 1024 * 1024) /* Have 1GB of margin */ static int large_file_test(const char *mount_dir) { char *backing_dir; int cmd_fd = -1; int i; - int result = TEST_FAILURE; + int result = TEST_FAILURE, ret; uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE] = {}; - int block_count = 3LL * 1024 * 1024 * 1024 / INCFS_DATA_FILE_BLOCK_SIZE; + int block_count = THREE_GB / INCFS_DATA_FILE_BLOCK_SIZE; struct incfs_fill_block *block_buf = calloc(block_count, sizeof(struct incfs_fill_block)); struct incfs_fill_blocks fill_blocks = { @@ -2809,6 +2813,22 @@ static int large_file_test(const char *mount_dir) }; incfs_uuid_t id; int fd = -1; + struct statvfs svfs; + unsigned long long free_disksz; + + ret = statvfs(mount_dir, &svfs); + if (ret) { + ksft_print_msg("Can't get disk size. Skipping %s...\n", __func__); + return TEST_SKIP; + } + + free_disksz = (unsigned long long)svfs.f_bavail * svfs.f_bsize; + + if (FOUR_GB > free_disksz) { + ksft_print_msg("Not enough free disk space (%lldMB). 
Skipping %s...\n", + free_disksz >> 20, __func__); + return TEST_SKIP; + } backing_dir = create_backing_dir(mount_dir); if (!backing_dir) @@ -2849,6 +2869,7 @@ static int large_file_test(const char *mount_dir) failure: close(fd); close(cmd_fd); + unlink("very_large_file"); umount(mount_dir); free(backing_dir); return result; @@ -4665,9 +4686,15 @@ struct test_case { void run_one_test(const char *mount_dir, struct test_case *test_case) { + int ret; + ksft_print_msg("Running %s\n", test_case->name); - if (test_case->pfunc(mount_dir) == TEST_SUCCESS) + ret = test_case->pfunc(mount_dir); + + if (ret == TEST_SUCCESS) ksft_test_result_pass("%s\n", test_case->name); + else if (ret == TEST_SKIP) + ksft_test_result_skip("%s\n", test_case->name); else ksft_test_result_fail("%s\n", test_case->name); } diff --git a/tools/testing/selftests/filesystems/incfs/utils.c b/tools/testing/selftests/filesystems/incfs/utils.c index e60b0d36f49d..0411d912ae05 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.c +++ b/tools/testing/selftests/filesystems/incfs/utils.c @@ -116,6 +116,9 @@ size_t format_signature(void **buf, const char *root_hash, const char *add_data) size_t size = sizeof(struct signature_blob) + strlen(add_data) + 1; struct signature_blob *sb = malloc(size); + if (!sb) + return 0; + *sb = (struct signature_blob){ .version = INCFS_SIGNATURE_VERSION, .hash_section_size = sizeof(struct hash_section), @@ -126,7 +129,7 @@ size_t format_signature(void **buf, const char *root_hash, const char *add_data) .salt_size = 0, .hash_size = SHA256_DIGEST_SIZE, }, - .signing_section_size = sizeof(uint32_t) + strlen(add_data) + 1, + .signing_section_size = strlen(add_data) + 1, }; memcpy(sb->hash_section.hash, root_hash, SHA256_DIGEST_SIZE); diff --git a/usr/include/Makefile b/usr/include/Makefile index 703a255cddc6..a8ed689918ce 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -12,6 +12,9 @@ UAPI_CFLAGS := -std=c90 -Wall -Werror=implicit-function-declaration # It is here just because CONFIG_CC_CAN_LINK is tested with -m32 or -m64. UAPI_CFLAGS += $(filter -m32 -m64, $(KBUILD_CFLAGS)) +# USERCFLAGS might contain sysroot location for CC. +UAPI_CFLAGS += $(USERCFLAGS) + override c_flags = $(UAPI_CFLAGS) -Wp,-MMD,$(depfile) -I$(objtree)/usr/include # The following are excluded for now because they fail to build.
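
A quick usage sketch for the usr/include change above: because usr/include/Makefile now appends $(USERCFLAGS) to UAPI_CFLAGS, a sysroot for the target C library can be handed in from the make command line when cross compiling. This is only an illustration of how the new variable is consumed; the toolchain prefix and sysroot path below are hypothetical placeholders, not values required by this patch.

    # Hypothetical cross build: let the UAPI header compile step pick up the
    # cross libc headers by routing a sysroot through USERCFLAGS.
    make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- \
         USERCFLAGS="--sysroot=/opt/aarch64-sysroot"

Since the Makefile change is purely additive ("UAPI_CFLAGS += $(USERCFLAGS)"), leaving USERCFLAGS unset keeps the previous behaviour.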