From d94df02c476cbc207651d19cfd104690b079bdb4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 11:56:15 +0100 Subject: [PATCH 01/75] docs: Makefile: honor V=0 for docs building Reduce the number of displayed messages when building the docs with V=0. Suggested-by: Stephen Rothwell Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/478c114a2399b68a18de94ee5f98649304f3903b.1603796153.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/Makefile b/Documentation/Makefile index 6b12dd82f712..4e47dff8b315 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -21,6 +21,10 @@ BUILDDIR = $(obj)/output PDFLATEX = xelatex LATEXOPTS = -interaction=batchmode +ifeq ($(KBUILD_VERBOSE),0) +SPHINXOPTS += "-q" +endif + # User-friendly check for sphinx-build HAVE_SPHINX := $(shell if which $(SPHINXBUILD) >/dev/null 2>&1; then echo 1; else echo 0; fi) From 6b80975c6308b5dc93b270a2903a314d896974c4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 11:20:36 +0100 Subject: [PATCH 02/75] scripts: kernel-doc: fix typedef parsing The include/linux/genalloc.h file defined this typedef: typedef unsigned long (*genpool_algo_t)(unsigned long *map,unsigned long size,unsigned long start,unsigned int nr,void *data, struct gen_pool *pool, unsigned long start_addr); Because it has a type composite of two words (unsigned long), the parser gets the typedef name wrong: .. c:macro:: long **Typedef**: Allocation callback function type definition Fix the regex in order to accept composite types when defining a typedef for a function pointer. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/328e8018041cc44f7a1684e57f8d111230761c4f.1603792384.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index c8f6b11d5da1..c7c9f1e8428d 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1434,13 +1434,14 @@ sub dump_typedef($$) { $x =~ s@/\*.*?\*/@@gos; # strip comments. # Parse function prototypes - if ($x =~ /typedef\s+(\w+)\s*\(\*\s*(\w\S+)\s*\)\s*\((.*)\);/ || - $x =~ /typedef\s+(\w+)\s*(\w\S+)\s*\s*\((.*)\);/) { + if ($x =~ /typedef((?:\s+[\w\*]+){1,8})\s*\(\*?\s*(\w\S+)\s*\)\s*\((.*)\);/ || + $x =~ /typedef((?:\s+[\w\*]+\s+){1,8})\s*\*?(\w\S+)\s*\s*\((.*)\);/) { # Function typedefs $return_type = $1; $declaration_name = $2; my $args = $3; + $return_type =~ s/^\s+//; create_parameterlist($args, ',', $file, $declaration_name); From 7efc6c4295f5489e25eaef6d6fb6ceb6a6e238d0 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 11:20:37 +0100 Subject: [PATCH 03/75] scripts: kernel-doc: split typedef complex regex The typedef regex for function prototypes are very complex. Split them into 3 separate regex and then join them using qr. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/3a4af999a0d62d4ab9dfae1cdefdfcad93383356.1603792384.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index c7c9f1e8428d..cf71897df36d 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1427,17 +1427,21 @@ sub dump_enum($$) { } } +my $typedef_type = qr { ((?:\s+[\w\*]+){1,8})\s* }x; +my $typedef_ident = qr { \*?\s*(\w\S+)\s* }x; +my $typedef_args = qr { \s*\((.*)\); }x; + +my $typedef1 = qr { typedef$typedef_type\($typedef_ident\)$typedef_args }x; +my $typedef2 = qr { typedef$typedef_type$typedef_ident$typedef_args }x; + sub dump_typedef($$) { my $x = shift; my $file = shift; $x =~ s@/\*.*?\*/@@gos; # strip comments. - # Parse function prototypes - if ($x =~ /typedef((?:\s+[\w\*]+){1,8})\s*\(\*?\s*(\w\S+)\s*\)\s*\((.*)\);/ || - $x =~ /typedef((?:\s+[\w\*]+\s+){1,8})\s*\*?(\w\S+)\s*\s*\((.*)\);/) { - - # Function typedefs + # Parse function typedef prototypes + if ($x =~ $typedef1 || $x =~ $typedef2) { $return_type = $1; $declaration_name = $2; my $args = $3; From e051955977b7e26124aa8c8398278145f85f94e8 Mon Sep 17 00:00:00 2001 From: Wilken Gottwalt Date: Tue, 27 Oct 2020 07:24:08 +0100 Subject: [PATCH 04/75] documentation: arm: sunxi: add Allwinner H6 documents Add the current Allwinner H6 datasheet and user manual. 
Signed-off-by: Wilken Gottwalt Link: https://lore.kernel.org/r/20201027062408.GA6761@monster.powergraphx.local Signed-off-by: Jonathan Corbet --- Documentation/arm/sunxi.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/arm/sunxi.rst b/Documentation/arm/sunxi.rst index 62b533d0ba94..0c536ae1d7c2 100644 --- a/Documentation/arm/sunxi.rst +++ b/Documentation/arm/sunxi.rst @@ -148,3 +148,13 @@ SunXi family * User Manual http://dl.linux-sunxi.org/A64/Allwinner%20A64%20User%20Manual%20v1.0.pdf + + - Allwinner H6 + + * Datasheet + + https://linux-sunxi.org/images/5/5c/Allwinner_H6_V200_Datasheet_V1.1.pdf + + * User Manual + + https://linux-sunxi.org/images/4/46/Allwinner_H6_V200_User_Manual_V1.1.pdf From 72b97d0b911872ba36a5742b7d790b6b165e11d8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:05 +0100 Subject: [PATCH 05/75] scripts: kernel-doc: use :c:union when needed Sphinx C domain code after 3.2.1 will start complaining if :c:struct would be used for a union type: .../Documentation/gpu/drm-kms-helpers:352: ../drivers/video/hdmi.c:851: WARNING: C 'identifier' cross-reference uses wrong tag: reference name is 'union hdmi_infoframe' but found name is 'struct hdmi_infoframe'. Full reference name is 'union hdmi_infoframe'. Full found name is 'struct hdmi_infoframe'. So, let's address this issue too in advance, in order to avoid future issues. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/6e4ec3eec914df62389a299797a3880ae4490f35.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index cf71897df36d..f699cf05d409 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1092,7 +1092,11 @@ sub output_struct_rst(%) { print "\n\n.. c:type:: " . $name . "\n\n"; } else { my $name = $args{'struct'}; - print "\n\n.. c:struct:: " . $name . 
"\n\n"; + if ($args{'type'} eq 'union') { + print "\n\n.. c:union:: " . $name . "\n\n"; + } else { + print "\n\n.. c:struct:: " . $name . "\n\n"; + } } print_lineno($declaration_start_line); $lineprefix = " "; From d29f34c098aa1b7e237ce9979eeb5cef9e5f162f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:06 +0100 Subject: [PATCH 06/75] sphinx: conf.py: properly handle Sphinx 4.0 One of the checks for Sphinx 3+ is broken, causing some C warnings to return back with Sphinx 4.0.x. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/d5abc30056dafeec0778a46263a45401bdc7f11e.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/conf.py b/Documentation/conf.py index 376dd0ddf39c..7ee05fd4cb17 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -50,7 +50,7 @@ if major >= 3: support for Sphinx v3.0 and above is brand new. Be prepared for possible issues in the generated output. ''') - if minor > 0 or patch >= 2: + if (major > 3) or (minor > 0 or patch >= 2): # Sphinx c function parser is more pedantic with regards to type # checking. Due to that, having macros at c:function cause problems. # Those needed to be scaped by using c_id_attributes[] array From 6cc6f5ad9bfb430289a356a95fc5c74fe412d5cd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:07 +0100 Subject: [PATCH 07/75] docs: hwmon: adm1266.rst: fix a broken reference The reference was missing the extension, causing the check script to complain. 
Signed-off-by: Mauro Carvalho Chehab Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/3d64372dabcdcea144cdc9972c245812ea2a84cb.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/hwmon/adm1266.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/hwmon/adm1266.rst b/Documentation/hwmon/adm1266.rst index 9257f8a48650..2b877011cfdf 100644 --- a/Documentation/hwmon/adm1266.rst +++ b/Documentation/hwmon/adm1266.rst @@ -20,7 +20,7 @@ ADM1266 is a sequencer that features voltage readback from 17 channels via an integrated 12 bit SAR ADC, accessed using a PMBus interface. The driver is a client driver to the core PMBus driver. Please see -Documentation/hwmon/pmbus for details on PMBus client drivers. +Documentation/hwmon/pmbus.rst for details on PMBus client drivers. Sysfs entries From 2644ccef6f289c0bd545f5b79a4bfc6eb67318fe Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:08 +0100 Subject: [PATCH 08/75] docs: admin-guide: net.rst: add a missing blank line There's a missing blank line after a literal block, which causes this warning: Documentation/admin-guide/sysctl/net.rst:303: WARNING: Literal block ends without a blank line; unexpected unindent. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/b2545be4a4c71269d10278b5990c3e06c4b65f84.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysctl/net.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 57fd6ce68fe0..f2ab8a5b6a4b 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -300,6 +300,7 @@ Note: 0: 0 1 2 3 4 5 6 7 RSS hash key: 84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8:43:e3:c9:0c:fd:17:55:c2:3a:4d:69:ed:f1:42:89 + netdev_tstamp_prequeue ---------------------- From 32519c0326862d95186d018876a07980acf11089 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:09 +0100 Subject: [PATCH 09/75] docs: kasan.rst: add two missing blank lines literal blocks should start and end with a blank line, as otherwise the parser complains and may do the wrong thing, as warned by Sphinx: Documentation/dev-tools/kasan.rst:298: WARNING: Literal block ends without a blank line; unexpected unindent. Documentation/dev-tools/kasan.rst:303: WARNING: Literal block ends without a blank line; unexpected unindent. 
Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Andrey Konovalov Link: https://lore.kernel.org/r/cd6c4280fe26b07f2c5e5ed2918e17e88bb03419.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/dev-tools/kasan.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index c09c9ca2ff1c..2b68addaadcd 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -295,11 +295,13 @@ print the number of the test and the status of the test: pass:: ok 28 - kmalloc_double_kzfree + or, if kmalloc failed:: # kmalloc_large_oob_right: ASSERTION FAILED at lib/test_kasan.c:163 Expected ptr is not null, but is not ok 4 - kmalloc_large_oob_right + or, if a KASAN report was expected, but not found:: # kmalloc_double_kzfree: EXPECTATION FAILED at lib/test_kasan.c:629 From 97e44c4f0cb69ec4f896451454e7b54b3fe06345 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:10 +0100 Subject: [PATCH 10/75] docs: net: statistics.rst: remove a duplicated kernel-doc include/linux/ethtool.h is included twice with kernel-doc, both to document ethtool_pause_stats(). The first one is at statistics.rst, and the second one at ethtool-netlink.rst. Replace one of the references to use the name of the function. The automarkup.py extension should create the cross-references. Solves this warning: ../Documentation/networking/ethtool-netlink.rst: WARNING: Duplicate C declaration, also defined in 'networking/statistics'. Declaration is 'ethtool_pause_stats'. Signed-off-by: Mauro Carvalho Chehab Acked-by: David S. 
Miller Link: https://lore.kernel.org/r/fdbf853bbdaf3bc1d38f32744b739d175c5c31f5.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/networking/statistics.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/networking/statistics.rst b/Documentation/networking/statistics.rst index 8e15bc98830b..234abedc29b2 100644 --- a/Documentation/networking/statistics.rst +++ b/Documentation/networking/statistics.rst @@ -175,5 +175,4 @@ The following structures are internal to the kernel, their members are translated to netlink attributes when dumped. Drivers must not overwrite the statistics they don't report with 0. -.. kernel-doc:: include/linux/ethtool.h - :identifiers: ethtool_pause_stats +- ethtool_pause_stats() From 9d8c4f0c0170a8e63256ba257b57975f04b813ab Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:11 +0100 Subject: [PATCH 11/75] docs: hwmon: mp2975.rst: address some html build warnings .../Documentation/hwmon/mp2975.rst:25: WARNING: Unexpected indentation. .../Documentation/hwmon/mp2975.rst:27: WARNING: Block quote ends without a blank line; unexpected unindent. .../Documentation/hwmon/mp2975.rst:69: WARNING: Unexpected indentation. .../Documentation/hwmon/mp2975.rst:70: WARNING: Block quote ends without a blank line; unexpected unindent. .../Documentation/hwmon/mp2975.rst:72: WARNING: Bullet list ends without a blank line; unexpected unindent. .../Documentation/hwmon/mp2975.rst: WARNING: document isn't included in any toctree List blocks should have blank lines before and after them, in order to be properly parsed. 
Fixes: 4beb7a028e9f ("hwmon: (pmbus) Add support for MPS Multi-phase mp2975 controller") Signed-off-by: Mauro Carvalho Chehab Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/b02f98d886ab1f5af233f8999c7a15529fc52cdc.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/hwmon/index.rst | 1 + Documentation/hwmon/mp2975.rst | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst index e6b91ab12978..b797db738225 100644 --- a/Documentation/hwmon/index.rst +++ b/Documentation/hwmon/index.rst @@ -132,6 +132,7 @@ Hardware Monitoring Kernel Drivers mcp3021 menf21bmc mlxreg-fan + mp2975 nct6683 nct6775 nct7802 diff --git a/Documentation/hwmon/mp2975.rst b/Documentation/hwmon/mp2975.rst index 5b0609c62f48..81d816b71490 100644 --- a/Documentation/hwmon/mp2975.rst +++ b/Documentation/hwmon/mp2975.rst @@ -20,6 +20,7 @@ This driver implements support for Monolithic Power Systems, Inc. (MPS) vendor dual-loop, digital, multi-phase controller MP2975. This device: + - Supports up to two power rail. - Provides 8 pulse-width modulations (PWMs), and can be configured up to 8-phase operation for rail 1 and up to 4-phase operation for rail @@ -32,10 +33,12 @@ This device: 10-mV DAC, IMVP9 mode with 5-mV DAC. Device supports: + - SVID interface. - AVSBus interface. Device complaint with: + - PMBus rev 1.3 interface. Device supports direct format for reading output current, output voltage, @@ -45,11 +48,14 @@ Device supports VID and direct formats for reading output voltage. The below VID modes are supported: VR12, VR13, IMVP9. The driver provides the next attributes for the current: + - for current in: input, maximum alarm; - for current out input, maximum alarm and highest values; - for phase current: input and label. -attributes. + attributes. 
+ The driver exports the following attributes via the 'sysfs' files, where + - 'n' is number of telemetry pages (from 1 to 2); - 'k' is number of configured phases (from 1 to 8); - indexes 1, 1*n for "iin"; @@ -65,11 +71,14 @@ The driver exports the following attributes via the 'sysfs' files, where **curr[1-{2n+k}]_label** The driver provides the next attributes for the voltage: + - for voltage in: input, high critical threshold, high critical alarm, all only from page 0; - for voltage out: input, low and high critical thresholds, low and high critical alarms, from pages 0 and 1; + The driver exports the following attributes via the 'sysfs' files, where + - 'n' is number of telemetry pages (from 1 to 2); - indexes 1 for "iin"; - indexes n+1, n+2 for "vout"; @@ -87,9 +96,12 @@ The driver exports the following attributes via the 'sysfs' files, where **in[2-{n+1}1_lcrit_alarm** The driver provides the next attributes for the power: + - for power in alarm and input. - for power out: highest and input. + The driver exports the following attributes via the 'sysfs' files, where + - 'n' is number of telemetry pages (from 1 to 2); - indexes 1 for "pin"; - indexes n+1, n+2 for "pout"; From de39012afa7fd119fd6387c11a6ce33590717866 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:12 +0100 Subject: [PATCH 12/75] docs: userspace-api: add iommu.rst to the index file There's a new uAPI doc for IOMMU. Add it to the index file. 
Should address this warning: .../Documentation/userspace-api/iommu.rst: WARNING: document isn't included in any toctree Fixes: d0023e3ee28d ("docs: IOMMU user API") Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/cc55219a551e29848e2282cd8939a4115067234c.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/userspace-api/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst index 69fc5167e648..acd2cc2a538d 100644 --- a/Documentation/userspace-api/index.rst +++ b/Documentation/userspace-api/index.rst @@ -22,6 +22,7 @@ place where this information is gathered. spec_ctrl accelerators/ocxl ioctl/index + iommu media/index .. only:: subproject and html From 6a6223ec7779dfdabb9c2567bb42079bc300cf27 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:13 +0100 Subject: [PATCH 13/75] blk-mq: docs: add kernel-doc description for a new struct member As reported by kernel-doc: ./include/linux/blk-mq.h:267: warning: Function parameter or member 'active_queues_shared_sbitmap' not described in 'blk_mq_tag_set' There is now a new member for struct blk_mq_tag_set. Add a description for it, based on the commit that introduced it. Fixes: f1b49fdc1c64 ("blk-mq: Record active_queues_shared_sbitmap per tag_set for when using shared sbitmap") Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Jens Axboe Reviewed-by: John Garry Link: https://lore.kernel.org/r/8e513153b83eefc05e358f51f2632b592c3f6772.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b23eeca4d677..794b2a33a2c3 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -235,6 +235,8 @@ enum hctx_type { * @flags: Zero or more BLK_MQ_F_* flags. 
* @driver_data: Pointer to data owned by the block driver that created this * tag set. + * @active_queues_shared_sbitmap: + * number of active request queues per tag set. * @__bitmap_tags: A shared tags sbitmap, used over all hctx's * @__breserved_tags: * A shared reserved tags sbitmap, used over all hctx's From 89b422354409c275e898d26607201797cc05a932 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:17 +0100 Subject: [PATCH 14/75] mm: pagemap.h: fix two kernel-doc markups Changeset a8cf7f272b5a ("mm: add find_lock_head") renamed the index parameter, but forgot to update the kernel-doc markups accordingly. Fixes: a8cf7f272b5a ("mm: add find_lock_head") Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/dce89b296a4f5f9f8f798d5e76b6736c14a916ac.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/pagemap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index c77b7c31b2e4..e1e19c1f9ec9 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -344,9 +344,9 @@ static inline struct page *find_get_page_flags(struct address_space *mapping, /** * find_lock_page - locate, pin and lock a pagecache page * @mapping: the address_space to search - * @offset: the page index + * @index: the page index * - * Looks up the page cache entry at @mapping & @offset. If there is a + * Looks up the page cache entry at @mapping & @index. If there is a * page cache page, it is returned locked and with an increased * refcount. * @@ -363,9 +363,9 @@ static inline struct page *find_lock_page(struct address_space *mapping, /** * find_lock_head - Locate, pin and lock a pagecache page. * @mapping: The address_space to search. - * @offset: The page index. + * @index: The page index. * - * Looks up the page cache entry at @mapping & @offset. 
If there is a + * Looks up the page cache entry at @mapping & @index. If there is a * page cache page, its head page is returned locked and with an increased * refcount. * From e86c6569c588a01f20e7554cc245f8fae831957b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:18 +0100 Subject: [PATCH 15/75] net: phy: remove kernel-doc duplication Sphinx 3 now checks for duplicated function declarations: .../Documentation/networking/kapi:143: ../include/linux/phy.h:163: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'unsigned int phy_supported_speeds (struct phy_device *phy, unsigned int *speeds, unsigned int size)'. .../Documentation/networking/kapi:143: ../include/linux/phy.h:1034: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'int phy_read_mmd (struct phy_device *phydev, int devad, u32 regnum)'. .../Documentation/networking/kapi:143: ../include/linux/phy.h:1076: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'int __phy_read_mmd (struct phy_device *phydev, int devad, u32 regnum)'. .../Documentation/networking/kapi:143: ../include/linux/phy.h:1088: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'int phy_write_mmd (struct phy_device *phydev, int devad, u32 regnum, u16 val)'. .../Documentation/networking/kapi:143: ../include/linux/phy.h:1100: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'int __phy_write_mmd (struct phy_device *phydev, int devad, u32 regnum, u16 val)'. It turns out that both the C and the H files have the same kernel-doc markup for the same functions. Let's drop the one at the header file, keeping the one closer to the code. 
Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/75e9a357f9a716833d2094b04898754876365e68.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/phy.h | 40 +++++----------------------------------- 1 file changed, 5 insertions(+), 35 deletions(-) diff --git a/include/linux/phy.h b/include/linux/phy.h index eb3cb1a98b45..56563e5e0dc7 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -147,16 +147,8 @@ typedef enum { PHY_INTERFACE_MODE_MAX, } phy_interface_t; -/** +/* * phy_supported_speeds - return all speeds currently supported by a PHY device - * @phy: The PHY device to return supported speeds of. - * @speeds: buffer to store supported speeds in. - * @size: size of speeds buffer. - * - * Description: Returns the number of supported speeds, and fills - * the speeds buffer with the supported speeds. If speeds buffer is - * too small to contain all currently supported speeds, will return as - * many speeds as can fit. */ unsigned int phy_supported_speeds(struct phy_device *phy, unsigned int *speeds, @@ -1022,14 +1014,9 @@ static inline int __phy_modify_changed(struct phy_device *phydev, u32 regnum, regnum, mask, set); } -/** +/* * phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to read from - * @regnum: The register on the MMD to read - * - * Same rules as for phy_read(); */ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); @@ -1064,38 +1051,21 @@ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); __ret; \ }) -/** +/* * __phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. 
- * @phydev: The phy_device struct - * @devad: The MMD to read from - * @regnum: The register on the MMD to read - * - * Same rules as for __phy_read(); */ int __phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); -/** +/* * phy_write_mmd - Convenience function for writing a register * on an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to write to - * @regnum: The register on the MMD to read - * @val: value to write to @regnum - * - * Same rules as for phy_write(); */ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); -/** +/* * __phy_write_mmd - Convenience function for writing a register * on an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to write to - * @regnum: The register on the MMD to read - * @val: value to write to @regnum - * - * Same rules as for __phy_write(); */ int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); From e930c39e098499702c23facb8cd180e35a24e9df Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:19 +0100 Subject: [PATCH 16/75] crypto: sun8x-ce*: update entries to its documentation The README file was converted to ReST format. Update the references for it accordingly. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/e3122e9575769bcf74bc2bdef08755209cb51971.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c | 2 +- drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c | 2 +- drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c index fa2f1b4fad7b..a94bf28f858a 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c @@ -7,7 +7,7 @@ * * This file add support for MD5 and SHA1/SHA224/SHA256/SHA384/SHA512. * - * You could find the datasheet in Documentation/arm/sunxi/README + * You could find the datasheet in Documentation/arm/sunxi.rst */ #include #include diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c index 78503006949c..cfde9ee4356b 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c @@ -7,7 +7,7 @@ * * This file handle the PRNG * - * You could find a link for the datasheet in Documentation/arm/sunxi/README + * You could find a link for the datasheet in Documentation/arm/sunxi.rst */ #include "sun8i-ce.h" #include diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c index 654328160d19..5b7af4498bd5 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c @@ -7,7 +7,7 @@ * * This file handle the TRNG * - * You could find a link for the datasheet in Documentation/arm/sunxi/README + * You could find a link for the datasheet in Documentation/arm/sunxi.rst */ #include "sun8i-ce.h" #include From 4b7560c5e4bda24fcbd54337c87056833819c73b Mon Sep 17 00:00:00 2001 From: 
Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:20 +0100 Subject: [PATCH 17/75] ice: docs fix a devlink info that broke a table Changeset 410d06879c01 ("ice: add the DDP Track ID to devlink info") added description for a new devlink field, but forgot to add one of its columns, causing it to break: .../Documentation/networking/devlink/ice.rst:15: WARNING: Error parsing content block for the "list-table" directive: uniform two-level bullet list expected, but row 11 does not contain the same number of items as row 1 (3 vs 4). .. list-table:: devlink info versions implemented :widths: 5 5 5 90 ... * - ``fw.app.bundle_id`` - 0xc0000001 - Unique identifier for the DDP package loaded in the device. Also referred to as the DDP Track ID. Can be used to uniquely identify the specific DDP package. Add the type field to the ``fw.app.bundle_id`` row. Fixes: 410d06879c01 ("ice: add the DDP Track ID to devlink info") Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/84ae28bda1987284033966b7b56a4b27ae40713b.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/networking/devlink/ice.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/networking/devlink/ice.rst b/Documentation/networking/devlink/ice.rst index b165181d5d4d..a432dc419fa4 100644 --- a/Documentation/networking/devlink/ice.rst +++ b/Documentation/networking/devlink/ice.rst @@ -70,6 +70,7 @@ The ``ice`` driver reports the following versions that both the name (as reported by ``fw.app.name``) and version are required to uniquely identify the package. * - ``fw.app.bundle_id`` + - running - 0xc0000001 - Unique identifier for the DDP package loaded in the device. Also referred to as the DDP Track ID. 
Can be used to uniquely identify From ef900cccb864d72292b6f5564850d157036905ea Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:21 +0100 Subject: [PATCH 18/75] MAINTAINERS: fix broken doc refs due to yaml conversion Several *.txt files got converted to yaml. Update their references at MAINTAINERS file accordingly. Signed-off-by: Mauro Carvalho Chehab Acked-by: Stephen Boyd Link: https://lore.kernel.org/r/3b58afec5195d4ea505ea9b3f74d53f7abed4e6f.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/devicetree/bindings/clock/hi6220-clock.txt | 2 +- MAINTAINERS | 9 ++++----- .../devicetree/bindings/net/wireless/silabs,wfx.yaml | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/hi6220-clock.txt b/Documentation/devicetree/bindings/clock/hi6220-clock.txt index ef3deb7b86ea..17ac4a3dd26a 100644 --- a/Documentation/devicetree/bindings/clock/hi6220-clock.txt +++ b/Documentation/devicetree/bindings/clock/hi6220-clock.txt @@ -4,7 +4,7 @@ Clock control registers reside in different Hi6220 system controllers, please refer the following document to know more about the binding rules for these system controllers: -Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt +Documentation/devicetree/bindings/arm/hisilicon/hisilicon.yaml Required Properties: diff --git a/MAINTAINERS b/MAINTAINERS index e73636b75f29..4511501cd59c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -978,7 +978,7 @@ M: Michael Hennerich L: linux-iio@vger.kernel.org S: Supported W: http://ez.analog.com/community/linux-device-drivers -F: Documentation/devicetree/bindings/iio/adc/adi,ad7768-1.txt +F: Documentation/devicetree/bindings/iio/adc/adi,ad7768-1.yaml F: drivers/iio/adc/ad7768-1.c ANALOG DEVICES INC AD7780 DRIVER @@ -3860,7 +3860,7 @@ M: Roger Quadros L: linux-usb@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git -F: 
Documentation/devicetree/bindings/usb/cdns-usb3.txt +F: Documentation/devicetree/bindings/usb/cdns,usb3.yaml F: drivers/usb/cdns3/ CADET FM/AM RADIO RECEIVER DRIVER @@ -7920,7 +7920,7 @@ HISILICON LPC BUS DRIVER M: john.garry@huawei.com S: Maintained W: http://www.hisilicon.com -F: Documentation/devicetree/bindings/arm/hisilicon/hisilicon-low-pin-count.txt +F: Documentation/devicetree/bindings/arm/hisilicon/low-pin-count.yaml F: drivers/bus/hisi_lpc.c HISILICON NETWORK SUBSYSTEM 3 DRIVER (HNS3) @@ -14902,7 +14902,6 @@ RENESAS ETHERNET DRIVERS R: Sergei Shtylyov L: netdev@vger.kernel.org L: linux-renesas-soc@vger.kernel.org -F: Documentation/devicetree/bindings/net/renesas,*.txt F: Documentation/devicetree/bindings/net/renesas,*.yaml F: drivers/net/ethernet/renesas/ F: include/linux/sh_eth.h @@ -18106,7 +18105,7 @@ M: Yu Chen M: Binghui Wang L: linux-usb@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/phy/phy-hi3660-usb3.txt +F: Documentation/devicetree/bindings/phy/hisilicon,hi3660-usb3.yaml F: drivers/phy/hisilicon/phy-hi3660-usb3.c USB ISP116X DRIVER diff --git a/drivers/staging/wfx/Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml b/drivers/staging/wfx/Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml index 43b5630c0407..510edd12ed19 100644 --- a/drivers/staging/wfx/Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml +++ b/drivers/staging/wfx/Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml @@ -24,7 +24,7 @@ description: In addition, it is recommended to declare a mmc-pwrseq on SDIO host above WFx. Without it, you may encounter issues with warm boot. The mmc-pwrseq should be compatible with mmc-pwrseq-simple. Please consult - Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt for more + Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.yaml for more information. 
For SPI':' From e3e7439dbc27d99cee40674f243ec616458ddce2 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:22 +0100 Subject: [PATCH 19/75] docs: lockdep-design: fix some warning issues There are several warnings caused by a recent change 224ec489d3cd ("lockdep/Documention: Recursive read lock detection reasoning") Those are reported by htmldocs build: Documentation/locking/lockdep-design.rst:429: WARNING: Definition list ends without a blank line; unexpected unindent. Documentation/locking/lockdep-design.rst:452: WARNING: Block quote ends without a blank line; unexpected unindent. Documentation/locking/lockdep-design.rst:453: WARNING: Unexpected indentation. Documentation/locking/lockdep-design.rst:453: WARNING: Blank line required after table. Documentation/locking/lockdep-design.rst:454: WARNING: Block quote ends without a blank line; unexpected unindent. Documentation/locking/lockdep-design.rst:455: WARNING: Unexpected indentation. Documentation/locking/lockdep-design.rst:455: WARNING: Blank line required after table. Documentation/locking/lockdep-design.rst:456: WARNING: Block quote ends without a blank line; unexpected unindent. Documentation/locking/lockdep-design.rst:457: WARNING: Unexpected indentation. Documentation/locking/lockdep-design.rst:457: WARNING: Blank line required after table. Besides the reported issues, there are some missing blank lines that ended producing wrong html output, and some literals are not properly identified. Also, the symbols used at the irq enabled/disable table are not displayed as expected, as they're not literals. Also, on another table they're using a different notation. 
Fixes: 224ec489d3cd ("lockdep/Documention: Recursive read lock detection reasoning") Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/3b9431ac5c01e38111cd59928a93e7259ab7db0f.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/locking/lockdep-design.rst | 51 ++++++++++++++---------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/Documentation/locking/lockdep-design.rst b/Documentation/locking/lockdep-design.rst index cec03bd1294a..9f3cfca9f8a4 100644 --- a/Documentation/locking/lockdep-design.rst +++ b/Documentation/locking/lockdep-design.rst @@ -42,6 +42,7 @@ The validator tracks lock-class usage history and divides the usage into (4 usages * n STATEs + 1) categories: where the 4 usages can be: + - 'ever held in STATE context' - 'ever held as readlock in STATE context' - 'ever held with STATE enabled' @@ -49,10 +50,12 @@ where the 4 usages can be: where the n STATEs are coded in kernel/locking/lockdep_states.h and as of now they include: + - hardirq - softirq where the last 1 category is: + - 'ever used' [ == !unused ] When locking rules are violated, these usage bits are presented in the @@ -96,9 +99,9 @@ exact case is for the lock as of the reporting time. +--------------+-------------+--------------+ | | irq enabled | irq disabled | +--------------+-------------+--------------+ - | ever in irq | ? | - | + | ever in irq | '?' | '-' | +--------------+-------------+--------------+ - | never in irq | + | . | + | never in irq | '+' | '.' | +--------------+-------------+--------------+ The character '-' suggests irq is disabled because if otherwise the @@ -216,7 +219,7 @@ looks like this:: BD_MUTEX_PARTITION }; -mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION); + mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION); In this case the locking is done on a bdev object that is known to be a partition. 
@@ -334,7 +337,7 @@ Troubleshooting: ---------------- The validator tracks a maximum of MAX_LOCKDEP_KEYS number of lock classes. -Exceeding this number will trigger the following lockdep warning: +Exceeding this number will trigger the following lockdep warning:: (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) @@ -420,7 +423,8 @@ the critical section of another reader of the same lock instance. The difference between recursive readers and non-recursive readers is because: recursive readers get blocked only by a write lock *holder*, while non-recursive -readers could get blocked by a write lock *waiter*. Considering the follow example: +readers could get blocked by a write lock *waiter*. Considering the follow +example:: TASK A: TASK B: @@ -448,20 +452,22 @@ There are simply four block conditions: Block condition matrix, Y means the row blocks the column, and N means otherwise. - | E | r | R | +---+---+---+---+ - E | Y | Y | Y | + | | E | r | R | +---+---+---+---+ - r | Y | Y | N | + | E | Y | Y | Y | + +---+---+---+---+ + | r | Y | Y | N | + +---+---+---+---+ + | R | Y | Y | N | +---+---+---+---+ - R | Y | Y | N | (W: writers, r: non-recursive readers, R: recursive readers) acquired recursively. Unlike non-recursive read locks, recursive read locks only get blocked by current write lock *holders* other than write lock -*waiters*, for example: +*waiters*, for example:: TASK A: TASK B: @@ -491,7 +497,7 @@ Recursive locks don't block each other, while non-recursive locks do (this is even true for two non-recursive read locks). A non-recursive lock can block the corresponding recursive lock, and vice versa. -A deadlock case with recursive locks involved is as follow: +A deadlock case with recursive locks involved is as follow:: TASK A: TASK B: @@ -510,7 +516,7 @@ because there are 3 types for lockers, there are, in theory, 9 types of lock dependencies, but we can show that 4 types of lock dependencies are enough for deadlock detection. 
-For each lock dependency: +For each lock dependency:: L1 -> L2 @@ -525,20 +531,25 @@ same types). With the above combination for simplification, there are 4 types of dependency edges in the lockdep graph: -1) -(ER)->: exclusive writer to recursive reader dependency, "X -(ER)-> Y" means +1) -(ER)->: + exclusive writer to recursive reader dependency, "X -(ER)-> Y" means X -> Y and X is a writer and Y is a recursive reader. -2) -(EN)->: exclusive writer to non-recursive locker dependency, "X -(EN)-> Y" means +2) -(EN)->: + exclusive writer to non-recursive locker dependency, "X -(EN)-> Y" means X -> Y and X is a writer and Y is either a writer or non-recursive reader. -3) -(SR)->: shared reader to recursive reader dependency, "X -(SR)-> Y" means +3) -(SR)->: + shared reader to recursive reader dependency, "X -(SR)-> Y" means X -> Y and X is a reader (recursive or not) and Y is a recursive reader. -4) -(SN)->: shared reader to non-recursive locker dependency, "X -(SN)-> Y" means +4) -(SN)->: + shared reader to non-recursive locker dependency, "X -(SN)-> Y" means X -> Y and X is a reader (recursive or not) and Y is either a writer or non-recursive reader. -Note that given two locks, they may have multiple dependencies between them, for example: +Note that given two locks, they may have multiple dependencies between them, +for example:: TASK A: @@ -592,11 +603,11 @@ circles that won't cause deadlocks. Proof for sufficiency (Lemma 1): -Let's say we have a strong circle: +Let's say we have a strong circle:: L1 -> L2 ... -> Ln -> L1 -, which means we have dependencies: +, which means we have dependencies:: L1 -> L2 L2 -> L3 @@ -633,7 +644,7 @@ a lock held by P2, and P2 is waiting for a lock held by P3, ... and Pn is waitin for a lock held by P1. Let's name the lock Px is waiting as Lx, so since P1 is waiting for L1 and holding Ln, so we will have Ln -> L1 in the dependency graph. 
Similarly, we have L1 -> L2, L2 -> L3, ..., Ln-1 -> Ln in the dependency graph, which means we -have a circle: +have a circle:: Ln -> L1 -> L2 -> ... -> Ln From cf38cc9f1e71151f22584c40357afaab6609384b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:23 +0100 Subject: [PATCH 20/75] locking/refcount: move kernel-doc markups to the proper place Changeset a435b9a14356 ("locking/refcount: Provide __refcount API to obtain the old value") added a set of functions starting with __ that have a new parameter, adding a series of new warnings: $ ./scripts/kernel-doc -none include/linux/refcount.h include/linux/refcount.h:169: warning: Function parameter or member 'oldp' not described in '__refcount_add_not_zero' include/linux/refcount.h:208: warning: Function parameter or member 'oldp' not described in '__refcount_add' include/linux/refcount.h:239: warning: Function parameter or member 'oldp' not described in '__refcount_inc_not_zero' include/linux/refcount.h:261: warning: Function parameter or member 'oldp' not described in '__refcount_inc' include/linux/refcount.h:291: warning: Function parameter or member 'oldp' not described in '__refcount_sub_and_test' include/linux/refcount.h:327: warning: Function parameter or member 'oldp' not described in '__refcount_dec_and_test' include/linux/refcount.h:347: warning: Function parameter or member 'oldp' not described in '__refcount_dec' The issue is that the kernel-doc markups are now misplaced, as they should be added just before the functions. So, move the kernel-doc markups to the proper places, in order to drop the warnings. It should be noticed that git show produces a crappy output, for this patch without "--patience" flag. 
Fixes: a435b9a14356 ("locking/refcount: Provide __refcount API to obtain the old value") Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/7985c31d1ace591bc5e1faa05c367f1295b78afd.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/refcount.h | 150 +++++++++++++++++++-------------------- 1 file changed, 75 insertions(+), 75 deletions(-) diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 7fabb1af18e0..497990c69b0b 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -147,24 +147,6 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } -/** - * refcount_add_not_zero - add a value to a refcount unless it is 0 - * @i: the value to add to the refcount - * @r: the refcount - * - * Will saturate at REFCOUNT_SATURATED and WARN. - * - * Provides no memory ordering, it is assumed the caller has guaranteed the - * object memory to be stable (RCU, etc.). It does provide a control dependency - * and thereby orders future stores. See the comment on top. - * - * Use of this function is not recommended for the normal reference counting - * use case in which references are taken and released one at a time. In these - * cases, refcount_inc(), or one of its variants, should instead be used to - * increment a reference count. - * - * Return: false if the passed refcount is 0, true otherwise - */ static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp) { int old = refcount_read(r); @@ -183,11 +165,42 @@ static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, in return old; } +/** + * refcount_add_not_zero - add a value to a refcount unless it is 0 + * @i: the value to add to the refcount + * @r: the refcount + * + * Will saturate at REFCOUNT_SATURATED and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). 
It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_inc(), or one of its variants, should instead be used to + * increment a reference count. + * + * Return: false if the passed refcount is 0, true otherwise + */ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) { return __refcount_add_not_zero(i, r, NULL); } +static inline void __refcount_add(int i, refcount_t *r, int *oldp) +{ + int old = atomic_fetch_add_relaxed(i, &r->refs); + + if (oldp) + *oldp = old; + + if (unlikely(!old)) + refcount_warn_saturate(r, REFCOUNT_ADD_UAF); + else if (unlikely(old < 0 || old + i < 0)) + refcount_warn_saturate(r, REFCOUNT_ADD_OVF); +} + /** * refcount_add - add a value to a refcount * @i: the value to add to the refcount @@ -204,24 +217,16 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) * cases, refcount_inc(), or one of its variants, should instead be used to * increment a reference count. 
*/ -static inline void __refcount_add(int i, refcount_t *r, int *oldp) -{ - int old = atomic_fetch_add_relaxed(i, &r->refs); - - if (oldp) - *oldp = old; - - if (unlikely(!old)) - refcount_warn_saturate(r, REFCOUNT_ADD_UAF); - else if (unlikely(old < 0 || old + i < 0)) - refcount_warn_saturate(r, REFCOUNT_ADD_OVF); -} - static inline void refcount_add(int i, refcount_t *r) { __refcount_add(i, r, NULL); } +static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp) +{ + return __refcount_add_not_zero(1, r, oldp); +} + /** * refcount_inc_not_zero - increment a refcount unless it is 0 * @r: the refcount to increment @@ -235,16 +240,16 @@ static inline void refcount_add(int i, refcount_t *r) * * Return: true if the increment was successful, false otherwise */ -static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp) -{ - return __refcount_add_not_zero(1, r, oldp); -} - static inline __must_check bool refcount_inc_not_zero(refcount_t *r) { return __refcount_inc_not_zero(r, NULL); } +static inline void __refcount_inc(refcount_t *r, int *oldp) +{ + __refcount_add(1, r, oldp); +} + /** * refcount_inc - increment a refcount * @r: the refcount to increment @@ -257,16 +262,29 @@ static inline __must_check bool refcount_inc_not_zero(refcount_t *r) * Will WARN if the refcount is 0, as this represents a possible use-after-free * condition. 
*/ -static inline void __refcount_inc(refcount_t *r, int *oldp) -{ - __refcount_add(1, r, oldp); -} - static inline void refcount_inc(refcount_t *r) { __refcount_inc(r, NULL); } +static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) +{ + int old = atomic_fetch_sub_release(i, &r->refs); + + if (oldp) + *oldp = old; + + if (old == i) { + smp_acquire__after_ctrl_dep(); + return true; + } + + if (unlikely(old < 0 || old - i < 0)) + refcount_warn_saturate(r, REFCOUNT_SUB_UAF); + + return false; +} + /** * refcount_sub_and_test - subtract from a refcount and test if it is 0 * @i: amount to subtract from the refcount @@ -287,29 +305,16 @@ static inline void refcount_inc(refcount_t *r) * * Return: true if the resulting refcount is 0, false otherwise */ -static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) -{ - int old = atomic_fetch_sub_release(i, &r->refs); - - if (oldp) - *oldp = old; - - if (old == i) { - smp_acquire__after_ctrl_dep(); - return true; - } - - if (unlikely(old < 0 || old - i < 0)) - refcount_warn_saturate(r, REFCOUNT_SUB_UAF); - - return false; -} - static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) { return __refcount_sub_and_test(i, r, NULL); } +static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp) +{ + return __refcount_sub_and_test(1, r, oldp); +} + /** * refcount_dec_and_test - decrement a refcount and test if it is 0 * @r: the refcount @@ -323,16 +328,22 @@ static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) * * Return: true if the resulting refcount is 0, false otherwise */ -static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp) -{ - return __refcount_sub_and_test(1, r, oldp); -} - static inline __must_check bool refcount_dec_and_test(refcount_t *r) { return __refcount_dec_and_test(r, NULL); } +static inline void __refcount_dec(refcount_t *r, int *oldp) +{ + int old = 
atomic_fetch_sub_release(1, &r->refs); + + if (oldp) + *oldp = old; + + if (unlikely(old <= 1)) + refcount_warn_saturate(r, REFCOUNT_DEC_LEAK); +} + /** * refcount_dec - decrement a refcount * @r: the refcount @@ -343,17 +354,6 @@ static inline __must_check bool refcount_dec_and_test(refcount_t *r) * Provides release memory ordering, such that prior loads and stores are done * before. */ -static inline void __refcount_dec(refcount_t *r, int *oldp) -{ - int old = atomic_fetch_sub_release(1, &r->refs); - - if (oldp) - *oldp = old; - - if (unlikely(old <= 1)) - refcount_warn_saturate(r, REFCOUNT_DEC_LEAK); -} - static inline void refcount_dec(refcount_t *r) { __refcount_dec(r, NULL); From 1166eb3d5268e8445ffb9b7bac432dfb293bce1d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:24 +0100 Subject: [PATCH 21/75] IB/srpt: docs: add a description for cq_size member Changeset c804af2c1d31 ("IB/srpt: use new shared CQ mechanism") added a new member for struct srpt_rdma_ch, but didn't add the corresponding kernel-doc markup, as reported when doing "make htmldocs": ./drivers/infiniband/ulp/srpt/ib_srpt.h:331: warning: Function parameter or member 'cq_size' not described in 'srpt_rdma_ch' Add a description for it. Fixes: c804af2c1d31 ("IB/srpt: use new shared CQ mechanism") Signed-off-by: Mauro Carvalho Chehab Tested-by: Brendan Higgins Reviewed-by: Brendan Higgins Link: https://lore.kernel.org/r/df0e5f0e866b91724299ef569a2da8115e48c0cf.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- drivers/infiniband/ulp/srpt/ib_srpt.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 41435a699b53..bdeb010efee6 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -256,6 +256,7 @@ enum rdma_ch_state { * @rdma_cm: See below. * @rdma_cm.cm_id: RDMA CM ID associated with the channel.
* @cq: IB completion queue for this channel. + * @cq_size: Number of CQEs in @cq. * @zw_cqe: Zero-length write CQE. * @rcu: RCU head. * @kref: kref for this channel. From 7c128a249c7e7697b5bd4b0ca27c1f4fe5c64f6a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:25 +0100 Subject: [PATCH 22/75] docs: fs: api-summary.rst: get rid of kernel-doc include The direct-io.c file used to have just two exported symbols: - dio_end_io() - __blockdev_direct_IO() The first one was removed by changeset c33fe275b530 ("fs: remove no longer used dio_end_io()") And the last one is used on most places indirectly, via the inline macro blockdev_direct_IO() provided by fs.h. Yet, neither the macro nor the function has kernel-doc markups. So, drop the inclusion of fs/direct-io.c at the docs. Fixes: c33fe275b530 ("fs: remove no longer used dio_end_io()") Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/d0a9fffedca102633c168adaf157f34288a4ea67.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/api-summary.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/filesystems/api-summary.rst b/Documentation/filesystems/api-summary.rst index bbb0c1c0e5cf..a94f17d9b836 100644 --- a/Documentation/filesystems/api-summary.rst +++ b/Documentation/filesystems/api-summary.rst @@ -86,9 +86,6 @@ Other Functions .. kernel-doc:: fs/dax.c :export: -.. kernel-doc:: fs/direct-io.c - :export: - ..
kernel-doc:: fs/libfs.c :export: From ca766ff0c3b4a19ab2eef9a24161f77668c358c4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:26 +0100 Subject: [PATCH 23/75] drm: amdgpu: kernel-doc: update some adev parameters Running "make htmldocs" produces lots of warnings on those files: ./drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c:177: warning: Excess function parameter 'man' description in 'amdgpu_vram_mgr_init' ./drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c:177: warning: Excess function parameter 'p_size' description in 'amdgpu_vram_mgr_init' ./drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c:211: warning: Excess function parameter 'man' description in 'amdgpu_vram_mgr_fini' ./drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c:177: warning: Excess function parameter 'man' description in 'amdgpu_vram_mgr_init' ./drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c:177: warning: Excess function parameter 'p_size' description in 'amdgpu_vram_mgr_init' ./drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c:211: warning: Excess function parameter 'man' description in 'amdgpu_vram_mgr_fini' ./drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c:177: warning: Excess function parameter 'man' description in 'amdgpu_vram_mgr_init' ./drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c:177: warning: Excess function parameter 'p_size' description in 'amdgpu_vram_mgr_init' ./drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c:211: warning: Excess function parameter 'man' description in 'amdgpu_vram_mgr_fini' ./drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c:177: warning: Excess function parameter 'man' description in 'amdgpu_vram_mgr_init' ./drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c:177: warning: Excess function parameter 'p_size' description in 'amdgpu_vram_mgr_init' ./drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c:211: warning: Excess function parameter 'man' description in 'amdgpu_vram_mgr_fini' ./drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c:90: warning: Excess function parameter 'man' description
in 'amdgpu_gtt_mgr_init' ./drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c:90: warning: Excess function parameter 'p_size' description in 'amdgpu_gtt_mgr_init' ./drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c:134: warning: Excess function parameter 'man' description in 'amdgpu_gtt_mgr_fini' ./drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c:90: warning: Excess function parameter 'man' description in 'amdgpu_gtt_mgr_init' ./drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c:90: warning: Excess function parameter 'p_size' description in 'amdgpu_gtt_mgr_init' ./drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c:134: warning: Excess function parameter 'man' description in 'amdgpu_gtt_mgr_fini' ./drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:675: warning: Excess function parameter 'dev' description in 'amdgpu_device_asic_init' ./drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:675: warning: Excess function parameter 'dev' description in 'amdgpu_device_asic_init' ./drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:675: warning: Excess function parameter 'dev' description in 'amdgpu_device_asic_init' ./drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:675: warning: Excess function parameter 'dev' description in 'amdgpu_device_asic_init' They're related to the replacement of some parameters by adev, and due to a few renamed parameters. While here, unify the name of the parameter so that it is the same on all functions using a pointer to struct amdgpu_device. Update the kernel-doc documentation accordingly.
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/5755c2b361890b8ae5cea0f61dfd70b1c135eefe.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 28 ++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 6 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 7 +++-- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 37da3537ba2e..641c039150eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -705,7 +705,7 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, /** * amdgpu_invalid_rreg - dummy reg read function * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer * @reg: offset of register * * Dummy register read function. Used for register blocks @@ -722,7 +722,7 @@ static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg) /** * amdgpu_invalid_wreg - dummy reg write function * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer * @reg: offset of register * @v: value to write to the register * @@ -739,7 +739,7 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32 /** * amdgpu_invalid_rreg64 - dummy 64 bit reg read function * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer * @reg: offset of register * * Dummy register read function. 
Used for register blocks @@ -756,7 +756,7 @@ static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg) /** * amdgpu_invalid_wreg64 - dummy reg write function * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer * @reg: offset of register * @v: value to write to the register * @@ -773,7 +773,7 @@ static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint /** * amdgpu_block_invalid_rreg - dummy reg read function * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer * @block: offset of instance * @reg: offset of register * @@ -793,7 +793,7 @@ static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev, /** * amdgpu_block_invalid_wreg - dummy reg write function * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer * @block: offset of instance * @reg: offset of register * @v: value to write to the register @@ -813,7 +813,7 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev, /** * amdgpu_device_asic_init - Wrapper for atom asic_init * - * @dev: drm_device pointer + * @adev: amdgpu_device pointer * * Does any asic specific work and then calls atom asic init. */ @@ -827,7 +827,7 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev) /** * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer * * Allocates a scratch page of VRAM for use by various things in the * driver. @@ -844,7 +844,7 @@ static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev) /** * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer * * Frees the VRAM scratch page. 
*/ @@ -3011,7 +3011,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) /** * amdgpu_device_has_dc_support - check if dc is supported * - * @adev: amdgpu_device_pointer + * @adev: amdgpu_device pointer * * Returns true for supported, false for not supported */ @@ -4045,7 +4045,7 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) /** * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer * @from_hypervisor: request from hypervisor * * do VF FLR and reinitialize Asic @@ -4100,7 +4100,7 @@ error: /** * amdgpu_device_has_job_running - check if there is any job in mirror list * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer * * check if there is any job in mirror list */ @@ -4128,7 +4128,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) /** * amdgpu_device_should_recover_gpu - check if we should try GPU recovery * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer * * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover * a hung GPU. @@ -4477,7 +4477,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) /** * amdgpu_device_gpu_recover - reset the asic and recover scheduler * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer * @job: which job trigger hang * * Attempt to reset the GPU if it has hung (all asics). 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index f203e4a6a3f2..731f3aa2e6ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -81,8 +81,8 @@ static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func; /** * amdgpu_gtt_mgr_init - init GTT manager and DRM MM * - * @man: TTM memory type manager - * @p_size: maximum size of GTT + * @adev: amdgpu_device pointer + * @gtt_size: maximum size of GTT * * Allocate and initialize the GTT manager. */ @@ -123,7 +123,7 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size) /** * amdgpu_gtt_mgr_fini - free and destroy GTT manager * - * @man: TTM memory type manager + * @adev: amdgpu_device pointer * * Destroy and free the GTT manager, returns -EBUSY if ranges are still * allocated inside it. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 01c1171afbe0..0c6b7c5ecfec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -168,8 +168,7 @@ static const struct ttm_resource_manager_func amdgpu_vram_mgr_func; /** * amdgpu_vram_mgr_init - init VRAM manager and DRM MM * - * @man: TTM memory type manager - * @p_size: maximum size of VRAM + * @adev: amdgpu_device pointer * * Allocate and initialize the VRAM manager. */ @@ -199,7 +198,7 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) /** * amdgpu_vram_mgr_fini - free and destroy VRAM manager * - * @man: TTM memory type manager + * @adev: amdgpu_device pointer * * Destroy and free the VRAM manager, returns -EBUSY if ranges are still * allocated inside it. 
@@ -229,7 +228,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) /** * amdgpu_vram_mgr_vis_size - Calculate visible node size * - * @adev: amdgpu device structure + * @adev: amdgpu_device pointer * @node: MM node structure * * Calculate how many bytes of the MM node are inside visible VRAM From cc507c435b05b23f62ae32c092de2da972a98d8e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:30 +0100 Subject: [PATCH 24/75] gpu: docs: amdgpu.rst: get rid of wrong kernel-doc markups As reported by kernel-doc: ./drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c:1: warning: no structured comments found ./drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:1: warning: no structured comments found Those files only contain /** * DOC: */ markups, but they're included twice there: one to parse such markup, and another one to parse internal functions. In the case of amdgpu_xgmi.c, as it has just one such markup, we can simply include the file once, and let it parse the entire file without passing arguments to kernel-doc. This should place everything altogether. For amdgpu_ras.c, however, we need to remove the kernel-doc with just internal. This should be re-introduced if this file ever gets new non-DOC markups. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/bd070923591ae54f9587e7407b6291ac116952b2.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/gpu/amdgpu.rst | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst index 1f9ea8221f80..2062a6023678 100644 --- a/Documentation/gpu/amdgpu.rst +++ b/Documentation/gpu/amdgpu.rst @@ -83,10 +83,6 @@ AMDGPU XGMI Support =================== .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c - :doc: AMDGPU XGMI Support - -.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c - :internal: AMDGPU RAS Support ================== @@ -124,9 +120,6 @@ RAS VRAM Bad Pages sysfs Interface .. 
kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c :doc: AMDGPU RAS sysfs gpu_vram_bad_pages Interface -.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c - :internal: - Sample Code ----------- Sample code for testing error injection can be found here: From 9cd70d05132266128ccfa3d61264ef061568f049 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:32 +0100 Subject: [PATCH 25/75] drm: amdgpu_dm: fix a typo dm_comressor_info -> dm_compressor_info The kernel-doc markup is right, but the struct itself and their references contain a typo. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/9de495fa791596609eb2e73ba71cea99e09b2689.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e2b23486ba4c..373b8481f76c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -583,7 +583,7 @@ static void amdgpu_dm_fbc_init(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct amdgpu_device *adev = drm_to_adev(dev); - struct dm_comressor_info *compressor = &adev->dm.compressor; + struct dm_compressor_info *compressor = &adev->dm.compressor; struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(connector); struct drm_display_mode *mode; unsigned long max_size = 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 34f6369bf51f..a8a0e8cb1a11 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -86,7 +86,7 @@ struct irq_list_head { * @bo_ptr: Pointer to the buffer object * @gpu_addr: MMIO gpu 
addr */ -struct dm_comressor_info { +struct dm_compressor_info { void *cpu_addr; struct amdgpu_bo *bo_ptr; uint64_t gpu_addr; @@ -148,7 +148,7 @@ struct amdgpu_dm_backlight_caps { * @soc_bounding_box: SOC bounding box values provided by gpu_info FW * @cached_state: Caches device atomic state for suspend/resume * @cached_dc_state: Cached state of content streams - * @compressor: Frame buffer compression buffer. See &struct dm_comressor_info + * @compressor: Frame buffer compression buffer. See &struct dm_compressor_info * @force_timing_sync: set via debugfs. When set, indicates that all connected * displays will be forced to synchronize. */ @@ -324,7 +324,7 @@ struct amdgpu_display_manager { struct drm_atomic_state *cached_state; struct dc_state *cached_dc_state; - struct dm_comressor_info compressor; + struct dm_compressor_info compressor; const struct firmware *fw_dmcu; uint32_t dmcu_fw_version; From d2692abd6fa9866fda3052efa5cbd116b9fec56b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:33 +0100 Subject: [PATCH 26/75] selftests: kselftest_harness.h: fix kernel-doc markups The kernel-doc markups there violate the expected syntax, so kernel-doc cannot parse the name of the markup identifier properly, which prevents it from checking whether the kernel-doc matches the #define below each markup.
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/697640045663f1366beb15e76e78b420dac5f5a2.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- tools/testing/selftests/kselftest_harness.h | 44 ++++++++++----------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index f19804df244c..d747d6b1da1a 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -432,7 +432,7 @@ */ /** - * ASSERT_EQ(expected, seen) + * ASSERT_EQ() * * @expected: expected value * @seen: measured value @@ -443,7 +443,7 @@ __EXPECT(expected, #expected, seen, #seen, ==, 1) /** - * ASSERT_NE(expected, seen) + * ASSERT_NE() * * @expected: expected value * @seen: measured value @@ -454,7 +454,7 @@ __EXPECT(expected, #expected, seen, #seen, !=, 1) /** - * ASSERT_LT(expected, seen) + * ASSERT_LT() * * @expected: expected value * @seen: measured value @@ -465,7 +465,7 @@ __EXPECT(expected, #expected, seen, #seen, <, 1) /** - * ASSERT_LE(expected, seen) + * ASSERT_LE() * * @expected: expected value * @seen: measured value @@ -476,7 +476,7 @@ __EXPECT(expected, #expected, seen, #seen, <=, 1) /** - * ASSERT_GT(expected, seen) + * ASSERT_GT() * * @expected: expected value * @seen: measured value @@ -487,7 +487,7 @@ __EXPECT(expected, #expected, seen, #seen, >, 1) /** - * ASSERT_GE(expected, seen) + * ASSERT_GE() * * @expected: expected value * @seen: measured value @@ -498,7 +498,7 @@ __EXPECT(expected, #expected, seen, #seen, >=, 1) /** - * ASSERT_NULL(seen) + * ASSERT_NULL() * * @seen: measured value * @@ -508,7 +508,7 @@ __EXPECT(NULL, "NULL", seen, #seen, ==, 1) /** - * ASSERT_TRUE(seen) + * ASSERT_TRUE() * * @seen: measured value * @@ -518,7 +518,7 @@ __EXPECT(0, "0", seen, #seen, !=, 1) /** - * ASSERT_FALSE(seen) + * ASSERT_FALSE() * * @seen: measured value * @@ -528,7 +528,7 @@ __EXPECT(0, "0", seen, 
#seen, ==, 1) /** - * ASSERT_STREQ(expected, seen) + * ASSERT_STREQ() * * @expected: expected value * @seen: measured value @@ -539,7 +539,7 @@ __EXPECT_STR(expected, seen, ==, 1) /** - * ASSERT_STRNE(expected, seen) + * ASSERT_STRNE() * * @expected: expected value * @seen: measured value @@ -550,7 +550,7 @@ __EXPECT_STR(expected, seen, !=, 1) /** - * EXPECT_EQ(expected, seen) + * EXPECT_EQ() * * @expected: expected value * @seen: measured value @@ -561,7 +561,7 @@ __EXPECT(expected, #expected, seen, #seen, ==, 0) /** - * EXPECT_NE(expected, seen) + * EXPECT_NE() * * @expected: expected value * @seen: measured value @@ -572,7 +572,7 @@ __EXPECT(expected, #expected, seen, #seen, !=, 0) /** - * EXPECT_LT(expected, seen) + * EXPECT_LT() * * @expected: expected value * @seen: measured value @@ -583,7 +583,7 @@ __EXPECT(expected, #expected, seen, #seen, <, 0) /** - * EXPECT_LE(expected, seen) + * EXPECT_LE() * * @expected: expected value * @seen: measured value @@ -594,7 +594,7 @@ __EXPECT(expected, #expected, seen, #seen, <=, 0) /** - * EXPECT_GT(expected, seen) + * EXPECT_GT() * * @expected: expected value * @seen: measured value @@ -605,7 +605,7 @@ __EXPECT(expected, #expected, seen, #seen, >, 0) /** - * EXPECT_GE(expected, seen) + * EXPECT_GE() * * @expected: expected value * @seen: measured value @@ -616,7 +616,7 @@ __EXPECT(expected, #expected, seen, #seen, >=, 0) /** - * EXPECT_NULL(seen) + * EXPECT_NULL() * * @seen: measured value * @@ -626,7 +626,7 @@ __EXPECT(NULL, "NULL", seen, #seen, ==, 0) /** - * EXPECT_TRUE(seen) + * EXPECT_TRUE() * * @seen: measured value * @@ -636,7 +636,7 @@ __EXPECT(0, "0", seen, #seen, !=, 0) /** - * EXPECT_FALSE(seen) + * EXPECT_FALSE() * * @seen: measured value * @@ -646,7 +646,7 @@ __EXPECT(0, "0", seen, #seen, ==, 0) /** - * EXPECT_STREQ(expected, seen) + * EXPECT_STREQ() * * @expected: expected value * @seen: measured value @@ -657,7 +657,7 @@ __EXPECT_STR(expected, seen, ==, 0) /** - * EXPECT_STRNE(expected, seen) + * 
EXPECT_STRNE() * * @expected: expected value * @seen: measured value From b28d70c6a515580b29ce2be53e585bd86c8b8c8c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:34 +0100 Subject: [PATCH 27/75] amdgpu: fix a few kernel-doc markup issues A kernel-doc markup can't be mixed with a random comment, as it causes parsing problems. While here, change an invalid kernel-doc markup into a common comment. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/e899f50404e94ac9a7c3267dd34f951c1a44fb2b.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 641c039150eb..5dbe93fe884c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -239,9 +239,11 @@ bool amdgpu_device_supports_baco(struct drm_device *dev) return amdgpu_asic_supports_baco(adev); } +/* + * VRAM access helper functions + */ + /** - * VRAM access helper functions. - * * amdgpu_device_vram_access - read/write a buffer in vram * * @adev: amdgpu_device pointer @@ -4497,7 +4499,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, bool need_emergency_restart = false; bool audio_suspended = false; - /** + /* * Special case: RAS triggered and full reset isn't supported */ need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); From afc74ce7b484da5c5698d8eb2472a58c547cbc2b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:36 +0100 Subject: [PATCH 28/75] docs: SafeSetID: fix a warning As reported by Sphinx 2.4.4: docs/Documentation/admin-guide/LSM/SafeSetID.rst:110: WARNING: Title underline too short. 
Note on GID policies and setgroups() ================== Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/4afa281c170daabd1ce522653d5d5d5078ebd92c.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/LSM/SafeSetID.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/LSM/SafeSetID.rst b/Documentation/admin-guide/LSM/SafeSetID.rst index 17996c9070e2..0ec34863c674 100644 --- a/Documentation/admin-guide/LSM/SafeSetID.rst +++ b/Documentation/admin-guide/LSM/SafeSetID.rst @@ -107,7 +107,7 @@ for a UID/GID will prevent that UID/GID from obtaining auxiliary setid privileges, such as allowing a user to set up user namespace UID/GID mappings. Note on GID policies and setgroups() -================== +==================================== In v5.9 we are adding support for limiting CAP_SETGID privileges as was done previously for CAP_SETUID. However, for compatibility with common sandboxing related code conventions in userspace, we currently allow arbitrary From 3ad84246a4097010f3ae3d6944120c0be00e9e7a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 28 Oct 2020 17:46:55 +0100 Subject: [PATCH 29/75] x86/boot/compressed/64: Introduce sev_status Introduce sev_status and initialize it together with sme_me_mask to have an indicator which SEV features are enabled. 
Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Reviewed-by: Tom Lendacky Link: https://lkml.kernel.org/r/20201028164659.27002-2-joro@8bytes.org --- arch/x86/boot/compressed/mem_encrypt.S | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index dd07e7b41b11..3092ae173f94 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -81,6 +81,19 @@ SYM_FUNC_START(set_sev_encryption_mask) bts %rax, sme_me_mask(%rip) /* Create the encryption mask */ + /* + * Read MSR_AMD64_SEV again and store it to sev_status. Can't do this in + * get_sev_encryption_bit() because this function is 32-bit code and + * shared between 64-bit and 32-bit boot path. + */ + movl $MSR_AMD64_SEV, %ecx /* Read the SEV MSR */ + rdmsr + + /* Store MSR value in sev_status */ + shlq $32, %rdx + orq %rdx, %rax + movq %rax, sev_status(%rip) + .Lno_sev_mask: movq %rbp, %rsp /* Restore original stack pointer */ @@ -96,5 +109,6 @@ SYM_FUNC_END(set_sev_encryption_mask) #ifdef CONFIG_AMD_MEM_ENCRYPT .balign 8 -SYM_DATA(sme_me_mask, .quad 0) +SYM_DATA(sme_me_mask, .quad 0) +SYM_DATA(sev_status, .quad 0) #endif From ed7b895f3efb5df184722f5a30f8164fcaffceb1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 28 Oct 2020 17:46:56 +0100 Subject: [PATCH 30/75] x86/boot/compressed/64: Sanity-check CPUID results in the early #VC handler The early #VC handler which doesn't have a GHCB can only handle CPUID exit codes. It is needed by the early boot code to handle #VC exceptions raised in verify_cpu() and to get the position of the C-bit. But the CPUID information comes from the hypervisor which is untrusted and might return results which trick the guest into the no-SEV boot path with no C-bit set in the page-tables. All data written to memory would then be unencrypted and could leak sensitive data to the hypervisor. 
Add sanity checks to the early #VC handler to make sure the hypervisor can not pretend that SEV is disabled. [ bp: Massage a bit. ] Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Reviewed-by: Tom Lendacky Link: https://lkml.kernel.org/r/20201028164659.27002-3-joro@8bytes.org --- arch/x86/kernel/sev-es-shared.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c index 5f83ccaab877..7d04b356d44d 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-es-shared.c @@ -178,6 +178,32 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) goto fail; regs->dx = val >> 32; + /* + * This is a VC handler and the #VC is only raised when SEV-ES is + * active, which means SEV must be active too. Do sanity checks on the + * CPUID results to make sure the hypervisor does not trick the kernel + * into the no-sev path. This could map sensitive data unencrypted and + * make it accessible to the hypervisor. + * + * In particular, check for: + * - Hypervisor CPUID bit + * - Availability of CPUID leaf 0x8000001f + * - SEV CPUID bit. + * + * The hypervisor might still report the wrong C-bit position, but this + * can't be checked here. + */ + + if ((fn == 1 && !(regs->cx & BIT(31)))) + /* Hypervisor bit */ + goto fail; + else if (fn == 0x80000000 && (regs->ax < 0x8000001f)) + /* SEV leaf check */ + goto fail; + else if ((fn == 0x8000001f && !(regs->ax & BIT(1)))) + /* SEV bit */ + goto fail; + /* Skip over the CPUID two-byte opcode */ regs->ip += 2; From 86ce43f7dde81562f58b24b426cef068bd9f7595 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 28 Oct 2020 17:46:57 +0100 Subject: [PATCH 31/75] x86/boot/compressed/64: Check SEV encryption in 64-bit boot-path Check whether the hypervisor reported the correct C-bit when running as an SEV guest. Using a wrong C-bit position could be used to leak sensitive data from the guest to the hypervisor. 
The check function is in a separate file: arch/x86/kernel/sev_verify_cbit.S so that it can be re-used in the running kernel image. [ bp: Massage. ] Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Reviewed-by: Tom Lendacky Link: https://lkml.kernel.org/r/20201028164659.27002-4-joro@8bytes.org --- arch/x86/boot/compressed/ident_map_64.c | 1 + arch/x86/boot/compressed/mem_encrypt.S | 4 ++ arch/x86/boot/compressed/misc.h | 2 + arch/x86/kernel/sev_verify_cbit.S | 89 +++++++++++++++++++++++++ 4 files changed, 96 insertions(+) create mode 100644 arch/x86/kernel/sev_verify_cbit.S diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index a5e5db6ada3c..39b2eded7bc2 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -164,6 +164,7 @@ void initialize_identity_maps(void *rmode) add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE); /* Load the new page-table. */ + sev_verify_cbit(top_level_pgt); write_cr3(top_level_pgt); } diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index 3092ae173f94..aa561795efd1 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -68,6 +68,9 @@ SYM_FUNC_START(get_sev_encryption_bit) SYM_FUNC_END(get_sev_encryption_bit) .code64 + +#include "../../kernel/sev_verify_cbit.S" + SYM_FUNC_START(set_sev_encryption_mask) #ifdef CONFIG_AMD_MEM_ENCRYPT push %rbp @@ -111,4 +114,5 @@ SYM_FUNC_END(set_sev_encryption_mask) .balign 8 SYM_DATA(sme_me_mask, .quad 0) SYM_DATA(sev_status, .quad 0) +SYM_DATA(sev_check_data, .quad 0) #endif diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 6d31f1b4c4d1..d9a631c5973c 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -159,4 +159,6 @@ void boot_page_fault(void); void boot_stage1_vc(void); void boot_stage2_vc(void); +unsigned long sev_verify_cbit(unsigned long cr3); 
+ #endif /* BOOT_COMPRESSED_MISC_H */ diff --git a/arch/x86/kernel/sev_verify_cbit.S b/arch/x86/kernel/sev_verify_cbit.S new file mode 100644 index 000000000000..ee04941a6546 --- /dev/null +++ b/arch/x86/kernel/sev_verify_cbit.S @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * sev_verify_cbit.S - Code for verification of the C-bit position reported + * by the Hypervisor when running with SEV enabled. + * + * Copyright (c) 2020 Joerg Roedel (jroedel@suse.de) + * + * sev_verify_cbit() is called before switching to a new long-mode page-table + * at boot. + * + * Verify that the C-bit position is correct by writing a random value to + * an encrypted memory location while on the current page-table. Then it + * switches to the new page-table to verify the memory content is still the + * same. After that it switches back to the current page-table and when the + * check succeeded it returns. If the check failed the code invalidates the + * stack pointer and goes into a hlt loop. The stack-pointer is invalidated to + * make sure no interrupt or exception can get the CPU out of the hlt loop. + * + * New page-table pointer is expected in %rdi (first parameter) + * + */ +SYM_FUNC_START(sev_verify_cbit) +#ifdef CONFIG_AMD_MEM_ENCRYPT + /* First check if a C-bit was detected */ + movq sme_me_mask(%rip), %rsi + testq %rsi, %rsi + jz 3f + + /* sme_me_mask != 0 could mean SME or SEV - Check also for SEV */ + movq sev_status(%rip), %rsi + testq %rsi, %rsi + jz 3f + + /* Save CR4 in %rsi */ + movq %cr4, %rsi + + /* Disable Global Pages */ + movq %rsi, %rdx + andq $(~X86_CR4_PGE), %rdx + movq %rdx, %cr4 + + /* + * Verified that running under SEV - now get a random value using + * RDRAND. This instruction is mandatory when running as an SEV guest. + * + * Don't bail out of the loop if RDRAND returns errors. It is better to + * prevent forward progress than to work with a non-random value here. 
+ */ +1: rdrand %rdx + jnc 1b + + /* Store value to memory and keep it in %rdx */ + movq %rdx, sev_check_data(%rip) + + /* Backup current %cr3 value to restore it later */ + movq %cr3, %rcx + + /* Switch to new %cr3 - This might unmap the stack */ + movq %rdi, %cr3 + + /* + * Compare value in %rdx with memory location. If C-bit is incorrect + * this would read the encrypted data and make the check fail. + */ + cmpq %rdx, sev_check_data(%rip) + + /* Restore old %cr3 */ + movq %rcx, %cr3 + + /* Restore previous CR4 */ + movq %rsi, %cr4 + + /* Check CMPQ result */ + je 3f + + /* + * The check failed, prevent any forward progress to prevent ROP + * attacks, invalidate the stack and go into a hlt loop. + */ + xorq %rsp, %rsp + subq $0x1000, %rsp +2: hlt + jmp 2b +3: +#endif + /* Return page-table pointer */ + movq %rdi, %rax + ret +SYM_FUNC_END(sev_verify_cbit) From c9f09539e16e281f92a27760fdfae71e8af036f6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 28 Oct 2020 17:46:58 +0100 Subject: [PATCH 32/75] x86/head/64: Check SEV encryption before switching to kernel page-table When SEV is enabled, the kernel requests the C-bit position again from the hypervisor to build its own page-table. Since the hypervisor is an untrusted source, the C-bit position needs to be verified before the kernel page-table is used. Call sev_verify_cbit() before writing the CR3. [ bp: Massage. ] Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Reviewed-by: Tom Lendacky Link: https://lkml.kernel.org/r/20201028164659.27002-5-joro@8bytes.org --- arch/x86/kernel/head_64.S | 16 ++++++++++++++++ arch/x86/mm/mem_encrypt.c | 1 + 2 files changed, 17 insertions(+) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 7eb2a1c87969..3c417734790f 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -161,6 +161,21 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) /* Setup early boot stage 4-/5-level pagetables. 
*/ addq phys_base(%rip), %rax + + /* + * For SEV guests: Verify that the C-bit is correct. A malicious + * hypervisor could lie about the C-bit position to perform a ROP + * attack on the guest by writing to the unencrypted stack and wait for + * the next RET instruction. + * %rsi carries pointer to realmode data and is callee-clobbered. Save + * and restore it. + */ + pushq %rsi + movq %rax, %rdi + call sev_verify_cbit + popq %rsi + + /* Switch to new page-table */ movq %rax, %cr3 /* Ensure I am executing from virtual addresses */ @@ -279,6 +294,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) SYM_CODE_END(secondary_startup_64) #include "verify_cpu.S" +#include "sev_verify_cbit.S" #ifdef CONFIG_HOTPLUG_CPU /* diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index efbb3de472df..bc0833713be9 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -39,6 +39,7 @@ */ u64 sme_me_mask __section(".data") = 0; u64 sev_status __section(".data") = 0; +u64 sev_check_data __section(".data") = 0; EXPORT_SYMBOL(sme_me_mask); DEFINE_STATIC_KEY_FALSE(sev_enable_key); EXPORT_SYMBOL_GPL(sev_enable_key); From 2411cd82112397bfb9d8f0f19cd46c3d71e0ce67 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 28 Oct 2020 17:46:59 +0100 Subject: [PATCH 33/75] x86/sev-es: Do not support MMIO to/from encrypted memory MMIO memory is usually not mapped encrypted, so there is no reason to support emulated MMIO when it is mapped encrypted. Prevent a possible hypervisor attack where a RAM page is mapped as an MMIO page in the nested page-table, so that any guest access to it will trigger a #VC exception and leak the data on that page to the hypervisor via the GHCB (like with valid MMIO). On the read side this attack would allow the HV to inject data into the guest. 
Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Reviewed-by: Tom Lendacky Link: https://lkml.kernel.org/r/20201028164659.27002-6-joro@8bytes.org --- arch/x86/kernel/sev-es.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index 4a96726fbaf8..0bd1a0fc587e 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -374,8 +374,8 @@ fault: return ES_EXCEPTION; } -static bool vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, - unsigned long vaddr, phys_addr_t *paddr) +static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, + unsigned long vaddr, phys_addr_t *paddr) { unsigned long va = (unsigned long)vaddr; unsigned int level; @@ -394,15 +394,19 @@ static bool vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, if (user_mode(ctxt->regs)) ctxt->fi.error_code |= X86_PF_USER; - return false; + return ES_EXCEPTION; } + if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC)) + /* Emulated MMIO to/from encrypted memory not supported */ + return ES_UNSUPPORTED; + pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; pa |= va & ~page_level_mask(level); *paddr = pa; - return true; + return ES_OK; } /* Include code shared with pre-decompression boot stage */ @@ -731,6 +735,7 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, { u64 exit_code, exit_info_1, exit_info_2; unsigned long ghcb_pa = __pa(ghcb); + enum es_result res; phys_addr_t paddr; void __user *ref; @@ -740,11 +745,12 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, exit_code = read ? 
SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE; - if (!vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr)) { - if (!read) + res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr); + if (res != ES_OK) { + if (res == ES_EXCEPTION && !read) ctxt->fi.error_code |= X86_PF_WRITE; - return ES_EXCEPTION; + return res; } exit_info_1 = paddr; From 495023e4e49e4b7dee35928800bf0317276576c1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 2 Nov 2020 11:54:22 +0100 Subject: [PATCH 34/75] of: Drop superfluous ULL suffix for ~0 There is no need to specify a "ULL" suffix for "all bits set": "~0" is sufficient, and works regardless of type. In fact adding the suffix makes the code more fragile. Fixes: 48ab6d5d1f09 ("dma-mapping: fix 32-bit overflow with CONFIG_ARM_LPAE=n") Suggested-by: Linus Torvalds Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- drivers/of/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/device.c b/drivers/of/device.c index 3a469c79e6b0..aedfaaafd3e7 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -112,7 +112,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, u64 dma_end = 0; /* Determine the overall bounds of all DMA regions */ - for (dma_start = ~0ULL; r->size; r++) { + for (dma_start = ~0; r->size; r++) { /* Take lower and upper limits */ if (r->dma_start < dma_start) dma_start = r->dma_start; From 46b1ee38b2ba1a9524c8e886ad078bd3ca40de2a Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Sun, 1 Nov 2020 17:07:23 -0800 Subject: [PATCH 35/75] mm/mremap_pages: fix static key devmap_managed_key updates commit 6f42193fd86e ("memremap: don't use a separate devm action for devmap_managed_enable_get") changed the static key updates such that we now call devmap_managed_enable_put() without doing the equivalent devmap_managed_enable_get(). 
devmap_managed_enable_get() is only called for MEMORY_DEVICE_PRIVATE and MEMORY_DEVICE_FS_DAX, but memunmap_pages() gets called for other pgmap types too. This results in the below warning when switching between system-ram and devdax mode for devdax namespace. jump label: negative count! WARNING: CPU: 52 PID: 1335 at kernel/jump_label.c:235 static_key_slow_try_dec+0x88/0xa0 Modules linked in: .... NIP static_key_slow_try_dec+0x88/0xa0 LR static_key_slow_try_dec+0x84/0xa0 Call Trace: static_key_slow_try_dec+0x84/0xa0 __static_key_slow_dec_cpuslocked+0x34/0xd0 static_key_slow_dec+0x54/0xf0 memunmap_pages+0x36c/0x500 devm_action_release+0x30/0x50 release_nodes+0x2f4/0x3e0 device_release_driver_internal+0x17c/0x280 bus_remove_device+0x124/0x210 device_del+0x1d4/0x530 unregister_dev_dax+0x48/0xe0 devm_action_release+0x30/0x50 release_nodes+0x2f4/0x3e0 device_release_driver_internal+0x17c/0x280 unbind_store+0x130/0x170 drv_attr_store+0x40/0x60 sysfs_kf_write+0x6c/0xb0 kernfs_fop_write+0x118/0x280 vfs_write+0xe8/0x2a0 ksys_write+0x84/0x140 system_call_exception+0x120/0x270 system_call_common+0xf0/0x27c Reported-by: Aneesh Kumar K.V Signed-off-by: Ralph Campbell Signed-off-by: Andrew Morton Tested-by: Sachin Sant Reviewed-by: Aneesh Kumar K.V Reviewed-by: Ira Weiny Reviewed-by: Christoph Hellwig Cc: Dan Williams Cc: Jason Gunthorpe Link: https://lkml.kernel.org/r/20201023183222.13186-1-rcampbell@nvidia.com Signed-off-by: Linus Torvalds --- mm/memremap.c | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/mm/memremap.c b/mm/memremap.c index 73a206d0f645..16b2fb482da1 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -41,28 +41,24 @@ EXPORT_SYMBOL_GPL(memremap_compat_align); DEFINE_STATIC_KEY_FALSE(devmap_managed_key); EXPORT_SYMBOL(devmap_managed_key); -static void devmap_managed_enable_put(void) +static void devmap_managed_enable_put(struct dev_pagemap *pgmap) { - static_branch_dec(&devmap_managed_key); + if
(pgmap->type == MEMORY_DEVICE_PRIVATE || + pgmap->type == MEMORY_DEVICE_FS_DAX) + static_branch_dec(&devmap_managed_key); } -static int devmap_managed_enable_get(struct dev_pagemap *pgmap) +static void devmap_managed_enable_get(struct dev_pagemap *pgmap) { - if (pgmap->type == MEMORY_DEVICE_PRIVATE && - (!pgmap->ops || !pgmap->ops->page_free)) { - WARN(1, "Missing page_free method\n"); - return -EINVAL; - } - - static_branch_inc(&devmap_managed_key); - return 0; + if (pgmap->type == MEMORY_DEVICE_PRIVATE || + pgmap->type == MEMORY_DEVICE_FS_DAX) + static_branch_inc(&devmap_managed_key); } #else -static int devmap_managed_enable_get(struct dev_pagemap *pgmap) +static void devmap_managed_enable_get(struct dev_pagemap *pgmap) { - return -EINVAL; } -static void devmap_managed_enable_put(void) +static void devmap_managed_enable_put(struct dev_pagemap *pgmap) { } #endif /* CONFIG_DEV_PAGEMAP_OPS */ @@ -169,7 +165,7 @@ void memunmap_pages(struct dev_pagemap *pgmap) pageunmap_range(pgmap, i); WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n"); - devmap_managed_enable_put(); + devmap_managed_enable_put(pgmap); } EXPORT_SYMBOL_GPL(memunmap_pages); @@ -307,7 +303,6 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) .pgprot = PAGE_KERNEL, }; const int nr_range = pgmap->nr_range; - bool need_devmap_managed = true; int error, i; if (WARN_ONCE(!nr_range, "nr_range must be specified\n")) @@ -323,6 +318,10 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) WARN(1, "Missing migrate_to_ram method\n"); return ERR_PTR(-EINVAL); } + if (!pgmap->ops->page_free) { + WARN(1, "Missing page_free method\n"); + return ERR_PTR(-EINVAL); + } if (!pgmap->owner) { WARN(1, "Missing owner\n"); return ERR_PTR(-EINVAL); @@ -336,11 +335,9 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) } break; case MEMORY_DEVICE_GENERIC: - need_devmap_managed = false; break; case MEMORY_DEVICE_PCI_P2PDMA: params.pgprot = pgprot_noncached(params.pgprot); - 
need_devmap_managed = false; break; default: WARN(1, "Invalid pgmap type %d\n", pgmap->type); @@ -364,11 +361,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) } } - if (need_devmap_managed) { - error = devmap_managed_enable_get(pgmap); - if (error) - return ERR_PTR(error); - } + devmap_managed_enable_get(pgmap); /* * Clear the pgmap nr_range as it will be incremented for each From 79aa925bf239c234be8586780e482872dc4690dd Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Sun, 1 Nov 2020 17:07:27 -0800 Subject: [PATCH 36/75] hugetlb_cgroup: fix reservation accounting Michal Privoznik was using "free page reporting" in QEMU/virtio-balloon with hugetlbfs and hit the warning below. QEMU with free page hinting uses fallocate(FALLOC_FL_PUNCH_HOLE) to discard pages that are reported as free by a VM. The reporting granularity is in pageblock granularity. So when the guest reports 2M chunks, we fallocate(FALLOC_FL_PUNCH_HOLE) one huge page in QEMU. WARNING: CPU: 7 PID: 6636 at mm/page_counter.c:57 page_counter_uncharge+0x4b/0x50 Modules linked in: ... CPU: 7 PID: 6636 Comm: qemu-system-x86 Not tainted 5.9.0 #137 Hardware name: Gigabyte Technology Co., Ltd. X570 AORUS PRO/X570 AORUS PRO, BIOS F21 07/31/2020 RIP: 0010:page_counter_uncharge+0x4b/0x50 ... Call Trace: hugetlb_cgroup_uncharge_file_region+0x4b/0x80 region_del+0x1d3/0x300 hugetlb_unreserve_pages+0x39/0xb0 remove_inode_hugepages+0x1a8/0x3d0 hugetlbfs_fallocate+0x3c4/0x5c0 vfs_fallocate+0x146/0x290 __x64_sys_fallocate+0x3e/0x70 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Investigation of the issue uncovered bugs in hugetlb cgroup reservation accounting. This patch addresses the found issues. 
Fixes: 075a61d07a8e ("hugetlb_cgroup: add accounting for shared mappings") Reported-by: Michal Privoznik Co-developed-by: David Hildenbrand Signed-off-by: David Hildenbrand Signed-off-by: Mike Kravetz Signed-off-by: Andrew Morton Tested-by: Michal Privoznik Reviewed-by: Mina Almasry Acked-by: Michael S. Tsirkin Cc: Cc: David Hildenbrand Cc: Michal Hocko Cc: Muchun Song Cc: "Aneesh Kumar K . V" Cc: Tejun Heo Link: https://lkml.kernel.org/r/20201021204426.36069-1-mike.kravetz@oracle.com Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index fe76f8fd5a73..5a620f690911 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -648,6 +648,8 @@ retry: } del += t - f; + hugetlb_cgroup_uncharge_file_region( + resv, rg, t - f); /* New entry for end of split region */ nrg->from = t; @@ -660,9 +662,6 @@ retry: /* Original entry is trimmed */ rg->to = f; - hugetlb_cgroup_uncharge_file_region( - resv, rg, nrg->to - nrg->from); - list_add(&nrg->link, &rg->link); nrg = NULL; break; @@ -678,17 +677,17 @@ retry: } if (f <= rg->from) { /* Trim beginning of region */ - del += t - rg->from; - rg->from = t; - hugetlb_cgroup_uncharge_file_region(resv, rg, t - rg->from); - } else { /* Trim end of region */ - del += rg->to - f; - rg->to = f; + del += t - rg->from; + rg->from = t; + } else { /* Trim end of region */ hugetlb_cgroup_uncharge_file_region(resv, rg, rg->to - f); + + del += rg->to - f; + rg->to = f; } } @@ -2443,6 +2442,9 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, rsv_adjust = hugepage_subpool_put_pages(spool, 1); hugetlb_acct_memory(h, -rsv_adjust); + if (deferred_reserve) + hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h), + pages_per_huge_page(h), page); } return page; From 7de2e9f195b9cb27583c5c64deaaf5e6afcc163e Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Sun, 1 Nov 2020 17:07:30 -0800 Subject: [PATCH 37/75] mm: memcontrol: correct the 
NR_ANON_THPS counter of hierarchical memcg memcg_page_state will get the specified number in hierarchical memcg. It should multiply by HPAGE_PMD_NR rather than a page if the item is NR_ANON_THPS. [akpm@linux-foundation.org: fix printk warning] [akpm@linux-foundation.org: use u64 cast, per Michal] Fixes: 468c398233da ("mm: memcontrol: switch to native NR_ANON_THPS counter") Signed-off-by: zhongjiang-ali Signed-off-by: Andrew Morton Acked-by: Johannes Weiner Acked-by: Michal Hocko Link: https://lkml.kernel.org/r/1603722395-72443-1-git-send-email-zhongjiang-ali@linux.alibaba.com Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3a24e3b619f5..c3b6dc7d5c94 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4110,11 +4110,17 @@ static int memcg_stat_show(struct seq_file *m, void *v) (u64)memsw * PAGE_SIZE); for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { + unsigned long nr; + if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account()) continue; + nr = memcg_page_state(memcg, memcg1_stats[i]); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (memcg1_stats[i] == NR_ANON_THPS) + nr *= HPAGE_PMD_NR; +#endif seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i], - (u64)memcg_page_state(memcg, memcg1_stats[i]) * - PAGE_SIZE); + (u64)nr * PAGE_SIZE); } for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) From 8de15e920dc85d1705ab9c202c95d56845bc2d48 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sun, 1 Nov 2020 17:07:34 -0800 Subject: [PATCH 38/75] mm: memcg: link page counters to root if use_hierarchy is false MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Richard reported a warning which can be reproduced by running the LTP madvise6 test (cgroup v1 in the non-hierarchical mode should be used): WARNING: CPU: 0 PID: 12 at mm/page_counter.c:57 page_counter_uncharge (mm/page_counter.c:57 mm/page_counter.c:50
mm/page_counter.c:156) Modules linked in: CPU: 0 PID: 12 Comm: kworker/0:1 Not tainted 5.9.0-rc7-22-default #77 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-48-gd9c812d-rebuilt.opensuse.org 04/01/2014 Workqueue: events drain_local_stock RIP: 0010:page_counter_uncharge (mm/page_counter.c:57 mm/page_counter.c:50 mm/page_counter.c:156) Call Trace: __memcg_kmem_uncharge (mm/memcontrol.c:3022) drain_obj_stock (./include/linux/rcupdate.h:689 mm/memcontrol.c:3114) drain_local_stock (mm/memcontrol.c:2255) process_one_work (./arch/x86/include/asm/jump_label.h:25 ./include/linux/jump_label.h:200 ./include/trace/events/workqueue.h:108 kernel/workqueue.c:2274) worker_thread (./include/linux/list.h:282 kernel/workqueue.c:2416) kthread (kernel/kthread.c:292) ret_from_fork (arch/x86/entry/entry_64.S:300) The problem occurs because in the non-hierarchical mode non-root page counters are not linked to root page counters, so the charge is not propagated to the root memory cgroup. After the removal of the original memory cgroup and reparenting of the object cgroup, the root cgroup might be uncharged by draining an objcg stock, for example. It leads to an eventual underflow of the charge and triggers a warning. Fix it by linking all page counters to corresponding root page counters in the non-hierarchical mode. Please note that in the non-hierarchical mode all objcgs are always reparented to the root memory cgroup, even if the hierarchy has more than 1 level. This patch doesn't change it. The patch also doesn't affect how the hierarchical mode is working, which is the only sane and truly supported mode now. Thanks to Richard for reporting, debugging and providing an alternative version of the fix!
Fixes: bf4f059954dc ("mm: memcg/slab: obj_cgroup API") Reported-by: Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Reviewed-by: Michal Koutný Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Link: https://lkml.kernel.org/r/20201026231326.3212225-1-guro@fb.com Debugged-by: Richard Palethorpe Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c3b6dc7d5c94..3dcbf24d2227 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5345,17 +5345,22 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) memcg->swappiness = mem_cgroup_swappiness(parent); memcg->oom_kill_disable = parent->oom_kill_disable; } - if (parent && parent->use_hierarchy) { + if (!parent) { + page_counter_init(&memcg->memory, NULL); + page_counter_init(&memcg->swap, NULL); + page_counter_init(&memcg->kmem, NULL); + page_counter_init(&memcg->tcpmem, NULL); + } else if (parent->use_hierarchy) { memcg->use_hierarchy = true; page_counter_init(&memcg->memory, &parent->memory); page_counter_init(&memcg->swap, &parent->swap); page_counter_init(&memcg->kmem, &parent->kmem); page_counter_init(&memcg->tcpmem, &parent->tcpmem); } else { - page_counter_init(&memcg->memory, NULL); - page_counter_init(&memcg->swap, NULL); - page_counter_init(&memcg->kmem, NULL); - page_counter_init(&memcg->tcpmem, NULL); + page_counter_init(&memcg->memory, &root_mem_cgroup->memory); + page_counter_init(&memcg->swap, &root_mem_cgroup->swap); + page_counter_init(&memcg->kmem, &root_mem_cgroup->kmem); + page_counter_init(&memcg->tcpmem, &root_mem_cgroup->tcpmem); /* * Deeper hierachy with use_hierarchy == false doesn't make * much sense so let cgroup subsystem know about this From 58b999d7a22c59313e1e84832607c7a61640f4e7 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Sun, 1 Nov 2020 17:07:37 -0800 Subject: [PATCH 39/75] kasan: adopt KUNIT tests to SW_TAGS mode Now 
that we have KASAN-KUNIT tests integration, it's easy to see that some KASAN tests are not adopted to the SW_TAGS mode and are failing. Adjust the allocation size for kasan_memchr() and kasan_memcmp() by rounding it up to OOB_TAG_OFF so the bad access ends up in a separate memory granule. Add a new kmalloc_uaf_16() test that relies on UAF, and a new kasan_bitops_tags() test that is tailored to tag-based mode, as it's hard to adopt the existing kmalloc_oob_16() and kasan_bitops_generic() (renamed from kasan_bitops()) without losing the precision. Add new kmalloc_uaf_16() and kasan_bitops_uaf() tests that rely on UAFs, as it's hard to adopt the existing kmalloc_oob_16() and kasan_bitops_oob() (rename from kasan_bitops()) without losing the precision. Disable kasan_global_oob() and kasan_alloca_oob_left/right() as SW_TAGS mode doesn't instrument globals nor dynamic allocas. Signed-off-by: Andrey Konovalov Signed-off-by: Andrew Morton Tested-by: David Gow Link: https://lkml.kernel.org/r/76eee17b6531ca8b3ca92b240cb2fd23204aaff7.1603129942.git.andreyknvl@google.com Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 149 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 107 insertions(+), 42 deletions(-) diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 63c26171a791..662f862702fc 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -216,6 +216,12 @@ static void kmalloc_oob_16(struct kunit *test) u64 words[2]; } *ptr1, *ptr2; + /* This test is specifically crafted for the generic mode.
*/ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_GENERIC required\n"); + return; + } + ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); @@ -227,6 +233,23 @@ static void kmalloc_oob_16(struct kunit *test) kfree(ptr2); } +static void kmalloc_uaf_16(struct kunit *test) +{ + struct { + u64 words[2]; + } *ptr1, *ptr2; + + ptr1 = kmalloc(sizeof(*ptr1), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); + + ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2); + kfree(ptr2); + + KUNIT_EXPECT_KASAN_FAIL(test, *ptr1 = *ptr2); + kfree(ptr1); +} + static void kmalloc_oob_memset_2(struct kunit *test) { char *ptr; @@ -429,6 +452,12 @@ static void kasan_global_oob(struct kunit *test) volatile int i = 3; char *p = &global_array[ARRAY_SIZE(global_array) + i]; + /* Only generic mode instruments globals. */ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_GENERIC required"); + return; + } + KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p); } @@ -467,6 +496,12 @@ static void kasan_alloca_oob_left(struct kunit *test) char alloca_array[i]; char *p = alloca_array - 1; + /* Only generic mode instruments dynamic allocas. */ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_GENERIC required"); + return; + } + if (!IS_ENABLED(CONFIG_KASAN_STACK)) { kunit_info(test, "CONFIG_KASAN_STACK is not enabled"); return; @@ -481,6 +516,12 @@ static void kasan_alloca_oob_right(struct kunit *test) char alloca_array[i]; char *p = alloca_array + i; + /* Only generic mode instruments dynamic allocas. 
*/ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_GENERIC required"); + return; + } + if (!IS_ENABLED(CONFIG_KASAN_STACK)) { kunit_info(test, "CONFIG_KASAN_STACK is not enabled"); return; @@ -551,6 +592,9 @@ static void kasan_memchr(struct kunit *test) return; } + if (OOB_TAG_OFF) + size = round_up(size, OOB_TAG_OFF); + ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); @@ -573,6 +617,9 @@ static void kasan_memcmp(struct kunit *test) return; } + if (OOB_TAG_OFF) + size = round_up(size, OOB_TAG_OFF); + ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset(arr, 0, sizeof(arr)); @@ -619,13 +666,50 @@ static void kasan_strings(struct kunit *test) KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strnlen(ptr, 1)); } -static void kasan_bitops(struct kunit *test) +static void kasan_bitops_modify(struct kunit *test, int nr, void *addr) { + KUNIT_EXPECT_KASAN_FAIL(test, set_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __set_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, clear_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, clear_bit_unlock(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit_unlock(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, change_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __change_bit(nr, addr)); +} + +static void kasan_bitops_test_and_modify(struct kunit *test, int nr, void *addr) +{ + KUNIT_EXPECT_KASAN_FAIL(test, test_and_set_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __test_and_set_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, test_and_set_bit_lock(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, test_and_clear_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __test_and_clear_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, test_and_change_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __test_and_change_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = test_bit(nr, 
addr)); + +#if defined(clear_bit_unlock_is_negative_byte) + KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = + clear_bit_unlock_is_negative_byte(nr, addr)); +#endif +} + +static void kasan_bitops_generic(struct kunit *test) +{ + long *bits; + + /* This test is specifically crafted for the generic mode. */ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_GENERIC required\n"); + return; + } + /* * Allocate 1 more byte, which causes kzalloc to round up to 16-bytes; * this way we do not actually corrupt other memory. */ - long *bits = kzalloc(sizeof(*bits) + 1, GFP_KERNEL); + bits = kzalloc(sizeof(*bits) + 1, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bits); /* @@ -633,55 +717,34 @@ static void kasan_bitops(struct kunit *test) * below accesses are still out-of-bounds, since bitops are defined to * operate on the whole long the bit is in. */ - KUNIT_EXPECT_KASAN_FAIL(test, set_bit(BITS_PER_LONG, bits)); - - KUNIT_EXPECT_KASAN_FAIL(test, __set_bit(BITS_PER_LONG, bits)); - - KUNIT_EXPECT_KASAN_FAIL(test, clear_bit(BITS_PER_LONG, bits)); - - KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit(BITS_PER_LONG, bits)); - - KUNIT_EXPECT_KASAN_FAIL(test, clear_bit_unlock(BITS_PER_LONG, bits)); - - KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit_unlock(BITS_PER_LONG, bits)); - - KUNIT_EXPECT_KASAN_FAIL(test, change_bit(BITS_PER_LONG, bits)); - - KUNIT_EXPECT_KASAN_FAIL(test, __change_bit(BITS_PER_LONG, bits)); + kasan_bitops_modify(test, BITS_PER_LONG, bits); /* * Below calls try to access bit beyond allocated memory. 
*/ - KUNIT_EXPECT_KASAN_FAIL(test, - test_and_set_bit(BITS_PER_LONG + BITS_PER_BYTE, bits)); + kasan_bitops_test_and_modify(test, BITS_PER_LONG + BITS_PER_BYTE, bits); - KUNIT_EXPECT_KASAN_FAIL(test, - __test_and_set_bit(BITS_PER_LONG + BITS_PER_BYTE, bits)); + kfree(bits); +} - KUNIT_EXPECT_KASAN_FAIL(test, - test_and_set_bit_lock(BITS_PER_LONG + BITS_PER_BYTE, bits)); +static void kasan_bitops_tags(struct kunit *test) +{ + long *bits; - KUNIT_EXPECT_KASAN_FAIL(test, - test_and_clear_bit(BITS_PER_LONG + BITS_PER_BYTE, bits)); + /* This test is specifically crafted for the tag-based mode. */ + if (IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_SW_TAGS required\n"); + return; + } - KUNIT_EXPECT_KASAN_FAIL(test, - __test_and_clear_bit(BITS_PER_LONG + BITS_PER_BYTE, bits)); + /* Allocation size will be rounded to up granule size, which is 16. */ + bits = kzalloc(sizeof(*bits), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bits); - KUNIT_EXPECT_KASAN_FAIL(test, - test_and_change_bit(BITS_PER_LONG + BITS_PER_BYTE, bits)); + /* Do the accesses past the 16 allocated bytes. 
*/ + kasan_bitops_modify(test, BITS_PER_LONG, &bits[1]); + kasan_bitops_test_and_modify(test, BITS_PER_LONG + BITS_PER_BYTE, &bits[1]); - KUNIT_EXPECT_KASAN_FAIL(test, - __test_and_change_bit(BITS_PER_LONG + BITS_PER_BYTE, bits)); - - KUNIT_EXPECT_KASAN_FAIL(test, - kasan_int_result = - test_bit(BITS_PER_LONG + BITS_PER_BYTE, bits)); - -#if defined(clear_bit_unlock_is_negative_byte) - KUNIT_EXPECT_KASAN_FAIL(test, - kasan_int_result = clear_bit_unlock_is_negative_byte( - BITS_PER_LONG + BITS_PER_BYTE, bits)); -#endif kfree(bits); } @@ -728,6 +791,7 @@ static struct kunit_case kasan_kunit_test_cases[] = { KUNIT_CASE(kmalloc_oob_krealloc_more), KUNIT_CASE(kmalloc_oob_krealloc_less), KUNIT_CASE(kmalloc_oob_16), + KUNIT_CASE(kmalloc_uaf_16), KUNIT_CASE(kmalloc_oob_in_memset), KUNIT_CASE(kmalloc_oob_memset_2), KUNIT_CASE(kmalloc_oob_memset_4), @@ -751,7 +815,8 @@ static struct kunit_case kasan_kunit_test_cases[] = { KUNIT_CASE(kasan_memchr), KUNIT_CASE(kasan_memcmp), KUNIT_CASE(kasan_strings), - KUNIT_CASE(kasan_bitops), + KUNIT_CASE(kasan_bitops_generic), + KUNIT_CASE(kasan_bitops_tags), KUNIT_CASE(kmalloc_double_kzfree), KUNIT_CASE(vmalloc_oob), {} From 3f08842098e842c51e3b97d0dcdebf810b32558e Mon Sep 17 00:00:00 2001 From: Shijie Luo Date: Sun, 1 Nov 2020 17:07:40 -0800 Subject: [PATCH 40/75] mm: mempolicy: fix potential pte_unmap_unlock pte error When flags in queue_pages_pte_range don't have MPOL_MF_MOVE or MPOL_MF_MOVE_ALL bits, code breaks and passing origin pte - 1 to pte_unmap_unlock seems like not a good idea. queue_pages_pte_range can run in MPOL_MF_MOVE_ALL mode which doesn't migrate misplaced pages but returns with EIO when encountering such a page. Since commit a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified") and early break on the first pte in the range results in pte_unmap_unlock on an underflow pte. This can lead to lockups later on when somebody tries to lock the pte resp. page_table_lock again.. 
Fixes: a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified") Signed-off-by: Shijie Luo Signed-off-by: Miaohe Lin Signed-off-by: Andrew Morton Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Cc: Miaohe Lin Cc: Feilong Lin Cc: Shijie Luo Cc: Link: https://lkml.kernel.org/r/20201019074853.50856-1-luoshijie1@huawei.com Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 3fde772ef5ef..3ca4898f3f24 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -525,7 +525,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, unsigned long flags = qp->flags; int ret; bool has_unmovable = false; - pte_t *pte; + pte_t *pte, *mapped_pte; spinlock_t *ptl; ptl = pmd_trans_huge_lock(pmd, vma); @@ -539,7 +539,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, if (pmd_trans_unstable(pmd)) return 0; - pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) { if (!pte_present(*pte)) continue; @@ -571,7 +571,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, } else break; } - pte_unmap_unlock(pte - 1, ptl); + pte_unmap_unlock(mapped_pte, ptl); cond_resched(); if (has_unmovable) From 7b3c36fc4c231ca532120bbc0df67a12f09c1d96 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 1 Nov 2020 17:07:44 -0800 Subject: [PATCH 41/75] ptrace: fix task_join_group_stop() for the case when current is traced This testcase #include #include #include #include #include #include #include void *tf(void *arg) { return NULL; } int main(void) { int pid = fork(); if (!pid) { kill(getpid(), SIGSTOP); pthread_t th; pthread_create(&th, NULL, tf, NULL); return 0; } waitpid(pid, NULL, WSTOPPED); ptrace(PTRACE_SEIZE, pid, 0, PTRACE_O_TRACECLONE); waitpid(pid, NULL, 0); ptrace(PTRACE_CONT, pid, 0,0); waitpid(pid, 
NULL, 0); int status; int thread = waitpid(-1, &status, 0); assert(thread > 0 && thread != pid); assert(status == 0x80137f); return 0; } fails and triggers WARN_ON_ONCE(!signr) in do_jobctl_trap(). This is because task_join_group_stop() has 2 problems when current is traced: 1. We can't rely on the "JOBCTL_STOP_PENDING" check, a stopped tracee can be woken up by debugger and it can clone another thread which should join the group-stop. We need to check group_stop_count || SIGNAL_STOP_STOPPED. 2. If SIGNAL_STOP_STOPPED is already set, we should not increment sig->group_stop_count and add JOBCTL_STOP_CONSUME. The new thread should stop without another do_notify_parent_cldstop() report. To clarify, the problem is very old and we should blame ptrace_init_task(). But now that we have task_join_group_stop() it makes more sense to fix this helper to avoid the code duplication. Reported-by: syzbot+3485e3773f7da290eecc@syzkaller.appspotmail.com Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Cc: Jens Axboe Cc: Christian Brauner Cc: "Eric W . 
Biederman" Cc: Zhiqiang Liu Cc: Tejun Heo Cc: Link: https://lkml.kernel.org/r/20201019134237.GA18810@redhat.com Signed-off-by: Linus Torvalds --- kernel/signal.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index a38b3edc6851..ef8f2a28d37c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -391,16 +391,17 @@ static bool task_participate_group_stop(struct task_struct *task) void task_join_group_stop(struct task_struct *task) { + unsigned long mask = current->jobctl & JOBCTL_STOP_SIGMASK; + struct signal_struct *sig = current->signal; + + if (sig->group_stop_count) { + sig->group_stop_count++; + mask |= JOBCTL_STOP_CONSUME; + } else if (!(sig->flags & SIGNAL_STOP_STOPPED)) + return; + /* Have the new thread join an on-going signal group stop */ - unsigned long jobctl = current->jobctl; - if (jobctl & JOBCTL_STOP_PENDING) { - struct signal_struct *sig = current->signal; - unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK; - unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME; - if (task_set_jobctl_pending(task, signr | gstop)) { - sig->group_stop_count++; - } - } + task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING); } /* From aa4e460f0976351fddd2f5ac6e08b74320c277a1 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 1 Nov 2020 17:07:47 -0800 Subject: [PATCH 42/75] lib/crc32test: remove extra local_irq_disable/enable Commit 4d004099a668 ("lockdep: Fix lockdep recursion") uncovered the following issue in lib/crc32test reported on s390: BUG: using __this_cpu_read() in preemptible [00000000] code: swapper/0/1 caller is lockdep_hardirqs_on_prepare+0x48/0x270 CPU: 6 PID: 1 Comm: swapper/0 Not tainted 5.9.0-next-20201015-15164-g03d992bd2de6 #19 Hardware name: IBM 3906 M04 704 (LPAR) Call Trace: lockdep_hardirqs_on_prepare+0x48/0x270 trace_hardirqs_on+0x9c/0x1b8 crc32_test.isra.0+0x170/0x1c0 crc32test_init+0x1c/0x40 do_one_initcall+0x40/0x130 do_initcalls+0x126/0x150 
kernel_init_freeable+0x1f6/0x230 kernel_init+0x22/0x150 ret_from_fork+0x24/0x2c no locks held by swapper/0/1. Remove extra local_irq_disable/local_irq_enable helpers calls. Fixes: 5fb7f87408f1 ("lib: add module support to crc32 tests") Signed-off-by: Vasily Gorbik Signed-off-by: Andrew Morton Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/patch.git-4369da00c06e.your-ad-here.call-01602859837-ext-1679@work.hours Signed-off-by: Linus Torvalds --- lib/crc32test.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/crc32test.c b/lib/crc32test.c index 97d6a57cefcc..61ddce2cff77 100644 --- a/lib/crc32test.c +++ b/lib/crc32test.c @@ -683,7 +683,6 @@ static int __init crc32c_test(void) /* reduce OS noise */ local_irq_save(flags); - local_irq_disable(); nsec = ktime_get_ns(); for (i = 0; i < 100; i++) { @@ -694,7 +693,6 @@ static int __init crc32c_test(void) nsec = ktime_get_ns() - nsec; local_irq_restore(flags); - local_irq_enable(); pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS); @@ -768,7 +766,6 @@ static int __init crc32_test(void) /* reduce OS noise */ local_irq_save(flags); - local_irq_disable(); nsec = ktime_get_ns(); for (i = 0; i < 100; i++) { @@ -783,7 +780,6 @@ static int __init crc32_test(void) nsec = ktime_get_ns() - nsec; local_irq_restore(flags); - local_irq_enable(); pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n", CRC_LE_BITS, CRC_BE_BITS); From a77eedbc871ee3b435bffc30b123b60eecca402c Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Sun, 1 Nov 2020 17:07:50 -0800 Subject: [PATCH 43/75] mm/truncate.c: make __invalidate_mapping_pages() static Fix the following sparse warning: mm/truncate.c:531:15: warning: symbol '__invalidate_mapping_pages' was not declared. Should it be static? 
Fixes: eb1d7a65f08a ("mm, fadvise: improve the expensive remote LRU cache draining after FADV_DONTNEED") Signed-off-by: Jason Yan Signed-off-by: Andrew Morton Reviewed-by: Yafang Shao Link: https://lkml.kernel.org/r/20201015054808.2445904-1-yanaijie@huawei.com Signed-off-by: Linus Torvalds --- mm/truncate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/truncate.c b/mm/truncate.c index 18cec39a9f53..960edf5803ca 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -528,7 +528,7 @@ void truncate_inode_pages_final(struct address_space *mapping) } EXPORT_SYMBOL(truncate_inode_pages_final); -unsigned long __invalidate_mapping_pages(struct address_space *mapping, +static unsigned long __invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end, unsigned long *nr_pagevec) { pgoff_t indices[PAGEVEC_SIZE]; From 6993d0fdbee0eb38bfac350aa016f65ad11ed3b1 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Sun, 1 Nov 2020 17:07:53 -0800 Subject: [PATCH 44/75] kthread_worker: prevent queuing delayed work from timer_fn when it is being canceled There is a small race window when a delayed work is being canceled and the work still might be queued from the timer_fn: CPU0 CPU1 kthread_cancel_delayed_work_sync() __kthread_cancel_work_sync() __kthread_cancel_work() work->canceling++; kthread_delayed_work_timer_fn() kthread_insert_work(); BUG: kthread_insert_work() should not get called when work->canceling is set. 
Signed-off-by: Zqiang Signed-off-by: Andrew Morton Reviewed-by: Petr Mladek Acked-by: Tejun Heo Cc: Link: https://lkml.kernel.org/r/20201014083030.16895-1-qiang.zhang@windriver.com Signed-off-by: Linus Torvalds --- kernel/kthread.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index e29773c82b70..933a625621b8 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -897,7 +897,8 @@ void kthread_delayed_work_timer_fn(struct timer_list *t) /* Move the work from worker->delayed_work_list. */ WARN_ON_ONCE(list_empty(&work->node)); list_del_init(&work->node); - kthread_insert_work(worker, work, &worker->work_list); + if (!work->canceling) + kthread_insert_work(worker, work, &worker->work_list); raw_spin_unlock_irqrestore(&worker->lock, flags); } From 66606567dedf395e0857f531976efad4cbbd39ea Mon Sep 17 00:00:00 2001 From: Charles Haithcock Date: Sun, 1 Nov 2020 17:07:56 -0800 Subject: [PATCH 45/75] mm, oom: keep oom_adj under or at upper limit when printing For oom_score_adj values in the range [942,999], the current calculations will print 16 for oom_adj. This patch simply limits the output so output is inline with docs. 
Signed-off-by: Charles Haithcock Signed-off-by: Andrew Morton Acked-by: Michal Hocko Cc: Alexey Dobriyan Link: https://lkml.kernel.org/r/20201020165130.33927-1-chaithco@redhat.com Signed-off-by: Linus Torvalds --- fs/proc/base.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index 0f707003dda5..b362523a9829 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1049,6 +1049,8 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX; put_task_struct(task); + if (oom_adj > OOM_ADJUST_MAX) + oom_adj = OOM_ADJUST_MAX; len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); return simple_read_from_buffer(buf, count, ppos, buffer, len); } From f8f6ae5d077a9bdaf5cbf2ac960a5d1a04b47482 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 1 Nov 2020 17:08:00 -0800 Subject: [PATCH 46/75] mm: always have io_remap_pfn_range() set pgprot_decrypted() The purpose of io_remap_pfn_range() is to map IO memory, such as a memory mapped IO exposed through a PCI BAR. IO devices do not understand encryption, so this memory must always be decrypted. Automatically call pgprot_decrypted() as part of the generic implementation. This fixes a bug where enabling AMD SME causes subsystems, such as RDMA, using io_remap_pfn_range() to expose BAR pages to user space to fail. The CPU will encrypt access to those BAR pages instead of passing unencrypted IO directly to the device. Places not mapping IO should use remap_pfn_range(). 
Fixes: aca20d546214 ("x86/mm: Add support to make use of Secure Memory Encryption") Signed-off-by: Jason Gunthorpe Signed-off-by: Andrew Morton Cc: Arnd Bergmann Cc: Tom Lendacky Cc: Thomas Gleixner Cc: Andrey Ryabinin Cc: Borislav Petkov Cc: Brijesh Singh Cc: Jonathan Corbet Cc: Dmitry Vyukov Cc: "Dave Young" Cc: Alexander Potapenko Cc: Konrad Rzeszutek Wilk Cc: Andy Lutomirski Cc: Larry Woodman Cc: Matt Fleming Cc: Ingo Molnar Cc: "Michael S. Tsirkin" Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Rik van Riel Cc: Toshimitsu Kani Cc: Link: https://lkml.kernel.org/r/0-v1-025d64bdf6c4+e-amd_sme_fix_jgg@nvidia.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 9 +++++++++ include/linux/pgtable.h | 4 ---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index ef360fe70aaf..db6ae4d3fb4e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2759,6 +2759,15 @@ static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma, return VM_FAULT_NOPAGE; } +#ifndef io_remap_pfn_range +static inline int io_remap_pfn_range(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + return remap_pfn_range(vma, addr, pfn, size, pgprot_decrypted(prot)); +} +#endif + static inline vm_fault_t vmf_error(int err) { if (err == -ENOMEM) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 38c33eabea89..71125a4676c4 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1427,10 +1427,6 @@ typedef unsigned int pgtbl_mod_mask; #endif /* !__ASSEMBLY__ */ -#ifndef io_remap_pfn_range -#define io_remap_pfn_range remap_pfn_range -#endif - #ifndef has_transparent_hugepage #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define has_transparent_hugepage() 1 From afabdf3338728c3aaa9f55d127e903dcd5f4acc7 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Sun, 1 Nov 2020 17:08:07 -0800 Subject: [PATCH 47/75] epoll: add a selftest for epoll timeout race Add a 
test case to ensure an event is observed by at least one poller when an epoll timeout is used. Signed-off-by: Guantao Liu Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Andrew Morton Reviewed-by: Eric Dumazet Reviewed-by: Khazhismel Kumykov Acked-by: Willem de Bruijn Cc: Al Viro Cc: Davidlohr Bueso Link: https://lkml.kernel.org/r/20201028180202.952079-2-soheil.kdev@gmail.com Signed-off-by: Linus Torvalds --- .../filesystems/epoll/epoll_wakeup_test.c | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c index d979ff14775a..8f82f99f7748 100644 --- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c +++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c @@ -3282,4 +3282,99 @@ TEST(epoll60) close(ctx.epfd); } +struct epoll61_ctx { + int epfd; + int evfd; +}; + +static void *epoll61_write_eventfd(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + int64_t l = 1; + + usleep(10950); + write(ctx->evfd, &l, sizeof(l)); + return NULL; +} + +static void *epoll61_epoll_with_timeout(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + struct epoll_event events[1]; + int n; + + n = epoll_wait(ctx->epfd, events, 1, 11); + /* + * If epoll returned the eventfd, write on the eventfd to wake up the + * blocking poller. 
+ */ + if (n == 1) { + int64_t l = 1; + + write(ctx->evfd, &l, sizeof(l)); + } + return NULL; +} + +static void *epoll61_blocking_epoll(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + struct epoll_event events[1]; + + epoll_wait(ctx->epfd, events, 1, -1); + return NULL; +} + +TEST(epoll61) +{ + struct epoll61_ctx ctx; + struct epoll_event ev; + int i, r; + + ctx.epfd = epoll_create1(0); + ASSERT_GE(ctx.epfd, 0); + ctx.evfd = eventfd(0, EFD_NONBLOCK); + ASSERT_GE(ctx.evfd, 0); + + ev.events = EPOLLIN | EPOLLET | EPOLLERR | EPOLLHUP; + ev.data.ptr = NULL; + r = epoll_ctl(ctx.epfd, EPOLL_CTL_ADD, ctx.evfd, &ev); + ASSERT_EQ(r, 0); + + /* + * We are testing a race. Repeat the test case 1000 times to make it + * more likely to fail in case of a bug. + */ + for (i = 0; i < 1000; i++) { + pthread_t threads[3]; + int n; + + /* + * Start 3 threads: + * Thread 1 sleeps for 10.9ms and writes to the evenfd. + * Thread 2 calls epoll with a timeout of 11ms. + * Thread 3 calls epoll with a timeout of -1. + * + * The eventfd write by Thread 1 should either wakeup Thread 2 + * or Thread 3. If it wakes up Thread 2, Thread 2 writes on the + * eventfd to wake up Thread 3. + * + * If no events are missed, all three threads should eventually + * be joinable. 
+ */ + ASSERT_EQ(pthread_create(&threads[0], NULL, + epoll61_write_eventfd, &ctx), 0); + ASSERT_EQ(pthread_create(&threads[1], NULL, + epoll61_epoll_with_timeout, &ctx), 0); + ASSERT_EQ(pthread_create(&threads[2], NULL, + epoll61_blocking_epoll, &ctx), 0); + + for (n = 0; n < ARRAY_SIZE(threads); ++n) + ASSERT_EQ(pthread_join(threads[n], NULL), 0); + } + + close(ctx.epfd); + close(ctx.evfd); +} + TEST_HARNESS_MAIN From 3b70ae4f5c4e050bdebeeefe0c369524f37917cf Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sun, 1 Nov 2020 17:08:10 -0800 Subject: [PATCH 48/75] kernel/hung_task.c: make type annotations consistent Commit 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") removed various __user annotations from function signatures as part of its refactoring. It also removed the __user annotation for proc_dohung_task_timeout_secs() at its declaration in sched/sysctl.h, but not at its definition in kernel/hung_task.c. Hence, sparse complains: kernel/hung_task.c:271:5: error: symbol 'proc_dohung_task_timeout_secs' redeclared with different type (incompatible argument 3 (different address spaces)) Adjust the annotation at the definition fitting to that refactoring to make sparse happy again, which also resolves this warning from sparse: kernel/hung_task.c:277:52: warning: incorrect type in argument 3 (different address spaces) kernel/hung_task.c:277:52: expected void * kernel/hung_task.c:277:52: got void [noderef] __user *buffer No functional change. No change in object code. 
Signed-off-by: Lukas Bulwahn Signed-off-by: Andrew Morton Cc: Christoph Hellwig Cc: Tetsuo Handa Cc: Al Viro Cc: Andrey Ignatov Link: https://lkml.kernel.org/r/20201028130541.20320-1-lukas.bulwahn@gmail.com Signed-off-by: Linus Torvalds --- kernel/hung_task.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index ce76f490126c..396ebaebea3f 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -225,8 +225,7 @@ static long hung_timeout_jiffies(unsigned long last_checked, * Process updating of timeout sysctl */ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret; From 90bfdeef83f1d6c696039b6a917190dcbbad3220 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 26 Oct 2020 13:15:23 -0700 Subject: [PATCH 49/75] tty: make FONTX ioctl use the tty pointer they were actually passed Some of the font tty ioctl's always used the current foreground VC for their operations. Don't do that then. This fixes a data race on fg_console. Side note: both Michael Ellerman and Jiri Slaby point out that all these ioctls are deprecated, and should probably have been removed long ago, and everything seems to be using the KDFONTOP ioctl instead. In fact, Michael points out that it looks like busybox's loadfont program seems to have switched over to using KDFONTOP exactly _because_ of this bug (ahem.. 12 years ago ;-). 
Reported-by: Minh Yuan Acked-by: Michael Ellerman Acked-by: Jiri Slaby Cc: Greg KH Signed-off-by: Linus Torvalds --- drivers/tty/vt/vt_ioctl.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 2321775ef098..5f61b25a9aaa 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -484,7 +484,7 @@ static int vt_k_ioctl(struct tty_struct *tty, unsigned int cmd, return 0; } -static inline int do_fontx_ioctl(int cmd, +static inline int do_fontx_ioctl(struct vc_data *vc, int cmd, struct consolefontdesc __user *user_cfd, struct console_font_op *op) { @@ -502,15 +502,16 @@ static inline int do_fontx_ioctl(int cmd, op->height = cfdarg.charheight; op->charcount = cfdarg.charcount; op->data = cfdarg.chardata; - return con_font_op(vc_cons[fg_console].d, op); - case GIO_FONTX: { + return con_font_op(vc, op); + + case GIO_FONTX: op->op = KD_FONT_OP_GET; op->flags = KD_FONT_FLAG_OLD; op->width = 8; op->height = cfdarg.charheight; op->charcount = cfdarg.charcount; op->data = cfdarg.chardata; - i = con_font_op(vc_cons[fg_console].d, op); + i = con_font_op(vc, op); if (i) return i; cfdarg.charheight = op->height; @@ -518,12 +519,11 @@ static inline int do_fontx_ioctl(int cmd, if (copy_to_user(user_cfd, &cfdarg, sizeof(struct consolefontdesc))) return -EFAULT; return 0; - } } return -EINVAL; } -static int vt_io_fontreset(struct console_font_op *op) +static int vt_io_fontreset(struct vc_data *vc, struct console_font_op *op) { int ret; @@ -537,12 +537,12 @@ static int vt_io_fontreset(struct console_font_op *op) op->op = KD_FONT_OP_SET_DEFAULT; op->data = NULL; - ret = con_font_op(vc_cons[fg_console].d, op); + ret = con_font_op(vc, op); if (ret) return ret; console_lock(); - con_set_default_unimap(vc_cons[fg_console].d); + con_set_default_unimap(vc); console_unlock(); return 0; @@ -584,7 +584,7 @@ static int vt_io_ioctl(struct vc_data *vc, unsigned int cmd, 
void __user *up, op.height = 0; op.charcount = 256; op.data = up; - return con_font_op(vc_cons[fg_console].d, &op); + return con_font_op(vc, &op); case GIO_FONT: op.op = KD_FONT_OP_GET; @@ -593,7 +593,7 @@ static int vt_io_ioctl(struct vc_data *vc, unsigned int cmd, void __user *up, op.height = 32; op.charcount = 256; op.data = up; - return con_font_op(vc_cons[fg_console].d, &op); + return con_font_op(vc, &op); case PIO_CMAP: if (!perm) @@ -609,13 +609,13 @@ static int vt_io_ioctl(struct vc_data *vc, unsigned int cmd, void __user *up, fallthrough; case GIO_FONTX: - return do_fontx_ioctl(cmd, up, &op); + return do_fontx_ioctl(vc, cmd, up, &op); case PIO_FONTRESET: if (!perm) return -EPERM; - return vt_io_fontreset(&op); + return vt_io_fontreset(vc, &op); case PIO_SCRNMAP: if (!perm) @@ -1066,8 +1066,9 @@ struct compat_consolefontdesc { }; static inline int -compat_fontx_ioctl(int cmd, struct compat_consolefontdesc __user *user_cfd, - int perm, struct console_font_op *op) +compat_fontx_ioctl(struct vc_data *vc, int cmd, + struct compat_consolefontdesc __user *user_cfd, + int perm, struct console_font_op *op) { struct compat_consolefontdesc cfdarg; int i; @@ -1085,7 +1086,8 @@ compat_fontx_ioctl(int cmd, struct compat_consolefontdesc __user *user_cfd, op->height = cfdarg.charheight; op->charcount = cfdarg.charcount; op->data = compat_ptr(cfdarg.chardata); - return con_font_op(vc_cons[fg_console].d, op); + return con_font_op(vc, op); + case GIO_FONTX: op->op = KD_FONT_OP_GET; op->flags = KD_FONT_FLAG_OLD; @@ -1093,7 +1095,7 @@ compat_fontx_ioctl(int cmd, struct compat_consolefontdesc __user *user_cfd, op->height = cfdarg.charheight; op->charcount = cfdarg.charcount; op->data = compat_ptr(cfdarg.chardata); - i = con_font_op(vc_cons[fg_console].d, op); + i = con_font_op(vc, op); if (i) return i; cfdarg.charheight = op->height; @@ -1183,7 +1185,7 @@ long vt_compat_ioctl(struct tty_struct *tty, */ case PIO_FONTX: case GIO_FONTX: - return compat_fontx_ioctl(cmd, up, perm, 
&op); + return compat_fontx_ioctl(vc, cmd, up, perm, &op); case KDFONTOP: return compat_kdfontop_ioctl(up, perm, &op, vc); From b773ea650576f14442f7a546f2b15e64b10ed0eb Mon Sep 17 00:00:00 2001 From: "Justin M. Forbes" Date: Wed, 28 Oct 2020 13:59:00 -0300 Subject: [PATCH 50/75] perf tools: Remove LTO compiler options when building perl support To avoid breaking the build by mixing files compiled with things coming from distro specific compiler options for perl with the rest of perf, i.e. to avoid this: `.gnu.debuglto_.debug_macro' referenced in section `.gnu.debuglto_.debug_macro' of /tmp/build/perf/util/scripting-engines/perf-in.o: defined in discarded section `.gnu.debuglto_.debug_macro[wm4.stdcpredef.h.19.8dc41bed5d9037ff9622e015fb5f0ce3]' of /tmp/build/perf/util/scripting-engines/perf-in.o Noticed on Fedora 33. Signed-off-by: Justin M. Forbes Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1593431 Cc: Jiri Olsa Link: https://src.fedoraproject.org/rpms/kernel-tools/c/589a32b62f0c12516ab7b34e3dd30d450145bfa4?branch=master Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 6890fc4b063a..ce8516e4de34 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -749,6 +749,7 @@ else PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS)) PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS)) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) From e555b4b8d7b2844a9e48e06a7c3e4f9e44af847f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 12:26:45 -0300 Subject: [PATCH 51/75] perf tools: Update copy of libbpf's hashmap.c To pick the changes in: 
85367030a6c7ef33 ("libbpf: Centralize poisoning and poison reallocarray()") 7d9c71e10baa3496 ("libbpf: Extract generic string hashing function for reuse") That don't entail any changes in tools/perf. This addresses this perf build warning: Warning: Kernel ABI header at 'tools/perf/util/hashmap.h' differs from latest version at 'tools/lib/bpf/hashmap.h' diff -u tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h Not a kernel ABI, it's just that this uses the mechanism in place for checking kernel ABI files drift. Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hashmap.c | 3 +++ tools/perf/util/hashmap.h | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/tools/perf/util/hashmap.c b/tools/perf/util/hashmap.c index a405dad068f5..3c20b126d60d 100644 --- a/tools/perf/util/hashmap.c +++ b/tools/perf/util/hashmap.c @@ -15,6 +15,9 @@ /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 +/* prevent accidental re-addition of reallocarray() */ +#pragma GCC poison reallocarray + /* start with 4 buckets */ #define HASHMAP_MIN_CAP_BITS 2 diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index e0af36b0e5d8..d9b385fe808c 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -25,6 +25,18 @@ static inline size_t hash_bits(size_t h, int bits) #endif } +/* generic C-string hashing function */ +static inline size_t str_hash(const char *s) +{ + size_t h = 0; + + while (*s) { + h = h * 31 + *s; + s++; + } + return h; +} + typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx); From 263e452eff397b370e39d464c8cbd30f6bd59fb9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Nov 2020 08:29:30 -0300 Subject: [PATCH 52/75] tools headers UAPI: Update process_madvise
affected files To pick the changes from: ecb8ac8b1f146915 ("mm/madvise: introduce process_madvise() syscall: an external memory hinting API") That addresses these perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h' diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Linus Torvalds Cc: Minchan Kim Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/unistd.h | 4 +++- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 11 +++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index f2b5d72a46c2..2056318988f7 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -857,9 +857,11 @@ __SYSCALL(__NR_openat2, sys_openat2) __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) #define __NR_faccessat2 439 __SYSCALL(__NR_faccessat2, sys_faccessat2) +#define __NR_process_madvise 440 +__SYSCALL(__NR_process_madvise, sys_process_madvise) #undef __NR_syscalls -#define __NR_syscalls 440 +#define __NR_syscalls 441 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 347809649ba2..379819244b91 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -361,12 +361,13 @@ 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 +440 common
process_madvise sys_process_madvise # -# x32-specific system call numbers start at 512 to avoid cache impact -# for native 64-bit operation. The __x32_compat_sys stubs are created -# on-the-fly for compat_sys_*() compatibility system calls if X86_X32 -# is defined. +# Due to a historical design error, certain syscalls are numbered differently +# in x32 as compared to native x86_64. These syscalls have numbers 512-547. +# Do not add new syscalls to this range. Numbers 548 and above are available +# for non-x32 use. # 512 x32 rt_sigaction compat_sys_rt_sigaction 513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn @@ -404,3 +405,5 @@ 545 x32 execveat compat_sys_execveat 546 x32 preadv2 compat_sys_preadv64v2 547 x32 pwritev2 compat_sys_pwritev64v2 +# This is the end of the legacy x32 range. Numbers 548 and above are +# not special and are not to be used for x32-specific syscalls. From ab8bf5f2e0321f254590ad81c6e230185d88b4e5 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 16 Oct 2020 14:47:18 +0300 Subject: [PATCH 53/75] perf tools: Fix crash with non-jited bpf progs The addr in PERF_RECORD_KSYMBOL events for non-jited bpf progs points to the bpf interpreter, ie. within kernel text section. When processing the unregister event, this causes unexpected removal of vmlinux_map, crashing perf later in cleanup: # perf record -- timeout --signal=INT 2s /usr/share/bcc/tools/execsnoop PCOMM PID PPID RET ARGS [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.208 MB perf.data (5155 samples) ] perf: tools/include/linux/refcount.h:131: refcount_sub_and_test: Assertion `!(new > val)' failed. 
Aborted (core dumped) # perf script -D|grep KSYM 0 0xa40 [0x48]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b530 len 0 type 1 flags 0x0 name bpf_prog_f958f6eb72ef5af6 0 0xab0 [0x48]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b530 len 0 type 1 flags 0x0 name bpf_prog_8c42dee26e8cd4c2 0 0xb20 [0x48]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b530 len 0 type 1 flags 0x0 name bpf_prog_f958f6eb72ef5af6 108563691893 0x33d98 [0x58]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b3b0 len 0 type 1 flags 0x0 name bpf_prog_bc5697a410556fc2_syscall__execve 108568518458 0x34098 [0x58]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b3f0 len 0 type 1 flags 0x0 name bpf_prog_45e2203c2928704d_do_ret_sys_execve 109301967895 0x34830 [0x58]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b3b0 len 0 type 1 flags 0x1 name bpf_prog_bc5697a410556fc2_syscall__execve 109302007356 0x348b0 [0x58]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b3f0 len 0 type 1 flags 0x1 name bpf_prog_45e2203c2928704d_do_ret_sys_execve perf: tools/include/linux/refcount.h:131: refcount_sub_and_test: Assertion `!(new > val)' failed. Here the addresses match the bpf interpreter: # grep -e ffffffffa9b6b530 -e ffffffffa9b6b3b0 -e ffffffffa9b6b3f0 /proc/kallsyms ffffffffa9b6b3b0 t __bpf_prog_run224 ffffffffa9b6b3f0 t __bpf_prog_run192 ffffffffa9b6b530 t __bpf_prog_run32 Fix by not allowing vmlinux_map to be removed by PERF_RECORD_KSYMBOL unregister event. 
Signed-off-by: Tommi Rantala Acked-by: Jiri Olsa Tested-by: Jiri Olsa Link: https://lore.kernel.org/r/20201016114718.54332-1-tommi.t.rantala@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 11 ++++++++++- tools/perf/util/symbol.c | 7 +++++++ tools/perf/util/symbol.h | 2 ++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 7d4194ffc5b0..15385ea00190 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -786,11 +786,20 @@ static int machine__process_ksymbol_unregister(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) { + struct symbol *sym; struct map *map; map = maps__find(&machine->kmaps, event->ksymbol.addr); - if (map) + if (!map) + return 0; + + if (map != machine->vmlinux_map) maps__remove(&machine->kmaps, map); + else { + sym = dso__find_symbol(map->dso, map->map_ip(map, map->start)); + if (sym) + dso__delete_symbol(map->dso, sym); + } return 0; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 6138866665df..0d14abdf3d72 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -515,6 +515,13 @@ void dso__insert_symbol(struct dso *dso, struct symbol *sym) } } +void dso__delete_symbol(struct dso *dso, struct symbol *sym) +{ + rb_erase_cached(&sym->rb_node, &dso->symbols); + symbol__delete(sym); + dso__reset_find_symbol_cache(dso); +} + struct symbol *dso__find_symbol(struct dso *dso, u64 addr) { if (dso->last_find_result.addr != addr || dso->last_find_result.symbol == NULL) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index f4801c488def..954d6a049ee2 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -131,6 +131,8 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map); void dso__insert_symbol(struct dso *dso, struct symbol *sym); +void dso__delete_symbol(struct dso *dso, + struct symbol 
*sym); struct symbol *dso__find_symbol(struct dso *dso, u64 addr); struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name); From a6293f36ac92ab513771a98efe486477be2f981f Mon Sep 17 00:00:00 2001 From: Stanislav Ivanichkin Date: Tue, 27 Oct 2020 12:43:57 +0300 Subject: [PATCH 54/75] perf trace: Fix segfault when trying to trace events by cgroup # ./perf trace -e sched:sched_switch -G test -a sleep 1 perf: Segmentation fault Obtained 11 stack frames. ./perf(sighandler_dump_stack+0x43) [0x55cfdc636db3] /lib/x86_64-linux-gnu/libc.so.6(+0x3efcf) [0x7fd23eecafcf] ./perf(parse_cgroups+0x36) [0x55cfdc673f36] ./perf(+0x3186ed) [0x55cfdc70d6ed] ./perf(parse_options_subcommand+0x629) [0x55cfdc70e999] ./perf(cmd_trace+0x9c2) [0x55cfdc5ad6d2] ./perf(+0x1e8ae0) [0x55cfdc5ddae0] ./perf(+0x1e8ded) [0x55cfdc5ddded] ./perf(main+0x370) [0x55cfdc556f00] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe6) [0x7fd23eeadb96] ./perf(_start+0x29) [0x55cfdc557389] Segmentation fault # It happens because "struct trace" in option->value is passed to the parse_cgroups function instead of "struct evlist". Fixes: 9ea42ba4411ac ("perf trace: Support setting cgroups as targets") Signed-off-by: Stanislav Ivanichkin Tested-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Cc: Dmitry Monakhov Link: http://lore.kernel.org/lkml/20201027094357.94881-1-sivanichkin@yandex-team.ru Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 44a75f234db1..de80534473af 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4639,9 +4639,9 @@ do_concat: err = 0; if (lists[0]) { - struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", - "event selector. 
use 'perf list' to list available events", - parse_events_option); + struct option o = { + .value = &trace->evlist, + }; err = parse_events_option(&o, lists[0], 0); } out: @@ -4655,9 +4655,12 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u { struct trace *trace = opt->value; - if (!list_empty(&trace->evlist->core.entries)) - return parse_cgroups(opt, str, unset); - + if (!list_empty(&trace->evlist->core.entries)) { + struct option o = { + .value = &trace->evlist, + }; + return parse_cgroups(&o, str, unset); + } trace->cgroup = evlist__findnew_cgroup(trace->evlist, str); return 0; From 0dfbe4c646bf06a85c3d70572a8b8aa6ebffe3d5 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 23 Oct 2020 08:53:34 +0800 Subject: [PATCH 55/75] perf vendor events: Fix DRAM_BW_Use 0 issue for CLX/SKX Ian reports an issue that the metric DRAM_BW_Use often remains 0. The metric expression for DRAM_BW_Use on CLX/SKX: "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time" The counts of uncore_imc/cas_count_read/ and uncore_imc/cas_count_write/ are scaled up by 64, that is to turn a count of cache lines into bytes, the count is then divided by 1000000000 to give GB. However, the counts of uncore_imc/cas_count_read/ and uncore_imc/cas_count_write/ have already been scaled. The scale values are from sysfs, such as /sys/devices/uncore_imc_0/events/cas_count_read.scale. It's 6.103515625e-5 (64 / 1024.0 / 1024.0). So if we use the original metric expression, the result is not correct. But the difficulty is, for SKL client, the counts are not scaled.
The metric expression for DRAM_BW_Use on SKL: "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000" root@kbl-ppc:~# perf stat -M DRAM_BW_Use -a -- sleep 1 Performance counter stats for 'system wide': 190 arb/event=0x84,umask=0x1/ # 1.86 DRAM_BW_Use 29,093,178 arb/event=0x81,umask=0x1/ 1,000,703,287 ns duration_time 1.000703287 seconds time elapsed The result is expected. So the easy way is just change the metric expression for CLX/SKX. This patch changes the metric expression to: "( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time" 1048576 = 1024 * 1024. Before (tested on CLX): root@lkp-csl-2sp5 ~# perf stat -M DRAM_BW_Use -a -- sleep 1 Performance counter stats for 'system wide': 765.35 MiB uncore_imc/cas_count_read/ # 0.00 DRAM_BW_Use 5.42 MiB uncore_imc/cas_count_write/ 1001515088 ns duration_time 1.001515088 seconds time elapsed After: root@lkp-csl-2sp5 ~# perf stat -M DRAM_BW_Use -a -- sleep 1 Performance counter stats for 'system wide': 767.95 MiB uncore_imc/cas_count_read/ # 0.80 DRAM_BW_Use 5.02 MiB uncore_imc/cas_count_write/ 1001900010 ns duration_time 1.001900010 seconds time elapsed Fixes: 038d3b53c284 ("perf vendor events intel: Update CascadelakeX events to v1.08") Fixes: b5ff7f2799a4 ("perf vendor events: Update SkylakeX events to v1.21") Signed-off-by: Jin Yao Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20201023005334.7869-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json | 2 +- tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index de3193552277..00f4fcffa815 100644 
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -329,7 +329,7 @@ }, { "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", - "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "MetricExpr": "( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time", "MetricGroup": "Memory_BW;SoC", "MetricName": "DRAM_BW_Use" }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index f31794d3b926..0dd8b13b5cfb 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -323,7 +323,7 @@ }, { "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", - "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "MetricExpr": "( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time", "MetricGroup": "Memory_BW;SoC", "MetricName": "DRAM_BW_Use" }, From 9ae1e990f1ab522b98baefbfebf3cbac1a2cfac2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 28 Oct 2020 09:11:23 +0100 Subject: [PATCH 56/75] perf tools: Remove broken __no_tail_call attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GCC specific __attribute__((optimize)) attribute does not do what is commonly expected and is explicitly recommended against using in production code by the GCC people. Unlike what is often expected, it doesn't add to the optimization flags, but it fully replaces them, losing any and all optimization flags provided by the compiler commandline.
The only guaranteed upon means of inhibiting tail-calls is by placing a volatile asm with side-effects after the call such that the tail-call simply cannot be done. Given the original commit wasn't specific on which calls were the problem, this removal might re-introduce the problem, which can then be re-analyzed and cured properly. Signed-off-by: Peter Zijlstra Acked-by: Ard Biesheuvel Acked-by: Miguel Ojeda Cc: Alexei Starovoitov Cc: Arnd Bergmann Cc: Arvind Sankar Cc: Daniel Borkmann Cc: Geert Uytterhoeven Cc: Ian Rogers Cc: Josh Poimboeuf Cc: Kees Kook Cc: Martin Liška Cc: Nick Desaulniers Cc: Randy Dunlap Cc: Thomas Gleixner Link: http://lore.kernel.org/lkml/20201028081123.GT2628@hirez.programming.kicks-ass.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler-gcc.h | 12 ------------ tools/include/linux/compiler.h | 3 --- tools/perf/tests/dwarf-unwind.c | 10 +++++----- 3 files changed, 5 insertions(+), 20 deletions(-) diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index b9d4322e1e65..95c072b70d0e 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -27,18 +27,6 @@ #define __pure __attribute__((pure)) #endif #define noinline __attribute__((noinline)) -#ifdef __has_attribute -#if __has_attribute(disable_tail_calls) -#define __no_tail_call __attribute__((disable_tail_calls)) -#endif -#endif -#ifndef __no_tail_call -#if GCC_VERSION > 40201 -#define __no_tail_call __attribute__((optimize("no-optimize-sibling-calls"))) -#else -#define __no_tail_call -#endif -#endif #ifndef __packed #define __packed __attribute__((packed)) #endif diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 2b3f7353e891..d22a974372c0 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -47,9 +47,6 @@ #ifndef noinline #define noinline #endif -#ifndef __no_tail_call -#define __no_tail_call -#endif /* Are two types/vars the same type 
(ignoring qualifiers)? */ #ifndef __same_type diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 2491d167bf76..83638097c3bc 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -95,7 +95,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) return strcmp((const char *) symbol, funcs[idx]); } -__no_tail_call noinline int test_dwarf_unwind__thread(struct thread *thread) +noinline int test_dwarf_unwind__thread(struct thread *thread) { struct perf_sample sample; unsigned long cnt = 0; @@ -126,7 +126,7 @@ __no_tail_call noinline int test_dwarf_unwind__thread(struct thread *thread) static int global_unwind_retval = -INT_MAX; -__no_tail_call noinline int test_dwarf_unwind__compare(void *p1, void *p2) +noinline int test_dwarf_unwind__compare(void *p1, void *p2) { /* Any possible value should be 'thread' */ struct thread *thread = *(struct thread **)p1; @@ -145,7 +145,7 @@ __no_tail_call noinline int test_dwarf_unwind__compare(void *p1, void *p2) return p1 - p2; } -__no_tail_call noinline int test_dwarf_unwind__krava_3(struct thread *thread) +noinline int test_dwarf_unwind__krava_3(struct thread *thread) { struct thread *array[2] = {thread, thread}; void *fp = &bsearch; @@ -164,12 +164,12 @@ __no_tail_call noinline int test_dwarf_unwind__krava_3(struct thread *thread) return global_unwind_retval; } -__no_tail_call noinline int test_dwarf_unwind__krava_2(struct thread *thread) +noinline int test_dwarf_unwind__krava_2(struct thread *thread) { return test_dwarf_unwind__krava_3(thread); } -__no_tail_call noinline int test_dwarf_unwind__krava_1(struct thread *thread) +noinline int test_dwarf_unwind__krava_1(struct thread *thread) { return test_dwarf_unwind__krava_2(thread); } From d0e7b0c71fbb653de90a7163ef46912a96f0bdaf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 30 Oct 2020 08:24:38 -0300 Subject: [PATCH 57/75] perf scripting python: Avoid declaring function pointers with a 
visibility attribute To avoid this: util/scripting-engines/trace-event-python.c: In function 'python_start_script': util/scripting-engines/trace-event-python.c:1595:2: error: 'visibility' attribute ignored [-Werror=attributes] 1595 | PyMODINIT_FUNC (*initfunc)(void); | ^~~~~~~~~~~~~~ That started breaking when building with PYTHON=python3 and these gcc versions (I haven't checked with the clang ones, maybe it breaks there as well): # export PERF_TARBALL=http://192.168.86.5/perf/perf-5.9.0.tar.xz # dm fedora:33 fedora:rawhide 1 107.80 fedora:33 : Ok gcc (GCC) 10.2.1 20201005 (Red Hat 10.2.1-5), clang version 11.0.0 (Fedora 11.0.0-1.fc33) 2 92.47 fedora:rawhide : Ok gcc (GCC) 10.2.1 20201016 (Red Hat 10.2.1-6), clang version 11.0.0 (Fedora 11.0.0-1.fc34) # Avoid that by ditching that 'initfunc' function pointer with its: #define Py_EXPORTED_SYMBOL _attribute_ ((visibility ("default"))) #define PyMODINIT_FUNC Py_EXPORTED_SYMBOL PyObject* And just call PyImport_AppendInittab() at the end of the ifdef python3 block with the functions that were being attributed to that initfunc. 
Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-python.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 7cbd024e3e63..c83c2c6564e0 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1592,7 +1592,6 @@ static void _free_command_line(wchar_t **command_line, int num) static int python_start_script(const char *script, int argc, const char **argv) { struct tables *tables = &tables_global; - PyMODINIT_FUNC (*initfunc)(void); #if PY_MAJOR_VERSION < 3 const char **command_line; #else @@ -1607,20 +1606,18 @@ static int python_start_script(const char *script, int argc, const char **argv) FILE *fp; #if PY_MAJOR_VERSION < 3 - initfunc = initperf_trace_context; command_line = malloc((argc + 1) * sizeof(const char *)); command_line[0] = script; for (i = 1; i < argc + 1; i++) command_line[i] = argv[i - 1]; + PyImport_AppendInittab(name, initperf_trace_context); #else - initfunc = PyInit_perf_trace_context; command_line = malloc((argc + 1) * sizeof(wchar_t *)); command_line[0] = Py_DecodeLocale(script, NULL); for (i = 1; i < argc + 1; i++) command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); + PyImport_AppendInittab(name, PyInit_perf_trace_context); #endif - - PyImport_AppendInittab(name, initfunc); Py_Initialize(); #if PY_MAJOR_VERSION < 3 From ad6330ac2c5a38e5573cb6ae8ff75288bfd96325 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 12:38:16 -0300 Subject: [PATCH 58/75] tools headers UAPI: Sync prctl.h with the kernel sources To get the changes in: 1c101da8b971a366 ("arm64: mte: Allow user control of the tag check mode via prctl()") af5ce95282dc99d0 ("arm64: mte: Allow user control of the generated random tags via prctl()") 
Which don't cause any change in tooling, only addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/prctl.h' differs from latest version at 'include/uapi/linux/prctl.h' diff -u tools/include/uapi/linux/prctl.h include/uapi/linux/prctl.h Cc: Adrian Hunter Cc: Catalin Marinas Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/prctl.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 07b4f8131e36..7f0827705c9a 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -233,6 +233,15 @@ struct prctl_mm_map { #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) +/* MTE tag check fault modes */ +# define PR_MTE_TCF_SHIFT 1 +# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) +/* MTE tag inclusion mask */ +# define PR_MTE_TAG_SHIFT 3 +# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 From 9e228f48980635c187720c0956b39c04db5e8f56 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 12:41:58 -0300 Subject: [PATCH 59/75] tools headers UAPI: Sync drm/i915_drm.h with the kernel sources To pick the changes in: 13149e8bafc46572 ("drm/i915: add syncobj timeline support") cda9edd02425d790 ("drm/i915: introduce a mechanism to extend execbuf2") That don't result in any changes in tooling, just silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h' diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h Cc: Adrian Hunter Cc: Ian 
Rogers Cc: Jiri Olsa Cc: Lionel Landwerlin Cc: Namhyung Kim Cc: Rodrigo Vivi Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/i915_drm.h | 59 +++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 00546062e023..fa1f3d62f9a6 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_PERF_REVISION 54 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * timeline syncobj through drm_i915_gem_execbuffer_ext_timeline_fences. See + * I915_EXEC_USE_EXTENSIONS. + */ +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1046,6 +1052,38 @@ struct drm_i915_gem_exec_fence { __u32 flags; }; +/** + * See drm_i915_gem_execbuffer_ext_timeline_fences. + */ +#define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0 + +/** + * This structure describes an array of drm_syncobj and associated points for + * timeline variants of drm_syncobj. It is invalid to append this structure to + * the execbuf if I915_EXEC_FENCE_ARRAY is set. + */ +struct drm_i915_gem_execbuffer_ext_timeline_fences { + struct i915_user_extension base; + + /** + * Number of element in the handles_ptr & value_ptr arrays. + */ + __u64 fence_count; + + /** + * Pointer to an array of struct drm_i915_gem_exec_fence of length + * fence_count. + */ + __u64 handles_ptr; + + /** + * Pointer to an array of u64 values of length fence_count. Values + * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline + * drm_syncobj is invalid as it turns a drm_syncobj into a binary one. 
+ */ + __u64 values_ptr; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs @@ -1062,8 +1100,14 @@ struct drm_i915_gem_execbuffer2 { __u32 num_cliprects; /** * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY - * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a - * struct drm_i915_gem_exec_fence *fences. + * & I915_EXEC_USE_EXTENSIONS are not set. + * + * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array + * of struct drm_i915_gem_exec_fence and num_cliprects is the length + * of the array. + * + * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a + * single struct i915_user_extension and num_cliprects is 0. */ __u64 cliprects_ptr; #define I915_EXEC_RING_MASK (0x3f) @@ -1181,7 +1225,16 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_FENCE_SUBMIT (1 << 20) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1)) +/* + * Setting I915_EXEC_USE_EXTENSIONS implies that + * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to an linked + * list of i915_user_extension. Each i915_user_extension node is the base of a + * larger structure. The list of supported structures are listed in the + * drm_i915_gem_execbuffer_ext enum. 
+ */ +#define I915_EXEC_USE_EXTENSIONS (1 << 21) + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS << 1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ From d0448d6a249b6fc4518181b214d3403dfe2c8075 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:12:52 -0300 Subject: [PATCH 60/75] tools headers UAPI: Update fscrypt.h copy To get the changes from: c7f0207b613033c5 ("fscrypt: make "#define fscrypt_policy" user-only") That don't cause any changes in tools/perf, only addresses this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/fscrypt.h' differs from latest version at 'include/uapi/linux/fscrypt.h' diff -u tools/include/uapi/linux/fscrypt.h include/uapi/linux/fscrypt.h Cc: Adrian Hunter Cc: Eric Biggers Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fscrypt.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index 7875709ccfeb..e5de60336938 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -45,7 +45,6 @@ struct fscrypt_policy_v1 { __u8 flags; __u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE]; }; -#define fscrypt_policy fscrypt_policy_v1 /* * Process-subscribed "logon" key description prefix and payload format. 
@@ -156,9 +155,9 @@ struct fscrypt_get_key_status_arg { __u32 __out_reserved[13]; }; -#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy) +#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy_v1) #define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) -#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy) +#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy_v1) #define FS_IOC_GET_ENCRYPTION_POLICY_EX _IOWR('f', 22, __u8[9]) /* size + version */ #define FS_IOC_ADD_ENCRYPTION_KEY _IOWR('f', 23, struct fscrypt_add_key_arg) #define FS_IOC_REMOVE_ENCRYPTION_KEY _IOWR('f', 24, struct fscrypt_remove_key_arg) @@ -170,6 +169,7 @@ struct fscrypt_get_key_status_arg { /* old names; don't add anything new here! */ #ifndef __KERNEL__ +#define fscrypt_policy fscrypt_policy_v1 #define FS_KEY_DESCRIPTOR_SIZE FSCRYPT_KEY_DESCRIPTOR_SIZE #define FS_POLICY_FLAGS_PAD_4 FSCRYPT_POLICY_FLAGS_PAD_4 #define FS_POLICY_FLAGS_PAD_8 FSCRYPT_POLICY_FLAGS_PAD_8 From 40a6bbf5149c7302bd7515fb5e2c3d12bac462f5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:21:04 -0300 Subject: [PATCH 61/75] tools x86 headers: Update cpufeatures.h headers copies To pick the changes from: 5866e9205b47a983 ("x86/cpu: Add hardware-enforced cache coherency as a CPUID feature") ff4f82816dff28ff ("x86/cpufeatures: Enumerate ENQCMD and ENQCMDS instructions") 360e7c5c4ca4fd8e ("x86/cpufeatures: Add SEV-ES CPU feature") 18ec63faefb3fd31 ("x86/cpufeatures: Enumerate TSX suspend load address tracking instructions") e48cb1a3fb916500 ("x86/resctrl: Enumerate per-thread MBA controls") Which don't cause any changes in tooling, just addresses these build warnings: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Warning: Kernel ABI header 
at 'tools/arch/x86/include/asm/disabled-features.h' differs from latest version at 'arch/x86/include/asm/disabled-features.h' diff -u tools/arch/x86/include/asm/disabled-features.h arch/x86/include/asm/disabled-features.h Cc: Adrian Hunter Cc: Borislav Petkov Cc: Fenghua Yu Cc: Ian Rogers Cc: Jiri Olsa Cc: Krish Sadhukhan Cc: Kyung Min Park Cc: Namhyung Kim Cc: Tom Lendacky Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 6 +++++- tools/arch/x86/include/asm/disabled-features.h | 9 ++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 2901d5df4366..dad350d42ecf 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -96,7 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -/* free ( 3*32+17) */ +#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ @@ -236,6 +236,7 @@ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -288,6 +289,7 @@ #define 
X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ +#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ @@ -353,6 +355,7 @@ #define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ #define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ #define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ +#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ @@ -368,6 +371,7 @@ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ +#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 4ea8584682f9..5861d34f9771 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -56,6 +56,12 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif +#ifdef CONFIG_IOMMU_SUPPORT +# define DISABLE_ENQCMD 0 +#else +# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -75,7 +81,8 @@ 
#define DISABLED_MASK13 0 #define DISABLED_MASK14 0 #define DISABLED_MASK15 0 -#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP) +#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \ + DISABLE_ENQCMD) #define DISABLED_MASK17 0 #define DISABLED_MASK18 0 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) From 8b2fc25a945b125c7ee4c36b048ad65f7c04105e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:24:52 -0300 Subject: [PATCH 62/75] tools x86 headers: Update required-features.h header from the kernel To pick the changes from: ecac71816a1829c0 ("x86/paravirt: Use CONFIG_PARAVIRT_XXL instead of CONFIG_PARAVIRT") That doesn't entail any changes in tooling, just addresses this perf tools build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/required-features.h' differs from latest version at 'arch/x86/include/asm/required-features.h' diff -u tools/arch/x86/include/asm/required-features.h arch/x86/include/asm/required-features.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Juergen Gross Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/required-features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index 6847d85400a8..3ff0d48469f2 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -54,7 +54,7 @@ #endif #ifdef CONFIG_X86_64 -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL /* Paravirtualized systems may not have PSE or PGE available */ #define NEED_PSE 0 #define NEED_PGE 0 From 32b734e09ec38a0bb81d05d37056a95584d14c99 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:28:04 -0300 Subject: [PATCH 63/75] tools arch x86: Sync the msr-index.h copy with the kernel sources To pick up the changes
in: 29dcc60f6a19fb0a ("x86/boot/compressed/64: Add stage1 #VC handler") 36e1be8ada994d50 ("perf/x86/amd/ibs: Fix raw sample data accumulation") 59a854e2f3b90ad2 ("perf/x86/intel: Support TopDown metrics on Ice Lake") 7b2c05a15d29d057 ("perf/x86/intel: Generic support for hardware TopDown metrics") 99e40204e014e066 ("x86/msr: Move the F15h MSRs where they belong") b57de6cd16395be1 ("x86/sev-es: Add SEV-ES Feature Detection") ed7bde7a6dab521e ("cpufreq: intel_pstate: Allow enable/disable energy efficiency") f0f2f9feb4ee6f28 ("x86/msr-index: Define an IA32_PASID MSR") That cause these changes in tooling: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after --- before 2020-10-19 13:27:33.195274425 -0300 +++ after 2020-10-19 13:27:44.144507610 -0300 @@ -113,6 +113,8 @@ [0x00000309] = "CORE_PERF_FIXED_CTR0", [0x0000030a] = "CORE_PERF_FIXED_CTR1", [0x0000030b] = "CORE_PERF_FIXED_CTR2", + [0x0000030c] = "CORE_PERF_FIXED_CTR3", + [0x00000329] = "PERF_METRICS", [0x00000345] = "IA32_PERF_CAPABILITIES", [0x0000038d] = "CORE_PERF_FIXED_CTR_CTRL", [0x0000038e] = "CORE_PERF_GLOBAL_STATUS", @@ -222,6 +224,7 @@ [0x00000774] = "HWP_REQUEST", [0x00000777] = "HWP_STATUS", [0x00000d90] = "IA32_BNDCFGS", + [0x00000d93] = "IA32_PASID", [0x00000da0] = "IA32_XSS", [0x00000dc0] = "LBR_INFO_0", [0x00000ffc] = "IA32_BNDCFGS_RSVD", @@ -279,6 +282,7 @@ [0xc0010115 - x86_AMD_V_KVM_MSRs_offset] = "VM_IGNNE", [0xc0010117 - x86_AMD_V_KVM_MSRs_offset] = "VM_HSAVE_PA", [0xc001011f - x86_AMD_V_KVM_MSRs_offset] = "AMD64_VIRT_SPEC_CTRL", + [0xc0010130 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_SEV_ES_GHCB", [0xc0010131 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_SEV", [0xc0010140 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_OSVW_ID_LENGTH", [0xc0010141 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_OSVW_STATUS", $ Which causes these parts of tools/perf/ to be rebuilt: CC 
/tmp/build/perf/trace/beauty/tracepoints/x86_msr.o DESCEND plugins GEN /tmp/build/perf/python/perf.so INSTALL trace_plugins LD /tmp/build/perf/trace/beauty/tracepoints/perf-in.o LD /tmp/build/perf/trace/beauty/perf-in.o LD /tmp/build/perf/perf-in.o LINK /tmp/build/perf/per At some point these should just be tables read by perf on demand. This addresses this perf tools build warning: diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h' Cc: Adrian Hunter Cc: Borislav Petkov Cc: Fenghua Yu Cc: Ian Rogers Cc: Jiri Olsa Cc: Joerg Roedel Cc: Kan Liang Cc: Kim Phillips Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Srinivas Pandruvada Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 2859ee4f39a8..972a34d93505 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -257,6 +257,9 @@ #define MSR_IA32_LASTINTFROMIP 0x000001dd #define MSR_IA32_LASTINTTOIP 0x000001de +#define MSR_IA32_PASID 0x00000d93 +#define MSR_IA32_PASID_VALID BIT_ULL(31) + /* DEBUGCTLMSR bits (others vary by model): */ #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ #define DEBUGCTLMSR_BTF_SHIFT 1 @@ -464,11 +467,15 @@ #define MSR_AMD64_IBSOP_REG_MASK ((1UL< Date: Mon, 19 Oct 2020 13:36:41 -0300 Subject: [PATCH 64/75] tools UAPI: Update copy of linux/mman.h from the kernel sources e47168f3d1b14af5 ("powerpc/8xx: Support 16k hugepages with 4k pages") That don't cause any changes in tooling, just addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/mman.h' differs from latest version at 'include/uapi/linux/mman.h' diff -u tools/include/uapi/linux/mman.h 
include/uapi/linux/mman.h Cc: Adrian Hunter Cc: Christophe Leroy Cc: Ian Rogers Cc: Jiri Olsa Cc: Michael Ellerman Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/mman.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h index 923cc162609c..f55bc680b5b0 100644 --- a/tools/include/uapi/linux/mman.h +++ b/tools/include/uapi/linux/mman.h @@ -27,6 +27,7 @@ #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT #define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK +#define MAP_HUGE_16KB HUGETLB_FLAG_ENCODE_16KB #define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB #define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB #define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB From aa04899a13078e4181146212555a1bbaa387d2c9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2020 13:42:55 -0300 Subject: [PATCH 65/75] tools kvm headers: Update KVM headers from the kernel sources Some should cause changes in tooling, like the one adding LAST_EXCP, but the way it is structured ends up not making that happen. The new SVM_EXIT_INVPCID should get used by arch/x86/util/kvm-stat.c, in the svm_exit_reasons table. The tools/perf/trace/beauty part has scripts to catch changes and automagically create tables, like tools/perf/trace/beauty/kvm_ioctl.sh, but changes are needed to make tools/perf/arch/x86/util/kvm-stat.c catch those automatically.
These were handled by the existing scripts: $ tools/perf/trace/beauty/kvm_ioctl.sh > before $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h $ tools/perf/trace/beauty/kvm_ioctl.sh > after $ diff -u before after --- before 2020-11-03 08:43:52.910728608 -0300 +++ after 2020-11-03 08:44:04.273959984 -0300 @@ -89,6 +89,7 @@ [0xbf] = "SET_NESTED_STATE", [0xc0] = "CLEAR_DIRTY_LOG", [0xc1] = "GET_SUPPORTED_HV_CPUID", + [0xc6] = "X86_SET_MSR_FILTER", [0xe0] = "CREATE_DEVICE", [0xe1] = "SET_DEVICE_ATTR", [0xe2] = "GET_DEVICE_ATTR", $ $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > before $ cp include/uapi/linux/vhost.h tools/include/uapi/linux/vhost.h $ $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > after $ diff -u before after --- before 2020-11-03 08:45:55.522225198 -0300 +++ after 2020-11-03 08:46:12.881578666 -0300 @@ -37,4 +37,5 @@ [0x71] = "VDPA_GET_STATUS", [0x73] = "VDPA_GET_CONFIG", [0x76] = "VDPA_GET_VRING_NUM", + [0x78] = "VDPA_GET_IOVA_RANGE", }; $ This addresses these perf build warnings: Warning: Kernel ABI header at 'tools/arch/arm64/include/uapi/asm/kvm.h' differs from latest version at 'arch/arm64/include/uapi/asm/kvm.h' diff -u tools/arch/arm64/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h Warning: Kernel ABI header at 'tools/arch/s390/include/uapi/asm/sie.h' differs from latest version at 'arch/s390/include/uapi/asm/sie.h' diff -u tools/arch/s390/include/uapi/asm/sie.h arch/s390/include/uapi/asm/sie.h Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/kvm.h' differs from latest version at 'arch/x86/include/uapi/asm/kvm.h' diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/svm.h' differs from latest version at 'arch/x86/include/uapi/asm/svm.h' diff -u tools/arch/x86/include/uapi/asm/svm.h arch/x86/include/uapi/asm/svm.h Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 
'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Warning: Kernel ABI header at 'tools/include/uapi/linux/vhost.h' differs from latest version at 'include/uapi/linux/vhost.h' diff -u tools/include/uapi/linux/vhost.h include/uapi/linux/vhost.h Cc: Adrian Hunter Cc: Alexander Yarygin Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Cornelia Huck Cc: David Ahern Cc: Ian Rogers Cc: Jiri Olsa Cc: Joerg Roedel Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/uapi/asm/kvm.h | 25 +++++++++++++++++++++++++ tools/arch/s390/include/uapi/asm/sie.h | 2 +- tools/arch/x86/include/uapi/asm/kvm.h | 20 ++++++++++++++++++++ tools/arch/x86/include/uapi/asm/svm.h | 13 +++++++++++++ tools/include/uapi/linux/kvm.h | 19 +++++++++++++++++++ tools/include/uapi/linux/vhost.h | 4 ++++ 6 files changed, 82 insertions(+), 1 deletion(-) diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index ba85bb23f060..1c17c3a24411 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -159,6 +159,21 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* + * PMU filter structure. Describe a range of events with a particular + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. 
+ */ +struct kvm_pmu_event_filter { + __u16 base_event; + __u16 nevents; + +#define KVM_PMU_EVENT_ALLOW 0 +#define KVM_PMU_EVENT_DENY 1 + + __u8 action; + __u8 pad[3]; +}; + /* for KVM_GET/SET_VCPU_EVENTS */ struct kvm_vcpu_events { struct { @@ -242,6 +257,15 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2 + +/* + * Only two states can be presented by the host kernel: + * - NOT_REQUIRED: the guest doesn't need to do anything + * - NOT_AVAIL: the guest isn't mitigated (it can still use SSBS if available) + * + * All the other values are deprecated. The host still accepts all + * values (they are ABI), but will narrow them to the above two. + */ #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2) #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1 @@ -329,6 +353,7 @@ struct kvm_vcpu_events { #define KVM_ARM_VCPU_PMU_V3_CTRL 0 #define KVM_ARM_VCPU_PMU_V3_IRQ 0 #define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_PMU_V3_FILTER 2 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 diff --git a/tools/arch/s390/include/uapi/asm/sie.h b/tools/arch/s390/include/uapi/asm/sie.h index 6ca1e68d7103..ede318653c87 100644 --- a/tools/arch/s390/include/uapi/asm/sie.h +++ b/tools/arch/s390/include/uapi/asm/sie.h @@ -29,7 +29,7 @@ { 0x13, "SIGP conditional emergency signal" }, \ { 0x15, "SIGP sense running" }, \ { 0x16, "SIGP set multithreading"}, \ - { 0x17, "SIGP store additional status ait address"} + { 0x17, "SIGP store additional status at address"} #define icpt_prog_codes \ { 0x0001, "Prog Operation" }, \ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 0780f97c1850..89e5f3d1bba8 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ 
b/tools/arch/x86/include/uapi/asm/kvm.h @@ -192,6 +192,26 @@ struct kvm_msr_list { __u32 indices[0]; }; +/* Maximum size of any access bitmap in bytes */ +#define KVM_MSR_FILTER_MAX_BITMAP_SIZE 0x600 + +/* for KVM_X86_SET_MSR_FILTER */ +struct kvm_msr_filter_range { +#define KVM_MSR_FILTER_READ (1 << 0) +#define KVM_MSR_FILTER_WRITE (1 << 1) + __u32 flags; + __u32 nmsrs; /* number of msrs in bitmap */ + __u32 base; /* MSR index the bitmap starts at */ + __u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */ +}; + +#define KVM_MSR_FILTER_MAX_RANGES 16 +struct kvm_msr_filter { +#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0) +#define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0) + __u32 flags; + struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES]; +}; struct kvm_cpuid_entry { __u32 function; diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 2e8a30f06c74..f1d8307454e0 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -29,6 +29,7 @@ #define SVM_EXIT_WRITE_DR6 0x036 #define SVM_EXIT_WRITE_DR7 0x037 #define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_LAST_EXCP 0x05f #define SVM_EXIT_INTR 0x060 #define SVM_EXIT_NMI 0x061 #define SVM_EXIT_SMI 0x062 @@ -76,10 +77,21 @@ #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_RDPRU 0x08e +#define SVM_EXIT_INVPCID 0x0a2 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 +/* SEV-ES software-defined VMGEXIT events */ +#define SVM_VMGEXIT_MMIO_READ 0x80000001 +#define SVM_VMGEXIT_MMIO_WRITE 0x80000002 +#define SVM_VMGEXIT_NMI_COMPLETE 0x80000003 +#define SVM_VMGEXIT_AP_HLT_LOOP 0x80000004 +#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005 +#define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0 +#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1 +#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff + #define SVM_EXIT_ERR -1 #define SVM_EXIT_REASONS \ @@ -171,6 +183,7 
@@ { SVM_EXIT_MONITOR, "monitor" }, \ { SVM_EXIT_MWAIT, "mwait" }, \ { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_INVPCID, "invpcid" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 7d8eced6f459..ca41220b40b8 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -248,6 +248,8 @@ struct kvm_hyperv_exit { #define KVM_EXIT_IOAPIC_EOI 26 #define KVM_EXIT_HYPERV 27 #define KVM_EXIT_ARM_NISV 28 +#define KVM_EXIT_X86_RDMSR 29 +#define KVM_EXIT_X86_WRMSR 30 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -413,6 +415,17 @@ struct kvm_run { __u64 esr_iss; __u64 fault_ipa; } arm_nisv; + /* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */ + struct { + __u8 error; /* user -> kernel */ + __u8 pad[7]; +#define KVM_MSR_EXIT_REASON_INVAL (1 << 0) +#define KVM_MSR_EXIT_REASON_UNKNOWN (1 << 1) +#define KVM_MSR_EXIT_REASON_FILTER (1 << 2) + __u32 reason; /* kernel -> user */ + __u32 index; /* kernel -> user */ + __u64 data; /* kernel <-> user */ + } msr; /* Fix the size of the union. 
*/ char padding[256]; }; @@ -1037,6 +1050,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SMALLER_MAXPHYADDR 185 #define KVM_CAP_S390_DIAG318 186 #define KVM_CAP_STEAL_TIME 187 +#define KVM_CAP_X86_USER_SPACE_MSR 188 +#define KVM_CAP_X86_MSR_FILTER 189 +#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #ifdef KVM_CAP_IRQ_ROUTING @@ -1538,6 +1554,9 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_S390_PROTECTED */ #define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) +/* Available with KVM_CAP_X86_MSR_FILTER */ +#define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index 75232185324a..c998860d7bbc 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -146,4 +146,8 @@ /* Set event fd for config interrupt*/ #define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int) + +/* Get the valid iova range */ +#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ + struct vhost_vdpa_iova_range) #endif From a9e27f5f9827eab25b76155fddcc22ddeeed58d2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Nov 2020 08:49:59 -0300 Subject: [PATCH 66/75] tools headers UAPI: Update tools's copy of linux/perf_event.h The diff is just tabs versus spaces, trivial. 
This silences this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 3e5dcdd48a49..b95d3c485d27 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1196,7 +1196,7 @@ union perf_mem_data_src { #define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ /* 1 free */ -#define PERF_MEM_SNOOPX_SHIFT 38 +#define PERF_MEM_SNOOPX_SHIFT 38 /* locked instruction */ #define PERF_MEM_LOCK_NA 0x01 /* not available */ From 42cc0e70a21faa8e7d7ea8713a3f9cd64bd3f60a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Nov 2020 08:52:11 -0300 Subject: [PATCH 67/75] tools include UAPI: Update linux/mount.h copy To pick the changes from: dab741e0e02bd3c4 ("Add a "nosymfollow" mount option.") That ends up adding support for the new MS_NOSYMFOLLOW mount flag: $ tools/perf/trace/beauty/mount_flags.sh > before $ cp include/uapi/linux/mount.h tools/include/uapi/linux/mount.h $ tools/perf/trace/beauty/mount_flags.sh > after $ diff -u before after --- before 2020-11-03 08:51:28.117997454 -0300 +++ after 2020-11-03 08:51:38.992218869 -0300 @@ -7,6 +7,7 @@ [32 ? (ilog2(32) + 1) : 0] = "REMOUNT", [64 ? (ilog2(64) + 1) : 0] = "MANDLOCK", [128 ? (ilog2(128) + 1) : 0] = "DIRSYNC", + [256 ? (ilog2(256) + 1) : 0] = "NOSYMFOLLOW", [1024 ? (ilog2(1024) + 1) : 0] = "NOATIME", [2048 ? (ilog2(2048) + 1) : 0] = "NODIRATIME", [4096 ? (ilog2(4096) + 1) : 0] = "BIND", $ So now one can use it in --filter expressions for tracepoints. 
This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/mount.h' differs from latest version at 'include/uapi/linux/mount.h' diff -u tools/include/uapi/linux/mount.h include/uapi/linux/mount.h Cc: Adrian Hunter Cc: Al Viro Cc: Ian Rogers Cc: Jiri Olsa Cc: Mattias Nissler Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/mount.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index 96a0240f23fe..dd8306ea336c 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -16,6 +16,7 @@ #define MS_REMOUNT 32 /* Alter flags of a mounted FS */ #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ #define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */ #define MS_NOATIME 1024 /* Do not update access times. */ #define MS_NODIRATIME 2048 /* Do not update directory access times */ #define MS_BIND 4096 From 86449b12f626a65d2a2ecfada1e024488471f9e2 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 30 Oct 2020 16:54:31 -0700 Subject: [PATCH 68/75] perf hists browser: Increase size of 'buf' in perf_evsel__hists_browse() Making perf with gcc-9.1.1 generates the following warning: CC ui/browsers/hists.o ui/browsers/hists.c: In function 'perf_evsel__hists_browse': ui/browsers/hists.c:3078:61: error: '%d' directive output may be \ truncated writing between 1 and 11 bytes into a region of size \ between 2 and 12 [-Werror=format-truncation=] 3078 | "Max event group index to sort is %d (index from 0 to %d)", | ^~ ui/browsers/hists.c:3078:7: note: directive argument in the range [-2147483648, 8] 3078 | "Max event group index to sort is %d (index from 0 to %d)", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from /usr/include/stdio.h:937, from ui/browsers/hists.c:5: IOW, the string in line 3078 might be too long
for buf[] of 64 bytes. Fix this by increasing the size of buf[] to 128. Fixes: dbddf1747441 ("perf report/top TUI: Support hotkeys to let user select any event for sorting") Signed-off-by: Song Liu Acked-by: Jiri Olsa Cc: Jin Yao Cc: stable@vger.kernel.org # v5.7+ Link: http://lore.kernel.org/lkml/20201030235431.534417-1-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index a07626f07208..b0e1880cf992 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2963,7 +2963,7 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events, struct popup_action actions[MAX_OPTIONS]; int nr_options = 0; int key = -1; - char buf[64]; + char buf[128]; int delay_secs = hbt ? hbt->refresh : 0; #define HIST_BROWSER_HELP_COMMON \ From 6311951d4f8f28c43b554ff0719027884bedd7e3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Nov 2020 00:31:02 +0100 Subject: [PATCH 69/75] perf tools: Initialize output buffer in build_id__sprintf We display garbage for undefined build_id objects, because we don't initialize the output buffer. 
Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20201101233103.3537427-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 8763772f1095..6b410c3d52dc 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -102,6 +102,8 @@ int build_id__sprintf(const struct build_id *build_id, char *bf) const u8 *raw = build_id->data; size_t i; + bf[0] = 0x0; + for (i = 0; i < build_id->size; ++i) { sprintf(bid, "%02x", *raw); ++raw; From fe01adb72356a4e2f8735e4128af85921ca98fa1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Nov 2020 00:31:03 +0100 Subject: [PATCH 70/75] perf tools: Add missing swap for ino_generation We are missing the swap for the ino_generation field. Fixes: 5c5e854bc760 ("perf tools: Add attr->mmap2 support") Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20201101233103.3537427-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 7a5f03764702..d20b16ee7377 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -595,6 +595,7 @@ static void perf_event__mmap2_swap(union perf_event *event, event->mmap2.maj = bswap_32(event->mmap2.maj); event->mmap2.min = bswap_32(event->mmap2.min); event->mmap2.ino = bswap_64(event->mmap2.ino); + event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation); if (sample_id_all) { void *data = &event->mmap2.filename; From 2c589d933e54d183ee2a052971b730e423c62031 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 2 Nov 2020 23:02:28 +0900 Subject: [PATCH 71/75] perf tools: Add missing swap for cgroup events The swap function for PERF_RECORD_CGROUP was never added; add it. 
Fixes: ba78c1c5461c ("perf tools: Basic support for CGROUP event") Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201102140228.303657-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d20b16ee7377..098080287c68 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -711,6 +711,18 @@ static void perf_event__namespaces_swap(union perf_event *event, swap_sample_id_all(event, &event->namespaces.link_info[i]); } +static void perf_event__cgroup_swap(union perf_event *event, bool sample_id_all) +{ + event->cgroup.id = bswap_64(event->cgroup.id); + + if (sample_id_all) { + void *data = &event->cgroup.path; + + data += PERF_ALIGN(strlen(data) + 1, sizeof(u64)); + swap_sample_id_all(event, data); + } +} + static u8 revbyte(u8 b) { int rev = (b >> 4) | ((b & 0xf) << 4); @@ -953,6 +965,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_SWITCH] = perf_event__switch_swap, [PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap, [PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap, + [PERF_RECORD_CGROUP] = perf_event__cgroup_swap, [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, From 5d020cbd86204e51da05628623a6f9729d4b04c8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Nov 2020 09:24:20 -0300 Subject: [PATCH 72/75] tools feature: Fixup fast path feature detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 22dd1ac91a776752 ("tools: Remove feature-libelf-mmap feature detection") correctly simplified this feature detection, but forgot to remove the call 
to the removed function in the main() function for the test-all.c fast path feature detection, making it fail and thus do all the feature detection individually, fix it. $ cat /tmp/build/perf/feature/test-all.make.output test-all.c: In function ‘main’: test-all.c:188:2: error: implicit declaration of function ‘main_test_libelf_mmap’; did you mean ‘main_test_libelf’? [-Werror=implicit-function-declaration] 188 | main_test_libelf_mmap(); | ^~~~~~~~~~~~~~~~~~~~~ | main_test_libelf cc1: all warnings being treated as errors $ vim tools/build/feature/test-all.c $ rm -rf /tmp/build/perf ; mkdir -p /tmp/build/perf ;make V=1 -k O=/tmp/build/perf -C tools/perf install-bin ; perf test python $ cat /tmp/build/perf/feature/test-all.make.output $ Fixes: 22dd1ac91a776752 ("tools: Remove feature-libelf-mmap feature detection") Cc: Alexei Starovoitov Cc: Andrii Nakryiko Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/test-all.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index a04e81321c66..464873883396 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -185,7 +185,6 @@ int main(int argc, char *argv[]) main_test_libperl(); main_test_hello(); main_test_libelf(); - main_test_libelf_mmap(); main_test_get_current_dir_name(); main_test_gettid(); main_test_glibc(); From 4f3e69060dc9cc8f14ad9e172ada7120dc76445b Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 30 Oct 2020 09:35:39 -0600 Subject: [PATCH 73/75] docs: fix automarkup regression on Python 2 It turns out that the Python 2 re module lacks the ASCII flag, so don't try to use it there. 
Fixes: f66e47f98c1e ("docs: automarkup.py: Fix regexes to solve sphinx 3 warnings") Reported-by: Dafna Hirschfeld Signed-off-by: Jonathan Corbet --- Documentation/sphinx/automarkup.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/Documentation/sphinx/automarkup.py b/Documentation/sphinx/automarkup.py index 409dbc4100de..3e81ebab26ed 100644 --- a/Documentation/sphinx/automarkup.py +++ b/Documentation/sphinx/automarkup.py @@ -15,6 +15,14 @@ else: import re from itertools import chain +# +# Python 2 lacks re.ASCII... +# +try: + ascii_p3 = re.ASCII +except AttributeError: + ascii_p3 = 0 + # # Regex nastiness. Of course. # Try to identify "function()" that's not already marked up some @@ -22,22 +30,22 @@ from itertools import chain # :c:func: block (i.e. ":c:func:`mmap()`s" flakes out), so the last # bit tries to restrict matches to things that won't create trouble. # -RE_function = re.compile(r'\b(([a-zA-Z_]\w+)\(\))', flags=re.ASCII) +RE_function = re.compile(r'\b(([a-zA-Z_]\w+)\(\))', flags=ascii_p3) # # Sphinx 2 uses the same :c:type role for struct, union, enum and typedef # RE_generic_type = re.compile(r'\b(struct|union|enum|typedef)\s+([a-zA-Z_]\w+)', - flags=re.ASCII) + flags=ascii_p3) # # Sphinx 3 uses a different C role for each one of struct, union, enum and # typedef # -RE_struct = re.compile(r'\b(struct)\s+([a-zA-Z_]\w+)', flags=re.ASCII) -RE_union = re.compile(r'\b(union)\s+([a-zA-Z_]\w+)', flags=re.ASCII) -RE_enum = re.compile(r'\b(enum)\s+([a-zA-Z_]\w+)', flags=re.ASCII) -RE_typedef = re.compile(r'\b(typedef)\s+([a-zA-Z_]\w+)', flags=re.ASCII) +RE_struct = re.compile(r'\b(struct)\s+([a-zA-Z_]\w+)', flags=ascii_p3) +RE_union = re.compile(r'\b(union)\s+([a-zA-Z_]\w+)', flags=ascii_p3) +RE_enum = re.compile(r'\b(enum)\s+([a-zA-Z_]\w+)', flags=ascii_p3) +RE_typedef = re.compile(r'\b(typedef)\s+([a-zA-Z_]\w+)', flags=ascii_p3) # # Detects a reference to a documentation page of the form Documentation/... 
with From c80afa1d9c3603d5eddeb8d63368823b1982f3f0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 3 Nov 2020 16:32:58 +0000 Subject: [PATCH 74/75] afs: Fix warning due to unadvanced marshalling pointer When using the afs.yfs.acl xattr to change an AuriStor ACL, a warning can be generated when the request is marshalled because the buffer pointer isn't increased after adding the last element, thereby triggering the check at the end if the ACL wasn't empty. This just causes something like the following warning, but doesn't stop the call from happening successfully: kAFS: YFS.StoreOpaqueACL2: Request buffer underflow (36<108) Fix this simply by increasing the count prior to the check. Fixes: f5e4546347bc ("afs: Implement YFS ACL setting") Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/afs/yfsclient.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 3b1239b7e90d..bd787e71a657 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -1990,6 +1990,7 @@ void yfs_fs_store_opaque_acl2(struct afs_operation *op) memcpy(bp, acl->data, acl->size); if (acl->size != size) memset((void *)bp + acl->size, 0, size - acl->size); + bp += size / sizeof(__be32); yfs_check_req(call, bp); trace_afs_make_fs_call(call, &vp->fid); From f4c79144edd8a49ffca8fa737a31d606be742a34 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 3 Nov 2020 16:33:07 +0000 Subject: [PATCH 75/75] afs: Fix incorrect freeing of the ACL passed to the YFS ACL store op The cleanup for the yfs_store_opaque_acl2_operation calls the wrong function to destroy the ACL content buffer. It's an afs_acl struct, not a yfs_acl struct - and the free function for the latter may pass invalid pointers to kfree(). Fix this by using the afs_acl_put() function. The yfs_acl_put() function is then no longer used and can be removed. general protection fault, probably for non-canonical address 0x7ebde00000000: 0000 [#1] SMP PTI ... 
RIP: 0010:compound_head+0x0/0x11 ... Call Trace: virt_to_cache+0x8/0x51 kfree+0x5d/0x79 yfs_free_opaque_acl+0x16/0x29 afs_put_operation+0x60/0x114 __vfs_setxattr+0x67/0x72 __vfs_setxattr_noperm+0x66/0xe9 vfs_setxattr+0x67/0xce setxattr+0x14e/0x184 __do_sys_fsetxattr+0x66/0x8f do_syscall_64+0x2d/0x3a entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: e49c7b2f6de7 ("afs: Build an abstraction around an "operation" concept") Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/afs/xattr.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index 38884d6c57cd..95c573dcda11 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -148,11 +148,6 @@ static const struct xattr_handler afs_xattr_afs_acl_handler = { .set = afs_xattr_set_acl, }; -static void yfs_acl_put(struct afs_operation *op) -{ - yfs_free_opaque_acl(op->yacl); -} - static const struct afs_operation_ops yfs_fetch_opaque_acl_operation = { .issue_yfs_rpc = yfs_fs_fetch_opaque_acl, .success = afs_acl_success, @@ -246,7 +241,7 @@ error: static const struct afs_operation_ops yfs_store_opaque_acl2_operation = { .issue_yfs_rpc = yfs_fs_store_opaque_acl2, .success = afs_acl_success, - .put = yfs_acl_put, + .put = afs_acl_put, }; /*