From 9e766b86a9ef653a8ca48a9f70d3dbb580284594 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 22 Jul 2021 16:40:40 +0200 Subject: [PATCH 01/65] PCI: pci-bridge-emul: Add PCIe Root Capabilities Register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e902bb7c24a7099d0eb0eb4cba06f2d91e9299f3 upstream. The 16-bit Root Capabilities register is at offset 0x1e in the PCIe Capability. Rename current 'rsvd' struct member to 'rootcap'. Link: https://lore.kernel.org/r/20210722144041.12661-4-pali@kernel.org Signed-off-by: Pali Rohár Signed-off-by: Lorenzo Pieralisi Reviewed-by: Marek Behún Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-bridge-emul.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci-bridge-emul.h b/drivers/pci/pci-bridge-emul.h index b31883022a8e..49bbd37ee318 100644 --- a/drivers/pci/pci-bridge-emul.h +++ b/drivers/pci/pci-bridge-emul.h @@ -54,7 +54,7 @@ struct pci_bridge_emul_pcie_conf { __le16 slotctl; __le16 slotsta; __le16 rootctl; - __le16 rsvd; + __le16 rootcap; __le32 rootsta; __le32 devcap2; __le16 devctl2; From 9493e92a395330787def2b1980e930f5082de2f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 22 Jul 2021 16:40:41 +0200 Subject: [PATCH 02/65] PCI: aardvark: Fix reporting CRS value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 43f5c77bcbd27cce70bf33c2b86d6726ce95dd66 upstream. Set CRSVIS flag in emulated root PCI bridge to indicate support for Completion Retry Status. Add check for CRSSVE flag from root PCI brige when issuing Configuration Read Request via PIO to correctly returns fabricated CRS value as it is required by PCIe spec. Link: https://lore.kernel.org/r/20210722144041.12661-5-pali@kernel.org Fixes: 8a3ebd8de328 ("PCI: aardvark: Implement emulated root PCI bridge config space") Signed-off-by: Pali Rohár Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org # e0d9d30b7354 ("PCI: pci-bridge-emul: Fix big-endian support") Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pci-aardvark.c | 67 +++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 4 deletions(-) diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c index 88e19ad54f64..f175cff39b46 100644 --- a/drivers/pci/controller/pci-aardvark.c +++ b/drivers/pci/controller/pci-aardvark.c @@ -225,6 +225,8 @@ #define MSI_IRQ_NUM 32 +#define CFG_RD_CRS_VAL 0xffff0001 + struct advk_pcie { struct platform_device *pdev; void __iomem *base; @@ -587,7 +589,7 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie) advk_writel(pcie, reg, PCIE_CORE_CMD_STATUS_REG); } -static int advk_pcie_check_pio_status(struct advk_pcie *pcie, u32 *val) +static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u32 *val) { struct device *dev = &pcie->pdev->dev; u32 reg; @@ -629,9 +631,30 @@ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, u32 *val) strcomp_status = "UR"; break; case PIO_COMPLETION_STATUS_CRS: + if (allow_crs && val) { + /* PCIe r4.0, sec 2.3.2, says: + * If CRS Software Visibility is enabled: + * For a Configuration Read Request that includes both + * bytes of the Vendor ID field of a device Function's + * Configuration Space Header, the Root Complex must + * complete the Request to the host by returning a + * read-data value of 0001h for the Vendor ID field and + * all '1's for any additional bytes included in the + * request. + * + * So CRS in this case is not an error status. + */ + *val = CFG_RD_CRS_VAL; + strcomp_status = NULL; + break; + } /* PCIe r4.0, sec 2.3.2, says: * If CRS Software Visibility is not enabled, the Root Complex * must re-issue the Configuration Request as a new Request. + * If CRS Software Visibility is enabled: For a Configuration + * Write Request or for any other Configuration Read Request, + * the Root Complex must re-issue the Configuration Request as + * a new Request. * A Root Complex implementation may choose to limit the number * of Configuration Request/CRS Completion Status loops before * determining that something is wrong with the target of the @@ -700,6 +723,7 @@ advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge, case PCI_EXP_RTCTL: { u32 val = advk_readl(pcie, PCIE_ISR0_MASK_REG); *value = (val & PCIE_MSG_PM_PME_MASK) ? 0 : PCI_EXP_RTCTL_PMEIE; + *value |= PCI_EXP_RTCAP_CRSVIS << 16; return PCI_BRIDGE_EMUL_HANDLED; } @@ -781,6 +805,7 @@ static struct pci_bridge_emul_ops advk_pci_bridge_emul_ops = { static int advk_sw_pci_bridge_init(struct advk_pcie *pcie) { struct pci_bridge_emul *bridge = &pcie->bridge; + int ret; bridge->conf.vendor = cpu_to_le16(advk_readl(pcie, PCIE_CORE_DEV_ID_REG) & 0xffff); @@ -804,7 +829,15 @@ static int advk_sw_pci_bridge_init(struct advk_pcie *pcie) bridge->data = pcie; bridge->ops = &advk_pci_bridge_emul_ops; - return pci_bridge_emul_init(bridge, 0); + /* PCIe config space can be initialized after pci_bridge_emul_init() */ + ret = pci_bridge_emul_init(bridge, 0); + if (ret < 0) + return ret; + + /* Indicates supports for Completion Retry Status */ + bridge->pcie_conf.rootcap = cpu_to_le16(PCI_EXP_RTCAP_CRSVIS); + + return 0; } static bool advk_pcie_valid_device(struct advk_pcie *pcie, struct pci_bus *bus, @@ -856,6 +889,7 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn, int where, int size, u32 *val) { struct advk_pcie *pcie = bus->sysdata; + bool allow_crs; u32 reg; int ret; @@ -868,7 +902,24 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn, return pci_bridge_emul_conf_read(&pcie->bridge, where, size, val); + /* + * Completion Retry Status is possible to return only when reading all + * 4 bytes from PCI_VENDOR_ID and PCI_DEVICE_ID registers at once and + * CRSSVE flag on Root Bridge is enabled. + */ + allow_crs = (where == PCI_VENDOR_ID) && (size == 4) && + (le16_to_cpu(pcie->bridge.pcie_conf.rootctl) & + PCI_EXP_RTCTL_CRSSVE); + if (advk_pcie_pio_is_running(pcie)) { + /* + * If it is possible return Completion Retry Status so caller + * tries to issue the request again instead of failing. + */ + if (allow_crs) { + *val = CFG_RD_CRS_VAL; + return PCIBIOS_SUCCESSFUL; + } *val = 0xffffffff; return PCIBIOS_SET_FAILED; } @@ -896,12 +947,20 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn, ret = advk_pcie_wait_pio(pcie); if (ret < 0) { + /* + * If it is possible return Completion Retry Status so caller + * tries to issue the request again instead of failing. + */ + if (allow_crs) { + *val = CFG_RD_CRS_VAL; + return PCIBIOS_SUCCESSFUL; + } *val = 0xffffffff; return PCIBIOS_SET_FAILED; } /* Check PIO status and get the read result */ - ret = advk_pcie_check_pio_status(pcie, val); + ret = advk_pcie_check_pio_status(pcie, allow_crs, val); if (ret < 0) { *val = 0xffffffff; return PCIBIOS_SET_FAILED; @@ -970,7 +1029,7 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn, if (ret < 0) return PCIBIOS_SET_FAILED; - ret = advk_pcie_check_pio_status(pcie, NULL); + ret = advk_pcie_check_pio_status(pcie, false, NULL); if (ret < 0) return PCIBIOS_SET_FAILED; From 74d54e5ceba3f50b9b695e4740172d876327815a Mon Sep 17 00:00:00 2001 From: nick black Date: Mon, 30 Aug 2021 04:56:15 -0400 Subject: [PATCH 03/65] console: consume APC, DM, DCS commit 3a2b2eb55681158d3e3ef464fbf47574cf0c517c upstream. The Linux console's VT102 implementation already consumes OSC ("Operating System Command") sequences, probably because that's how palette changes are transmitted. In addition to OSC, there are three other major clases of ANSI control strings: APC ("Application Program Command"), PM ("Privacy Message"), and DCS ("Device Control String"). They are handled similarly to OSC in terms of termination. Source: vt100.net Add three new enumerated states, one for each of these types. All three are handled the same way right now--they simply consume input until terminated. I hope to expand upon this firmament in the future. Add new predicate ansi_control_string(), returning true for any of these states. Replace explicit checks against ESosc with calls to this function. Transition to these states appropriately from the escape initiation (ESesc) state. This was motivated by the following Notcurses bugs: https://github.com/dankamongmen/notcurses/issues/2050 https://github.com/dankamongmen/notcurses/issues/1828 https://github.com/dankamongmen/notcurses/issues/2069 where standard VT sequences are not consumed by the Linux console. It's not necessary that the Linux console *support* these sequences, but it ought *consume* these well-specified classes of sequences. Tested by sending a variety of escape sequences to the console, and verifying that they still worked, or were now properly consumed. Verified that the escapes were properly terminated at a generic level. Verified that the Notcurses tools continued to show expected output on the Linux console, except now without escape bleedthrough. Link: https://lore.kernel.org/lkml/YSydL0q8iaUfkphg@schwarzgerat.orthanc/ Signed-off-by: nick black Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Tetsuo Handa Cc: Daniel Vetter Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 06757b1d4aec..cea40ef090b7 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2060,7 +2060,7 @@ static void restore_cur(struct vc_data *vc) enum { ESnormal, ESesc, ESsquare, ESgetpars, ESfunckey, EShash, ESsetG0, ESsetG1, ESpercent, EScsiignore, ESnonstd, - ESpalette, ESosc }; + ESpalette, ESosc, ESapc, ESpm, ESdcs }; /* console_lock is held (except via vc_init()) */ static void reset_terminal(struct vc_data *vc, int do_clear) @@ -2134,20 +2134,28 @@ static void vc_setGx(struct vc_data *vc, unsigned int which, int c) vc->vc_translate = set_translate(*charset, vc); } +/* is this state an ANSI control string? */ +static bool ansi_control_string(unsigned int state) +{ + if (state == ESosc || state == ESapc || state == ESpm || state == ESdcs) + return true; + return false; +} + /* console_lock is held */ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) { /* * Control characters can be used in the _middle_ - * of an escape sequence. + * of an escape sequence, aside from ANSI control strings. */ - if (vc->vc_state == ESosc && c>=8 && c<=13) /* ... except for OSC */ + if (ansi_control_string(vc->vc_state) && c >= 8 && c <= 13) return; switch (c) { case 0: return; case 7: - if (vc->vc_state == ESosc) + if (ansi_control_string(vc->vc_state)) vc->vc_state = ESnormal; else if (vc->vc_bell_duration) kd_mksound(vc->vc_bell_pitch, vc->vc_bell_duration); @@ -2208,6 +2216,12 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) case ']': vc->vc_state = ESnonstd; return; + case '_': + vc->vc_state = ESapc; + return; + case '^': + vc->vc_state = ESpm; + return; case '%': vc->vc_state = ESpercent; return; @@ -2225,6 +2239,9 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) if (vc->state.x < VC_TABSTOPS_COUNT) set_bit(vc->state.x, vc->vc_tab_stop); return; + case 'P': + vc->vc_state = ESdcs; + return; case 'Z': respond_ID(tty); return; @@ -2521,8 +2538,14 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) vc_setGx(vc, 1, c); vc->vc_state = ESnormal; return; + case ESapc: + return; case ESosc: return; + case ESpm: + return; + case ESdcs: + return; default: vc->vc_state = ESnormal; } From 79286ea830b3922b38bcefbd150b2010f91a55ce Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 9 Sep 2021 16:59:42 +0200 Subject: [PATCH 04/65] s390/pci_mmio: fully validate the VMA before calling follow_pte() commit a8b92b8c1eac8d655a97b1e90f4d83c25d9b9a18 upstream. We should not walk/touch page tables outside of VMA boundaries when holding only the mmap sem in read mode. Evil user space can modify the VMA layout just before this function runs and e.g., trigger races with page table removal code since commit dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap"). find_vma() does not check if the address is >= the VMA start address; use vma_lookup() instead. Reviewed-by: Niklas Schnelle Reviewed-by: Liam R. Howlett Fixes: dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap") Signed-off-by: David Hildenbrand Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/pci/pci_mmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 401cf670a243..37b1bbd1a27c 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -128,7 +128,7 @@ static long get_pfn(unsigned long user_addr, unsigned long access, mmap_read_lock(current->mm); ret = -EINVAL; vma = find_vma(current->mm, user_addr); - if (!vma) + if (!vma || user_addr < vma->vm_start) goto out; ret = -EACCES; if (!(vma->vm_flags & access)) From eb46d7c8ae838b37ad4acee44fa3828af3062ce7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 18 Mar 2021 21:03:33 -0700 Subject: [PATCH 05/65] ARM: Qualify enabling of swiotlb_init() commit fcf044891c84e38fc90eb736b818781bccf94e38 upstream. We do not need a SWIOTLB unless we have DRAM that is addressable beyond the arm_dma_limit. Compare max_pfn with arm_dma_pfn_limit to determine whether we do need a SWIOTLB to be initialized. Fixes: ad3c7b18c5b3 ("arm: use swiotlb for bounce buffering on LPAE configs") Signed-off-by: Florian Fainelli Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/init.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index d54d69cf1732..75f3ab531bdf 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -378,7 +378,11 @@ static void __init free_highpages(void) void __init mem_init(void) { #ifdef CONFIG_ARM_LPAE - swiotlb_init(1); + if (swiotlb_force == SWIOTLB_FORCE || + max_pfn > arm_dma_pfn_limit) + swiotlb_init(1); + else + swiotlb_force = SWIOTLB_NO_FORCE; #endif set_max_mapnr(pfn_to_page(max_pfn) - mem_map); From ce90c6706d5a95ddda8d3cea01768bd0b4445851 Mon Sep 17 00:00:00 2001 From: Alex Sverdlin Date: Wed, 22 Sep 2021 09:59:55 -0700 Subject: [PATCH 06/65] ARM: 9077/1: PLT: Move struct plt_entries definition to header commit 4e271701c17dee70c6e1351c4d7d42e70405c6a9 upstream upstream No functional change, later it will be re-used in several files. Signed-off-by: Alexander Sverdlin Signed-off-by: Russell King Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/module.h | 9 +++++++++ arch/arm/kernel/module-plts.c | 9 --------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index 4b0df09cbe67..09b9ad55b83d 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -19,6 +19,15 @@ enum { }; #endif +#define PLT_ENT_STRIDE L1_CACHE_BYTES +#define PLT_ENT_COUNT (PLT_ENT_STRIDE / sizeof(u32)) +#define PLT_ENT_SIZE (sizeof(struct plt_entries) / PLT_ENT_COUNT) + +struct plt_entries { + u32 ldr[PLT_ENT_COUNT]; + u32 lit[PLT_ENT_COUNT]; +}; + struct mod_plt_sec { struct elf32_shdr *plt; int plt_count; diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index 6e626abaefc5..d330e9ec2de3 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c @@ -12,10 +12,6 @@ #include #include -#define PLT_ENT_STRIDE L1_CACHE_BYTES -#define PLT_ENT_COUNT (PLT_ENT_STRIDE / sizeof(u32)) -#define PLT_ENT_SIZE (sizeof(struct plt_entries) / PLT_ENT_COUNT) - #ifdef CONFIG_THUMB2_KERNEL #define PLT_ENT_LDR __opcode_to_mem_thumb32(0xf8dff000 | \ (PLT_ENT_STRIDE - 4)) @@ -24,11 +20,6 @@ (PLT_ENT_STRIDE - 8)) #endif -struct plt_entries { - u32 ldr[PLT_ENT_COUNT]; - u32 lit[PLT_ENT_COUNT]; -}; - static bool in_init(const struct module *mod, unsigned long loc) { return loc - (u32)mod->init_layout.base < mod->init_layout.size; From ad00533858f7b1436f7dc9c555ccac85c0e28c9a Mon Sep 17 00:00:00 2001 From: Alex Sverdlin Date: Wed, 22 Sep 2021 09:59:56 -0700 Subject: [PATCH 07/65] ARM: 9078/1: Add warn suppress parameter to arm_gen_branch_link() commit 890cb057a46d323fd8c77ebecb6485476614cd21 upstream Will be used in the following patch. No functional change. Signed-off-by: Alexander Sverdlin Signed-off-by: Russell King Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/insn.h | 8 ++++---- arch/arm/kernel/ftrace.c | 2 +- arch/arm/kernel/insn.c | 19 ++++++++++--------- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/arch/arm/include/asm/insn.h b/arch/arm/include/asm/insn.h index f20e08ac85ae..5475cbf9fb6b 100644 --- a/arch/arm/include/asm/insn.h +++ b/arch/arm/include/asm/insn.h @@ -13,18 +13,18 @@ arm_gen_nop(void) } unsigned long -__arm_gen_branch(unsigned long pc, unsigned long addr, bool link); +__arm_gen_branch(unsigned long pc, unsigned long addr, bool link, bool warn); static inline unsigned long arm_gen_branch(unsigned long pc, unsigned long addr) { - return __arm_gen_branch(pc, addr, false); + return __arm_gen_branch(pc, addr, false, true); } static inline unsigned long -arm_gen_branch_link(unsigned long pc, unsigned long addr) +arm_gen_branch_link(unsigned long pc, unsigned long addr, bool warn) { - return __arm_gen_branch(pc, addr, true); + return __arm_gen_branch(pc, addr, true, warn); } #endif diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 9a79ef6b1876..61de8172a044 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -70,7 +70,7 @@ int ftrace_arch_code_modify_post_process(void) static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) { - return arm_gen_branch_link(pc, addr); + return arm_gen_branch_link(pc, addr, true); } static int ftrace_modify_code(unsigned long pc, unsigned long old, diff --git a/arch/arm/kernel/insn.c b/arch/arm/kernel/insn.c index 2e844b70386b..db0acbb7d7a0 100644 --- a/arch/arm/kernel/insn.c +++ b/arch/arm/kernel/insn.c @@ -3,8 +3,9 @@ #include #include -static unsigned long -__arm_gen_branch_thumb2(unsigned long pc, unsigned long addr, bool link) +static unsigned long __arm_gen_branch_thumb2(unsigned long pc, + unsigned long addr, bool link, + bool warn) { unsigned long s, j1, j2, i1, i2, imm10, imm11; unsigned long first, second; @@ -12,7 +13,7 @@ __arm_gen_branch_thumb2(unsigned long pc, unsigned long addr, bool link) offset = (long)addr - (long)(pc + 4); if (offset < -16777216 || offset > 16777214) { - WARN_ON_ONCE(1); + WARN_ON_ONCE(warn); return 0; } @@ -33,8 +34,8 @@ __arm_gen_branch_thumb2(unsigned long pc, unsigned long addr, bool link) return __opcode_thumb32_compose(first, second); } -static unsigned long -__arm_gen_branch_arm(unsigned long pc, unsigned long addr, bool link) +static unsigned long __arm_gen_branch_arm(unsigned long pc, unsigned long addr, + bool link, bool warn) { unsigned long opcode = 0xea000000; long offset; @@ -44,7 +45,7 @@ __arm_gen_branch_arm(unsigned long pc, unsigned long addr, bool link) offset = (long)addr - (long)(pc + 8); if (unlikely(offset < -33554432 || offset > 33554428)) { - WARN_ON_ONCE(1); + WARN_ON_ONCE(warn); return 0; } @@ -54,10 +55,10 @@ __arm_gen_branch_arm(unsigned long pc, unsigned long addr, bool link) } unsigned long -__arm_gen_branch(unsigned long pc, unsigned long addr, bool link) +__arm_gen_branch(unsigned long pc, unsigned long addr, bool link, bool warn) { if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) - return __arm_gen_branch_thumb2(pc, addr, link); + return __arm_gen_branch_thumb2(pc, addr, link, warn); else - return __arm_gen_branch_arm(pc, addr, link); + return __arm_gen_branch_arm(pc, addr, link, warn); } From f91d25a7c89e2f246a541603430ce2420bab58c5 Mon Sep 17 00:00:00 2001 From: Alex Sverdlin Date: Wed, 22 Sep 2021 09:59:57 -0700 Subject: [PATCH 08/65] ARM: 9079/1: ftrace: Add MODULE_PLTS support commit 79f32b221b18c15a98507b101ef4beb52444cc6f upstream Teach ftrace_make_call() and ftrace_make_nop() about PLTs. Teach PLT code about FTRACE and all its callbacks. Otherwise the following might happen: ------------[ cut here ]------------ WARNING: CPU: 14 PID: 2265 at .../arch/arm/kernel/insn.c:14 __arm_gen_branch+0x83/0x8c() ... Hardware name: LSI Axxia AXM55XX [] (unwind_backtrace) from [] (show_stack+0x11/0x14) [] (show_stack) from [] (dump_stack+0x81/0xa8) [] (dump_stack) from [] (warn_slowpath_common+0x69/0x90) [] (warn_slowpath_common) from [] (warn_slowpath_null+0x17/0x1c) [] (warn_slowpath_null) from [] (__arm_gen_branch+0x83/0x8c) [] (__arm_gen_branch) from [] (ftrace_make_nop+0xf/0x24) [] (ftrace_make_nop) from [] (ftrace_process_locs+0x27b/0x3e8) [] (ftrace_process_locs) from [] (load_module+0x11e9/0x1a44) [] (load_module) from [] (SyS_finit_module+0x59/0x84) [] (SyS_finit_module) from [] (ret_fast_syscall+0x1/0x18) ---[ end trace e1b64ced7a89adcc ]--- ------------[ cut here ]------------ WARNING: CPU: 14 PID: 2265 at .../kernel/trace/ftrace.c:1979 ftrace_bug+0x1b1/0x234() ... Hardware name: LSI Axxia AXM55XX [] (unwind_backtrace) from [] (show_stack+0x11/0x14) [] (show_stack) from [] (dump_stack+0x81/0xa8) [] (dump_stack) from [] (warn_slowpath_common+0x69/0x90) [] (warn_slowpath_common) from [] (warn_slowpath_null+0x17/0x1c) [] (warn_slowpath_null) from [] (ftrace_bug+0x1b1/0x234) [] (ftrace_bug) from [] (ftrace_process_locs+0x285/0x3e8) [] (ftrace_process_locs) from [] (load_module+0x11e9/0x1a44) [] (load_module) from [] (SyS_finit_module+0x59/0x84) [] (SyS_finit_module) from [] (ret_fast_syscall+0x1/0x18) ---[ end trace e1b64ced7a89adcd ]--- ftrace failed to modify [] 0xe9ef7006 actual: 02:f0:3b:fa ftrace record flags: 0 (0) expected tramp: c0314265 Signed-off-by: Alexander Sverdlin Signed-off-by: Russell King Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/ftrace.h | 3 +++ arch/arm/include/asm/module.h | 1 + arch/arm/kernel/ftrace.c | 46 +++++++++++++++++++++++++++++------ arch/arm/kernel/module-plts.c | 44 ++++++++++++++++++++++++++++++--- 4 files changed, 82 insertions(+), 12 deletions(-) diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index 48ec1d0337da..a4dbac07e4ef 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -15,6 +15,9 @@ extern void __gnu_mcount_nc(void); #ifdef CONFIG_DYNAMIC_FTRACE struct dyn_arch_ftrace { +#ifdef CONFIG_ARM_MODULE_PLTS + struct module *mod; +#endif }; static inline unsigned long ftrace_call_adjust(unsigned long addr) diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index 09b9ad55b83d..cfffae67c04e 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -30,6 +30,7 @@ struct plt_entries { struct mod_plt_sec { struct elf32_shdr *plt; + struct plt_entries *plt_ent; int plt_count; }; diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 61de8172a044..3c83b5d29697 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -68,9 +68,10 @@ int ftrace_arch_code_modify_post_process(void) return 0; } -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr, + bool warn) { - return arm_gen_branch_link(pc, addr, true); + return arm_gen_branch_link(pc, addr, warn); } static int ftrace_modify_code(unsigned long pc, unsigned long old, @@ -104,14 +105,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func) int ret; pc = (unsigned long)&ftrace_call; - new = ftrace_call_replace(pc, (unsigned long)func); + new = ftrace_call_replace(pc, (unsigned long)func, true); ret = ftrace_modify_code(pc, 0, new, false); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS if (!ret) { pc = (unsigned long)&ftrace_regs_call; - new = ftrace_call_replace(pc, (unsigned long)func); + new = ftrace_call_replace(pc, (unsigned long)func, true); ret = ftrace_modify_code(pc, 0, new, false); } @@ -124,10 +125,22 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long new, old; unsigned long ip = rec->ip; + unsigned long aaddr = adjust_address(rec, addr); + struct module *mod = NULL; + +#ifdef CONFIG_ARM_MODULE_PLTS + mod = rec->arch.mod; +#endif old = ftrace_nop_replace(rec); - new = ftrace_call_replace(ip, adjust_address(rec, addr)); + new = ftrace_call_replace(ip, aaddr, !mod); +#ifdef CONFIG_ARM_MODULE_PLTS + if (!new && mod) { + aaddr = get_module_plt(mod, ip, aaddr); + new = ftrace_call_replace(ip, aaddr, true); + } +#endif return ftrace_modify_code(rec->ip, old, new, true); } @@ -140,9 +153,9 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long new, old; unsigned long ip = rec->ip; - old = ftrace_call_replace(ip, adjust_address(rec, old_addr)); + old = ftrace_call_replace(ip, adjust_address(rec, old_addr), true); - new = ftrace_call_replace(ip, adjust_address(rec, addr)); + new = ftrace_call_replace(ip, adjust_address(rec, addr), true); return ftrace_modify_code(rec->ip, old, new, true); } @@ -152,12 +165,29 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { + unsigned long aaddr = adjust_address(rec, addr); unsigned long ip = rec->ip; unsigned long old; unsigned long new; int ret; - old = ftrace_call_replace(ip, adjust_address(rec, addr)); +#ifdef CONFIG_ARM_MODULE_PLTS + /* mod is only supplied during module loading */ + if (!mod) + mod = rec->arch.mod; + else + rec->arch.mod = mod; +#endif + + old = ftrace_call_replace(ip, aaddr, + !IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || !mod); +#ifdef CONFIG_ARM_MODULE_PLTS + if (!old && mod) { + aaddr = get_module_plt(mod, ip, aaddr); + old = ftrace_call_replace(ip, aaddr, true); + } +#endif + new = ftrace_nop_replace(rec); ret = ftrace_modify_code(ip, old, new, true); diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index d330e9ec2de3..a0524ad84e4d 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -20,19 +21,52 @@ (PLT_ENT_STRIDE - 8)) #endif +static const u32 fixed_plts[] = { +#ifdef CONFIG_FUNCTION_TRACER + FTRACE_ADDR, + MCOUNT_ADDR, +#endif +}; + static bool in_init(const struct module *mod, unsigned long loc) { return loc - (u32)mod->init_layout.base < mod->init_layout.size; } +static void prealloc_fixed(struct mod_plt_sec *pltsec, struct plt_entries *plt) +{ + int i; + + if (!ARRAY_SIZE(fixed_plts) || pltsec->plt_count) + return; + pltsec->plt_count = ARRAY_SIZE(fixed_plts); + + for (i = 0; i < ARRAY_SIZE(plt->ldr); ++i) + plt->ldr[i] = PLT_ENT_LDR; + + BUILD_BUG_ON(sizeof(fixed_plts) > sizeof(plt->lit)); + memcpy(plt->lit, fixed_plts, sizeof(fixed_plts)); +} + u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) { struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core : &mod->arch.init; + struct plt_entries *plt; + int idx; - struct plt_entries *plt = (struct plt_entries *)pltsec->plt->sh_addr; - int idx = 0; + /* cache the address, ELF header is available only during module load */ + if (!pltsec->plt_ent) + pltsec->plt_ent = (struct plt_entries *)pltsec->plt->sh_addr; + plt = pltsec->plt_ent; + prealloc_fixed(pltsec, plt); + + for (idx = 0; idx < ARRAY_SIZE(fixed_plts); ++idx) + if (plt->lit[idx] == val) + return (u32)&plt->ldr[idx]; + + idx = 0; /* * Look for an existing entry pointing to 'val'. Given that the * relocations are sorted, this will be the last entry we allocated. @@ -180,8 +214,8 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base, int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { - unsigned long core_plts = 0; - unsigned long init_plts = 0; + unsigned long core_plts = ARRAY_SIZE(fixed_plts); + unsigned long init_plts = ARRAY_SIZE(fixed_plts); Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; Elf32_Sym *syms = NULL; @@ -236,6 +270,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.core.plt->sh_size = round_up(core_plts * PLT_ENT_SIZE, sizeof(struct plt_entries)); mod->arch.core.plt_count = 0; + mod->arch.core.plt_ent = NULL; mod->arch.init.plt->sh_type = SHT_NOBITS; mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; @@ -243,6 +278,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.init.plt->sh_size = round_up(init_plts * PLT_ENT_SIZE, sizeof(struct plt_entries)); mod->arch.init.plt_count = 0; + mod->arch.init.plt_ent = NULL; pr_debug("%s: plt=%x, init.plt=%x\n", __func__, mod->arch.core.plt->sh_size, mod->arch.init.plt->sh_size); From 5ce134e65f3b8034220c416ab8cecd8652fdeb2d Mon Sep 17 00:00:00 2001 From: Alex Sverdlin Date: Wed, 22 Sep 2021 09:59:58 -0700 Subject: [PATCH 09/65] ARM: 9098/1: ftrace: MODULE_PLT: Fix build problem without DYNAMIC_FTRACE commit 6fa630bf473827aee48cbf0efbbdf6f03134e890 upstream FTRACE_ADDR is only defined when CONFIG_DYNAMIC_FTRACE is defined, the latter is even stronger requirement than CONFIG_FUNCTION_TRACER (which is enough for MCOUNT_ADDR). Link: https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org/thread/ZUVCQBHDMFVR7CCB7JPESLJEWERZDJ3T/ Fixes: 1f12fb25c5c5d22f ("ARM: 9079/1: ftrace: Add MODULE_PLTS support") Reported-by: kernel test robot Signed-off-by: Alexander Sverdlin Signed-off-by: Russell King Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/module-plts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index a0524ad84e4d..1fc309b41f94 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c @@ -22,7 +22,7 @@ #endif static const u32 fixed_plts[] = { -#ifdef CONFIG_FUNCTION_TRACER +#ifdef CONFIG_DYNAMIC_FTRACE FTRACE_ADDR, MCOUNT_ADDR, #endif From 473cea4983b582fedb10f84b43e8924716ebc4fc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 23 Sep 2021 13:01:44 +0200 Subject: [PATCH 10/65] Revert "net/mlx5: Register to devlink ingress VLAN filter trap" This reverts commit fe6322774ca28669868a7e231e173e09f7422118 which was commit 82e6c96f04e13c72d91777455836ffd012853caa upstream. It has been reported to cause regressions so should be dropped. Reported-by: Link: https://lore.kernel.org/r/BY5PR13MB3604D3031E984CA34A57B7C9EEA09@BY5PR13MB3604.namprd13.prod.outlook.com Cc: Aya Levin Cc: Tariq Toukan Cc: Tariq Toukan Cc: Saeed Mahameed Cc: Jakub Kicinski Cc: Sasha Levin Cc: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/mellanox/mlx5/core/devlink.c | 51 ------------------- 1 file changed, 51 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 4cba110f6ef8..0e699330ae77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -376,48 +376,6 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) #endif } -#define MLX5_TRAP_DROP(_id, _group_id) \ - DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ - DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ - DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) - -static const struct devlink_trap mlx5_traps_arr[] = { - MLX5_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS), -}; - -static const struct devlink_trap_group mlx5_trap_groups_arr[] = { - DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 0), -}; - -static int mlx5_devlink_traps_register(struct devlink *devlink) -{ - struct mlx5_core_dev *core_dev = devlink_priv(devlink); - int err; - - err = devlink_trap_groups_register(devlink, mlx5_trap_groups_arr, - ARRAY_SIZE(mlx5_trap_groups_arr)); - if (err) - return err; - - err = devlink_traps_register(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr), - &core_dev->priv); - if (err) - goto err_trap_group; - return 0; - -err_trap_group: - devlink_trap_groups_unregister(devlink, mlx5_trap_groups_arr, - ARRAY_SIZE(mlx5_trap_groups_arr)); - return err; -} - -static void mlx5_devlink_traps_unregister(struct devlink *devlink) -{ - devlink_traps_unregister(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr)); - devlink_trap_groups_unregister(devlink, mlx5_trap_groups_arr, - ARRAY_SIZE(mlx5_trap_groups_arr)); -} - int mlx5_devlink_register(struct devlink *devlink, struct device *dev) { int err; @@ -432,16 +390,8 @@ int mlx5_devlink_register(struct devlink *devlink, struct device *dev) goto params_reg_err; mlx5_devlink_set_params_init_values(devlink); devlink_params_publish(devlink); - - err = mlx5_devlink_traps_register(devlink); - if (err) - goto traps_reg_err; - return 0; -traps_reg_err: - devlink_params_unregister(devlink, mlx5_devlink_params, - ARRAY_SIZE(mlx5_devlink_params)); params_reg_err: devlink_unregister(devlink); return err; @@ -449,7 +399,6 @@ params_reg_err: void mlx5_devlink_unregister(struct devlink *devlink) { - mlx5_devlink_traps_unregister(devlink); devlink_params_unpublish(devlink); devlink_params_unregister(devlink, mlx5_devlink_params, ARRAY_SIZE(mlx5_devlink_params)); From ffca46766850d4b96a26ad511a7997f74da2df8c Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 28 Jun 2021 16:13:43 -0300 Subject: [PATCH 11/65] sctp: validate chunk size in __rcv_asconf_lookup commit b6ffe7671b24689c09faa5675dd58f93758a97ae upstream. In one of the fallbacks that SCTP has for identifying an association for an incoming packet, it looks for AddIp chunk (from ASCONF) and take a peek. Thing is, at this stage nothing was validating that the chunk actually had enough content for that, allowing the peek to happen over uninitialized memory. Similar check already exists in actual asconf handling in sctp_verify_asconf(). Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/input.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/input.c b/net/sctp/input.c index ddb5b5c2550e..49c49a4d203f 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -1168,6 +1168,9 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( union sctp_addr_param *param; union sctp_addr paddr; + if (ntohs(ch->length) < sizeof(*asconf) + sizeof(struct sctp_paramhdr)) + return NULL; + /* Skip over the ADDIP header and find the Address parameter */ param = (union sctp_addr_param *)(asconf + 1); From ccb79116c37242c07c34c991868acded87509e4c Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 28 Jun 2021 16:13:44 -0300 Subject: [PATCH 12/65] sctp: add param size validation for SCTP_PARAM_SET_PRIMARY commit ef6c8d6ccf0c1dccdda092ebe8782777cd7803c9 upstream. When SCTP handles an INIT chunk, it calls for example: sctp_sf_do_5_1B_init sctp_verify_init sctp_verify_param sctp_process_init sctp_process_param handling of SCTP_PARAM_SET_PRIMARY sctp_verify_init() wasn't doing proper size validation and neither the later handling, allowing it to work over the chunk itself, possibly being uninitialized memory. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_make_chunk.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 7411fa442821..fa0d96320baa 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2150,9 +2150,16 @@ static enum sctp_ierror sctp_verify_param(struct net *net, break; case SCTP_PARAM_SET_PRIMARY: - if (ep->asconf_enable) - break; - goto unhandled; + if (!ep->asconf_enable) + goto unhandled; + + if (ntohs(param.p->length) < sizeof(struct sctp_addip_param) + + sizeof(struct sctp_paramhdr)) { + sctp_process_inv_paramlength(asoc, param.p, + chunk, err_chunk); + retval = SCTP_IERROR_ABORT; + } + break; case SCTP_PARAM_HOST_NAME_ADDRESS: /* Tell the peer, we won't support this param. */ From 6b24588708fe7cb680d433a0492abb651bb1d5b0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 14 Aug 2021 16:56:26 -0700 Subject: [PATCH 13/65] staging: rtl8192u: Fix bitwise vs logical operator in TranslateRxSignalStuff819xUsb() commit 099ec97ac92911abfb102bb5c68ed270fc12e0dd upstream. clang warns: drivers/staging/rtl8192u/r8192U_core.c:4268:20: warning: bitwise and of boolean expressions; did you mean logical and? [-Wbool-operation-and] bpacket_toself = bpacket_match_bssid & ^~~~~~~~~~~~~~~~~~~~~ && 1 warning generated. Replace the bitwise AND with a logical one to clear up the warning, as that is clearly what was intended. Fixes: 8fc8598e61f6 ("Staging: Added Realtek rtl8192u driver to staging") Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20210814235625.1780033-1-nathan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192u/r8192U_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8192u/r8192U_core.c b/drivers/staging/rtl8192u/r8192U_core.c index 03d31e52b399..4523e825a61a 100644 --- a/drivers/staging/rtl8192u/r8192U_core.c +++ b/drivers/staging/rtl8192u/r8192U_core.c @@ -4271,7 +4271,7 @@ static void TranslateRxSignalStuff819xUsb(struct sk_buff *skb, bpacket_match_bssid = (type != IEEE80211_FTYPE_CTL) && (ether_addr_equal(priv->ieee80211->current_network.bssid, (fc & IEEE80211_FCTL_TODS) ? hdr->addr1 : (fc & IEEE80211_FCTL_FROMDS) ? hdr->addr2 : hdr->addr3)) && (!pstats->bHwError) && (!pstats->bCRC) && (!pstats->bICV); - bpacket_toself = bpacket_match_bssid & + bpacket_toself = bpacket_match_bssid && (ether_addr_equal(praddr, priv->ieee80211->dev->dev_addr)); if (WLAN_FC_GET_FRAMETYPE(fc) == IEEE80211_STYPE_BEACON) From 5d0e6a5e44416cdfd8b391fa46c69f7d08f9dd0d Mon Sep 17 00:00:00 2001 From: QiuXi Date: Tue, 7 Sep 2021 20:00:32 -0700 Subject: [PATCH 14/65] coredump: fix memleak in dump_vma_snapshot() commit 6fcac87e1f9e5b27805a2a404f4849194bb51de8 upstream. dump_vma_snapshot() allocs memory for *vma_meta, when dump_vma_snapshot() returns -EFAULT, the memory will be leaked, so we free it correctly. Link: https://lkml.kernel.org/r/20210810020441.62806-1-qiuxi1@huawei.com Fixes: a07279c9a8cd7 ("binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot") Signed-off-by: QiuXi Cc: Al Viro Cc: Jann Horn Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/coredump.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/coredump.c b/fs/coredump.c index c6acfc694f65..c56a3bdce7cd 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1111,8 +1111,10 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count, mmap_write_unlock(mm); - if (WARN_ON(i != *vma_count)) + if (WARN_ON(i != *vma_count)) { + kvfree(*vma_meta); return -EFAULT; + } *vma_data_size_ptr = vma_data_size; return 0; From 9d49973b08488f59dd0ada269d9dca210d1fd365 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Jun 2021 10:34:37 +0200 Subject: [PATCH 15/65] um: virtio_uml: fix memory leak on init failures commit 7ad28e0df7ee9dbcb793bb88dd81d4d22bb9a10e upstream. If initialization fails, e.g. because the connection failed, we leak the 'vu_dev'. Fix that. Reported by smatch. Fixes: 5d38f324993f ("um: drivers: Add virtio vhost-user driver") Signed-off-by: Johannes Berg Acked-By: Anton Ivanov Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- arch/um/drivers/virtio_uml.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index c17b8e5ec186..d11b3d41c378 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -1113,7 +1113,7 @@ static int virtio_uml_probe(struct platform_device *pdev) rc = os_connect_socket(pdata->socket_path); } while (rc == -EINTR); if (rc < 0) - return rc; + goto error_free; vu_dev->sock = rc; spin_lock_init(&vu_dev->sock_lock); @@ -1132,6 +1132,8 @@ static int virtio_uml_probe(struct platform_device *pdev) error_init: os_close_file(vu_dev->sock); +error_free: + kfree(vu_dev); return rc; } From fa64b08931ee27bec3513aed8f7936e99b8d1212 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 30 Jul 2021 23:27:15 +0300 Subject: [PATCH 16/65] dmaengine: acpi: Avoid comparison GSI with Linux vIRQ commit 67db87dc8284070adb15b3c02c1c31d5cf51c5d6 upstream. Currently the CRST parsing relies on the fact that on most of x86 devices the IRQ mapping is 1:1 with Linux vIRQ. However, it may be not true for some. Fix this by converting GSI to Linux vIRQ before checking it. Fixes: ee8209fd026b ("dma: acpi-dma: parse CSRT to extract additional resources") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210730202715.24375-1-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/acpi-dma.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c index 235f1396f968..52768dc8ce12 100644 --- a/drivers/dma/acpi-dma.c +++ b/drivers/dma/acpi-dma.c @@ -70,10 +70,14 @@ static int acpi_dma_parse_resource_group(const struct acpi_csrt_group *grp, si = (const struct acpi_csrt_shared_info *)&grp[1]; - /* Match device by MMIO and IRQ */ + /* Match device by MMIO */ if (si->mmio_base_low != lower_32_bits(mem) || - si->mmio_base_high != upper_32_bits(mem) || - si->gsi_interrupt != irq) + si->mmio_base_high != upper_32_bits(mem)) + return 0; + + /* Match device by Linux vIRQ */ + ret = acpi_register_gsi(NULL, si->gsi_interrupt, si->interrupt_mode, si->interrupt_polarity); + if (ret != irq) return 0; dev_dbg(&adev->dev, "matches with %.4s%04X (rev %u)\n", From ebcd3fd9207c98f0c01c317ef3aca7f2df41b72d Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Thu, 5 Aug 2021 18:06:11 +0200 Subject: [PATCH 17/65] perf test: Fix bpf test sample mismatch reporting commit 3e11300cdfd5f1bc13a05dfc6dccf69aca5dd1dc upstream. When the expected sample count in the condition changed, the message needs to be changed too, otherwise we'll get: 0x1001f2091d8: mmap mask[0]: BPF filter result incorrect, expected 56, got 56 samples Fixes: 4b04e0decd2518e5 ("perf test: Fix basic bpf filtering test") Signed-off-by: Michael Petlan Cc: Jiri Olsa Cc: Sumanth Korikkar Link: https //lore.kernel.org/r/20210805160611.5542-1-mpetlan@redhat.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/tests/bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 8345ff4acedf..e5832b74a845 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -199,7 +199,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), } if (count != expect * evlist->core.nr_entries) { - pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count); + pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect * evlist->core.nr_entries, count); goto out_delete_evlist; } From 87c4144450e6f09afeb2adc336f35b5f5274205c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 26 Nov 2020 18:00:06 +0100 Subject: [PATCH 18/65] tools lib: Adopt memchr_inv() from kernel commit b3e453272d436aab8adbe810c6d7043670281487 upstream. We'll use it to check for undefined/zero data. Signed-off-by: Jiri Olsa Suggested-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20201126170026.2619053-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/include/linux/string.h | 1 + tools/lib/string.c | 58 ++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index 5e9e781905ed..db5c99318c79 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -46,4 +46,5 @@ extern char * __must_check skip_spaces(const char *); extern char *strim(char *); +extern void *memchr_inv(const void *start, int c, size_t bytes); #endif /* _TOOLS_LINUX_STRING_H_ */ diff --git a/tools/lib/string.c b/tools/lib/string.c index f645343815de..8b6892f959ab 100644 --- a/tools/lib/string.c +++ b/tools/lib/string.c @@ -168,3 +168,61 @@ char *strreplace(char *s, char old, char new) *s = new; return s; } + +static void *check_bytes8(const u8 *start, u8 value, unsigned int bytes) +{ + while (bytes) { + if (*start != value) + return (void *)start; + start++; + bytes--; + } + return NULL; +} + +/** + * memchr_inv - Find an unmatching character in an area of memory. + * @start: The memory area + * @c: Find a character other than c + * @bytes: The size of the area. + * + * returns the address of the first character other than @c, or %NULL + * if the whole buffer contains just @c. + */ +void *memchr_inv(const void *start, int c, size_t bytes) +{ + u8 value = c; + u64 value64; + unsigned int words, prefix; + + if (bytes <= 16) + return check_bytes8(start, value, bytes); + + value64 = value; + value64 |= value64 << 8; + value64 |= value64 << 16; + value64 |= value64 << 32; + + prefix = (unsigned long)start % 8; + if (prefix) { + u8 *r; + + prefix = 8 - prefix; + r = check_bytes8(start, value, prefix); + if (r) + return r; + start += prefix; + bytes -= prefix; + } + + words = bytes / 8; + + while (words) { + if (*(u64 *)start != value64) + return check_bytes8(start, value, 8); + start += 8; + words--; + } + + return check_bytes8(start, value, bytes % 8); +} From 38ab04186fb3f3b50f33e5cfde38066c5227e89d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 10 Sep 2021 15:46:30 -0700 Subject: [PATCH 19/65] perf tools: Allow build-id with trailing zeros commit 4a86d41404005a3c7e7b6065e8169ac6202887a9 upstream. Currently perf saves a build-id with size but old versions assumes the size of 20. In case the build-id is less than 20 (like for MD5), it'd fill the rest with 0s. I saw a problem when old version of perf record saved a binary in the build-id cache and new version of perf reads the data. The symbols should be read from the build-id cache (as the path no longer has the same binary) but it failed due to mismatch in the build-id. symsrc__init: build id mismatch for /home/namhyung/.debug/.build-id/53/e4c2f42a4c61a2d632d92a72afa08f00000000/elf. The build-id event in the data has 20 byte build-ids, but it saw a different size (16) when it reads the build-id of the elf file in the build-id cache. $ readelf -n ~/.debug/.build-id/53/e4c2f42a4c61a2d632d92a72afa08f00000000/elf Displaying notes found in: .note.gnu.build-id Owner Data size Description GNU 0x00000010 NT_GNU_BUILD_ID (unique build ID bitstring) Build ID: 53e4c2f42a4c61a2d632d92a72afa08f Let's fix this by allowing trailing zeros if the size is different. Fixes: 39be8d0115b321ed ("perf tools: Pass build_id object to dso__build_id_equal()") Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Ian Rogers Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210910224630.1084877-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/dso.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index b1ff0c9f32da..5e9902fa1dc8 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1336,6 +1336,16 @@ void dso__set_build_id(struct dso *dso, struct build_id *bid) bool dso__build_id_equal(const struct dso *dso, struct build_id *bid) { + if (dso->bid.size > bid->size && dso->bid.size == BUILD_ID_SIZE) { + /* + * For the backward compatibility, it allows a build-id has + * trailing zeros. + */ + return !memcmp(dso->bid.data, bid->data, bid->size) && + !memchr_inv(&dso->bid.data[bid->size], 0, + dso->bid.size - bid->size); + } + return dso->bid.size == bid->size && memcmp(dso->bid.data, bid->data, dso->bid.size) == 0; } From 375e779ec32a87edeb6bb26582c1c9a1f59876f9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 10 Aug 2021 11:44:13 +0300 Subject: [PATCH 20/65] thermal/drivers/exynos: Fix an error code in exynos_tmu_probe() commit 02d438f62c05f0d055ceeedf12a2f8796b258c08 upstream. This error path return success but it should propagate the negative error code from devm_clk_get(). Fixes: 6c247393cfdd ("thermal: exynos: Add TMU support for Exynos7 SoC") Signed-off-by: Dan Carpenter Reviewed-by: Krzysztof Kozlowski Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210810084413.GA23810@kili Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/samsung/exynos_tmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index e9a90bc23b11..f4ab4c5b4b62 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -1073,6 +1073,7 @@ static int exynos_tmu_probe(struct platform_device *pdev) data->sclk = devm_clk_get(&pdev->dev, "tmu_sclk"); if (IS_ERR(data->sclk)) { dev_err(&pdev->dev, "Failed to get sclk\n"); + ret = PTR_ERR(data->sclk); goto err_clk; } else { ret = clk_prepare_enable(data->sclk); From e464b3876b022c0c4e0eb21518c491c6eb38130c Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Mon, 17 May 2021 16:35:57 +0800 Subject: [PATCH 21/65] 9p/trans_virtio: Remove sysfs file on probe failure commit f997ea3b7afc108eb9761f321b57de2d089c7c48 upstream. This ensures we don't leak the sysfs file if we failed to allocate chan->vc_wq during probe. Link: http://lkml.kernel.org/r/20210517083557.172-1-xieyongji@bytedance.com Fixes: 86c8437383ac ("net/9p: Add sysfs mount_tag file for virtio 9P device") Signed-off-by: Xie Yongji Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- net/9p/trans_virtio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index a3cd90a74012..f582351d84ec 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -605,7 +605,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); if (!chan->vc_wq) { err = -ENOMEM; - goto out_free_tag; + goto out_remove_file; } init_waitqueue_head(chan->vc_wq); chan->ring_bufs_avail = 1; @@ -623,6 +623,8 @@ static int p9_virtio_probe(struct virtio_device *vdev) return 0; +out_remove_file: + sysfs_remove_file(&vdev->dev.kobj, &dev_attr_mount_tag.attr); out_free_tag: kfree(tag); out_free_vq: From 30417cbeccffeaaf5d7fa5a9e9feffe246a1430f Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 7 Sep 2021 20:00:41 -0700 Subject: [PATCH 22/65] prctl: allow to setup brk for et_dyn executables commit e1fbbd073137a9d63279f6bf363151a938347640 upstream. Keno Fischer reported that when a binray loaded via ld-linux-x the prctl(PR_SET_MM_MAP) doesn't allow to setup brk value because it lays before mm:end_data. For example a test program shows | # ~/t | | start_code 401000 | end_code 401a15 | start_stack 7ffce4577dd0 | start_data 403e10 | end_data 40408c | start_brk b5b000 | sbrk(0) b5b000 and when executed via ld-linux | # /lib64/ld-linux-x86-64.so.2 ~/t | | start_code 7fc25b0a4000 | end_code 7fc25b0c4524 | start_stack 7fffcc6b2400 | start_data 7fc25b0ce4c0 | end_data 7fc25b0cff98 | start_brk 55555710c000 | sbrk(0) 55555710c000 This of course prevent criu from restoring such programs. Looking into how kernel operates with brk/start_brk inside brk() syscall I don't see any problem if we allow to setup brk/start_brk without checking for end_data. Even if someone pass some weird address here on a purpose then the worst possible result will be an unexpected unmapping of existing vma (own vma, since prctl works with the callers memory) but test for RLIMIT_DATA is still valid and a user won't be able to gain more memory in case of expanding VMAs via new values shipped with prctl call. Link: https://lkml.kernel.org/r/20210121221207.GB2174@grain Fixes: bbdc6076d2e5 ("binfmt_elf: move brk out of mmap when doing direct loader exec") Signed-off-by: Cyrill Gorcunov Reported-by: Keno Fischer Acked-by: Andrey Vagin Tested-by: Andrey Vagin Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Kirill Tkhai Cc: Eric W. Biederman Cc: Pavel Tikhomirov Cc: Alexander Mikhalitsyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/sys.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index a730c03ee607..24a3a28ae228 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1941,13 +1941,6 @@ static int validate_prctl_map_addr(struct prctl_mm_map *prctl_map) error = -EINVAL; - /* - * @brk should be after @end_data in traditional maps. - */ - if (prctl_map->start_brk <= prctl_map->end_data || - prctl_map->brk <= prctl_map->end_data) - goto out; - /* * Neither we should allow to override limits if they set. */ From 0796d99c1b16f03fd34024a26da2e81ec1c728c1 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 7 Sep 2021 20:00:26 -0700 Subject: [PATCH 23/65] nilfs2: use refcount_dec_and_lock() to fix potential UAF commit 98e2e409e76ef7781d8511f997359e9c504a95c1 upstream. When the refcount is decreased to 0, the resource reclamation branch is entered. Before CPU0 reaches the race point (1), CPU1 may obtain the spinlock and traverse the rbtree to find 'root', see nilfs_lookup_root(). Although CPU1 will call refcount_inc() to increase the refcount, it is obviously too late. CPU0 will release 'root' directly, CPU1 then accesses 'root' and triggers UAF. Use refcount_dec_and_lock() to ensure that both the operations of decrease refcount to 0 and link deletion are lock protected eliminates this risk. CPU0 CPU1 nilfs_put_root(): <-------- (1) spin_lock(&nilfs->ns_cptree_lock); rb_erase(&root->rb_node, &nilfs->ns_cptree); spin_unlock(&nilfs->ns_cptree_lock); kfree(root); <-------- use-after-free refcount_t: underflow; use-after-free. WARNING: CPU: 2 PID: 9476 at lib/refcount.c:28 \ refcount_warn_saturate+0x1cf/0x210 lib/refcount.c:28 Modules linked in: CPU: 2 PID: 9476 Comm: syz-executor.0 Not tainted 5.10.45-rc1+ #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ... RIP: 0010:refcount_warn_saturate+0x1cf/0x210 lib/refcount.c:28 ... ... Call Trace: __refcount_sub_and_test include/linux/refcount.h:283 [inline] __refcount_dec_and_test include/linux/refcount.h:315 [inline] refcount_dec_and_test include/linux/refcount.h:333 [inline] nilfs_put_root+0xc1/0xd0 fs/nilfs2/the_nilfs.c:795 nilfs_segctor_destroy fs/nilfs2/segment.c:2749 [inline] nilfs_detach_log_writer+0x3fa/0x570 fs/nilfs2/segment.c:2812 nilfs_put_super+0x2f/0xf0 fs/nilfs2/super.c:467 generic_shutdown_super+0xcd/0x1f0 fs/super.c:464 kill_block_super+0x4a/0x90 fs/super.c:1446 deactivate_locked_super+0x6a/0xb0 fs/super.c:335 deactivate_super+0x85/0x90 fs/super.c:366 cleanup_mnt+0x277/0x2e0 fs/namespace.c:1118 __cleanup_mnt+0x15/0x20 fs/namespace.c:1125 task_work_run+0x8e/0x110 kernel/task_work.c:151 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_user_mode_loop kernel/entry/common.c:164 [inline] exit_to_user_mode_prepare+0x13c/0x170 kernel/entry/common.c:191 syscall_exit_to_user_mode+0x16/0x30 kernel/entry/common.c:266 do_syscall_64+0x45/0x80 arch/x86/entry/common.c:56 entry_SYSCALL_64_after_hwframe+0x44/0xa9 There is no reproduction program, and the above is only theoretical analysis. Link: https://lkml.kernel.org/r/1629859428-5906-1-git-send-email-konishi.ryusuke@gmail.com Fixes: ba65ae4729bf ("nilfs2: add checkpoint tree to nilfs object") Link: https://lkml.kernel.org/r/20210723012317.4146-1-thunder.leizhen@huawei.com Signed-off-by: Zhen Lei Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/the_nilfs.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 221a1cc597f0..c20ebecd7bc2 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -792,14 +792,13 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) void nilfs_put_root(struct nilfs_root *root) { - if (refcount_dec_and_test(&root->count)) { - struct the_nilfs *nilfs = root->nilfs; + struct the_nilfs *nilfs = root->nilfs; - nilfs_sysfs_delete_snapshot_group(root); - - spin_lock(&nilfs->ns_cptree_lock); + if (refcount_dec_and_lock(&root->count, &nilfs->ns_cptree_lock)) { rb_erase(&root->rb_node, &nilfs->ns_cptree); spin_unlock(&nilfs->ns_cptree_lock); + + nilfs_sysfs_delete_snapshot_group(root); iput(root->ifile); kfree(root); From 74190973ab01fb88e8b685f417651e30112ed4a1 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 7 Sep 2021 19:58:21 -0700 Subject: [PATCH 24/65] profiling: fix shift-out-of-bounds bugs commit 2d186afd04d669fe9c48b994c41a7405a3c9f16d upstream. Syzbot reported shift-out-of-bounds bug in profile_init(). The problem was in incorrect prof_shift. Since prof_shift value comes from userspace we need to clamp this value into [0, BITS_PER_LONG -1] boundaries. Second possible shiht-out-of-bounds was found by Tetsuo: sample_step local variable in read_profile() had "unsigned int" type, but prof_shift allows to make a BITS_PER_LONG shift. So, to prevent possible shiht-out-of-bounds sample_step type was changed to "unsigned long". Also, "unsigned short int" will be sufficient for storing [0, BITS_PER_LONG] value, that's why there is no need for "unsigned long" prof_shift. Link: https://lkml.kernel.org/r/20210813140022.5011-1-paskripkin@gmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-and-tested-by: syzbot+e68c89a9510c159d9684@syzkaller.appspotmail.com Suggested-by: Tetsuo Handa Signed-off-by: Pavel Skripkin Cc: Thomas Gleixner Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/profile.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/kernel/profile.c b/kernel/profile.c index 6f69a4195d56..b47fe52f0ade 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -41,7 +41,8 @@ struct profile_hit { #define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ) static atomic_t *prof_buffer; -static unsigned long prof_len, prof_shift; +static unsigned long prof_len; +static unsigned short int prof_shift; int prof_on __read_mostly; EXPORT_SYMBOL_GPL(prof_on); @@ -67,8 +68,8 @@ int profile_setup(char *str) if (str[strlen(sleepstr)] == ',') str += strlen(sleepstr) + 1; if (get_option(&str, &par)) - prof_shift = par; - pr_info("kernel sleep profiling enabled (shift: %ld)\n", + prof_shift = clamp(par, 0, BITS_PER_LONG - 1); + pr_info("kernel sleep profiling enabled (shift: %u)\n", prof_shift); #else pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n"); @@ -78,21 +79,21 @@ int profile_setup(char *str) if (str[strlen(schedstr)] == ',') str += strlen(schedstr) + 1; if (get_option(&str, &par)) - prof_shift = par; - pr_info("kernel schedule profiling enabled (shift: %ld)\n", + prof_shift = clamp(par, 0, BITS_PER_LONG - 1); + pr_info("kernel schedule profiling enabled (shift: %u)\n", prof_shift); } else if (!strncmp(str, kvmstr, strlen(kvmstr))) { prof_on = KVM_PROFILING; if (str[strlen(kvmstr)] == ',') str += strlen(kvmstr) + 1; if (get_option(&str, &par)) - prof_shift = par; - pr_info("kernel KVM profiling enabled (shift: %ld)\n", + prof_shift = clamp(par, 0, BITS_PER_LONG - 1); + pr_info("kernel KVM profiling enabled (shift: %u)\n", prof_shift); } else if (get_option(&str, &par)) { - prof_shift = par; + prof_shift = clamp(par, 0, BITS_PER_LONG - 1); prof_on = CPU_PROFILING; - pr_info("kernel profiling enabled (shift: %ld)\n", + pr_info("kernel profiling enabled (shift: %u)\n", prof_shift); } return 1; @@ -468,7 +469,7 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos) unsigned long p = *ppos; ssize_t read; char *pnt; - unsigned int sample_step = 1 << prof_shift; + unsigned long sample_step = 1UL << prof_shift; profile_flip_buffers(); if (p >= (prof_len+1)*sizeof(unsigned int)) From c63df77c40ca40979ed8bb02b4a07500eef6cd28 Mon Sep 17 00:00:00 2001 From: Prasad Sodagudi Date: Tue, 7 Sep 2021 04:24:23 -0700 Subject: [PATCH 25/65] PM: sleep: core: Avoid setting power.must_resume to false commit 4a9344cd0aa4499beb3772bbecb40bb78888c0e1 upstream. There are variables(power.may_skip_resume and dev->power.must_resume) and DPM_FLAG_MAY_SKIP_RESUME flags to control the resume of devices after a system wide suspend transition. Setting the DPM_FLAG_MAY_SKIP_RESUME flag means that the driver allows its "noirq" and "early" resume callbacks to be skipped if the device can be left in suspend after a system-wide transition into the working state. PM core determines that the driver's "noirq" and "early" resume callbacks should be skipped or not with dev_pm_skip_resume() function by checking power.may_skip_resume variable. power.must_resume variable is getting set to false in __device_suspend() function without checking device's DPM_FLAG_MAY_SKIP_RESUME settings. In problematic scenario, where all the devices in the suspend_late stage are successful and some device can fail to suspend in suspend_noirq phase. So some devices successfully suspended in suspend_late stage are not getting chance to execute __device_suspend_noirq() to set dev->power.must_resume variable to true and not getting resumed in early_resume phase. Add a check for device's DPM_FLAG_MAY_SKIP_RESUME flag before setting power.must_resume variable in __device_suspend function. Fixes: 6e176bf8d461 ("PM: sleep: core: Do not skip callbacks in the resume phase") Signed-off-by: Prasad Sodagudi Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index c7ac49042cee..192b1c7286b3 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1644,7 +1644,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) } dev->power.may_skip_resume = true; - dev->power.must_resume = false; + dev->power.must_resume = !dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME); dpm_watchdog_set(&wd, dev); device_lock(dev); From 322b70b522abe03cd59712bb47a72eddd835d19d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 7 Jul 2021 18:27:49 +0200 Subject: [PATCH 26/65] pwm: lpc32xx: Don't modify HW state in .probe() after the PWM chip was registered MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3d2813fb17e5fd0d73c1d1442ca0192bde4af10e upstream. This fixes a race condition: After pwmchip_add() is called there might already be a consumer and then modifying the hardware behind the consumer's back is bad. So set the default before. (Side-note: I don't know what this register setting actually does, if this modifies the polarity there is an inconsistency because the inversed polarity isn't considered if the PWM is already running during .probe().) Fixes: acfd92fdfb93 ("pwm: lpc32xx: Set PWM_PIN_LEVEL bit to default value") Cc: Sylvain Lemieux Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/pwm/pwm-lpc32xx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pwm/pwm-lpc32xx.c b/drivers/pwm/pwm-lpc32xx.c index 710d9a207d2b..522f862eca52 100644 --- a/drivers/pwm/pwm-lpc32xx.c +++ b/drivers/pwm/pwm-lpc32xx.c @@ -120,17 +120,17 @@ static int lpc32xx_pwm_probe(struct platform_device *pdev) lpc32xx->chip.npwm = 1; lpc32xx->chip.base = -1; + /* If PWM is disabled, configure the output to the default value */ + val = readl(lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2)); + val &= ~PWM_PIN_LEVEL; + writel(val, lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2)); + ret = pwmchip_add(&lpc32xx->chip); if (ret < 0) { dev_err(&pdev->dev, "failed to add PWM chip, error %d\n", ret); return ret; } - /* When PWM is disable, configure the output to the default value */ - val = readl(lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2)); - val &= ~PWM_PIN_LEVEL; - writel(val, lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2)); - platform_set_drvdata(pdev, lpc32xx); return 0; From 777344da345a374eb74f2a743b5399e6e60d566b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 7 Jul 2021 18:27:50 +0200 Subject: [PATCH 27/65] pwm: mxs: Don't modify HW state in .probe() after the PWM chip was registered MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 020162d6f49f2963062229814a56a89c86cbeaa8 upstream. This fixes a race condition: After pwmchip_add() is called there might already be a consumer and then modifying the hardware behind the consumer's back is bad. So reset before calling pwmchip_add(). Note that reseting the hardware isn't the right thing to do if the PWM is already running as it might e.g. disable (or even enable) a backlight that is supposed to be on (or off). Fixes: 4dce82c1e840 ("pwm: add pwm-mxs support") Cc: Sascha Hauer Cc: Shawn Guo Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/pwm/pwm-mxs.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/pwm/pwm-mxs.c b/drivers/pwm/pwm-mxs.c index 7ce616923c52..41bdbe71ae46 100644 --- a/drivers/pwm/pwm-mxs.c +++ b/drivers/pwm/pwm-mxs.c @@ -148,6 +148,11 @@ static int mxs_pwm_probe(struct platform_device *pdev) return ret; } + /* FIXME: Only do this if the PWM isn't already running */ + ret = stmp_reset_block(mxs->base); + if (ret) + return dev_err_probe(&pdev->dev, ret, "failed to reset PWM\n"); + ret = pwmchip_add(&mxs->chip); if (ret < 0) { dev_err(&pdev->dev, "failed to add pwm chip %d\n", ret); @@ -156,15 +161,7 @@ static int mxs_pwm_probe(struct platform_device *pdev) platform_set_drvdata(pdev, mxs); - ret = stmp_reset_block(mxs->base); - if (ret) - goto pwm_remove; - return 0; - -pwm_remove: - pwmchip_remove(&mxs->chip); - return ret; } static int mxs_pwm_remove(struct platform_device *pdev) From 48271d10bf4cbdd4693ad4f94fb14c2aa7fcc4c5 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 22 Jul 2021 10:54:10 -0700 Subject: [PATCH 28/65] dmaengine: idxd: fix wq slot allocation index check [ Upstream commit 673d812d30be67942762bb9e8548abb26a3ba4a7 ] The sbitmap wait and allocate routine checks the index that is returned from sbitmap_queue_get(). It should be idxd >= 0 as 0 is also a valid index. This fixes issue where submission path hangs when WQ size is 1. Fixes: 0705107fcc80 ("dmaengine: idxd: move submission to sbitmap_queue") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162697645067.3478714.506720687816951762.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/idxd/submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 417048e3c42a..0368c5490788 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -45,7 +45,7 @@ struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype) if (signal_pending_state(TASK_INTERRUPTIBLE, current)) break; idx = sbitmap_queue_get(sbq, &cpu); - if (idx > 0) + if (idx >= 0) break; schedule(); } From 113a69460de5e82ae3be12b01ec1daee81fd9929 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Thu, 13 May 2021 17:57:33 -0700 Subject: [PATCH 29/65] platform/chrome: sensorhub: Add trace events for sample [ Upstream commit d453ceb6549af8798913de6a20444cb7200fdb69 ] Add trace event to report samples and their timestamp coming from the EC. It allows to check if the timestamps are correct and the filter is working correctly without introducing too much latency. To enable these events: cd /sys/kernel/debug/tracing/ echo 1 > events/cros_ec/enable echo 0 > events/cros_ec/cros_ec_request_start/enable echo 0 > events/cros_ec/cros_ec_request_done/enable echo 1 > tracing_on cat trace_pipe Observe event flowing: irq/105-chromeo-95 [000] .... 613.659758: cros_ec_sensorhub_timestamp: ... irq/105-chromeo-95 [000] .... 613.665219: cros_ec_sensorhub_filter: dx: ... Signed-off-by: Gwendal Grignou Signed-off-by: Enric Balletbo i Serra Signed-off-by: Sasha Levin --- drivers/platform/chrome/Makefile | 2 +- .../platform/chrome/cros_ec_sensorhub_ring.c | 14 +++ drivers/platform/chrome/cros_ec_trace.h | 94 +++++++++++++++++++ 3 files changed, 109 insertions(+), 1 deletion(-) diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile index 41baccba033f..f901d2e43166 100644 --- a/drivers/platform/chrome/Makefile +++ b/drivers/platform/chrome/Makefile @@ -20,7 +20,7 @@ obj-$(CONFIG_CROS_EC_CHARDEV) += cros_ec_chardev.o obj-$(CONFIG_CROS_EC_LIGHTBAR) += cros_ec_lightbar.o obj-$(CONFIG_CROS_EC_VBC) += cros_ec_vbc.o obj-$(CONFIG_CROS_EC_DEBUGFS) += cros_ec_debugfs.o -cros-ec-sensorhub-objs := cros_ec_sensorhub.o cros_ec_sensorhub_ring.o +cros-ec-sensorhub-objs := cros_ec_sensorhub.o cros_ec_sensorhub_ring.o cros_ec_trace.o obj-$(CONFIG_CROS_EC_SENSORHUB) += cros-ec-sensorhub.o obj-$(CONFIG_CROS_EC_SYSFS) += cros_ec_sysfs.o obj-$(CONFIG_CROS_USBPD_LOGGER) += cros_usbpd_logger.o diff --git a/drivers/platform/chrome/cros_ec_sensorhub_ring.c b/drivers/platform/chrome/cros_ec_sensorhub_ring.c index 8921f24e83ba..98e37080f760 100644 --- a/drivers/platform/chrome/cros_ec_sensorhub_ring.c +++ b/drivers/platform/chrome/cros_ec_sensorhub_ring.c @@ -17,6 +17,8 @@ #include #include +#include "cros_ec_trace.h" + /* Precision of fixed point for the m values from the filter */ #define M_PRECISION BIT(23) @@ -291,6 +293,7 @@ cros_ec_sensor_ring_ts_filter_update(struct cros_ec_sensors_ts_filter_state state->median_m = 0; state->median_error = 0; } + trace_cros_ec_sensorhub_filter(state, dx, dy); } /** @@ -427,6 +430,11 @@ cros_ec_sensor_ring_process_event(struct cros_ec_sensorhub *sensorhub, if (new_timestamp - *current_timestamp > 0) *current_timestamp = new_timestamp; } + trace_cros_ec_sensorhub_timestamp(in->timestamp, + fifo_info->timestamp, + fifo_timestamp, + *current_timestamp, + now); } if (in->flags & MOTIONSENSE_SENSOR_FLAG_ODR) { @@ -460,6 +468,12 @@ cros_ec_sensor_ring_process_event(struct cros_ec_sensorhub *sensorhub, /* Regular sample */ out->sensor_id = in->sensor_num; + trace_cros_ec_sensorhub_data(in->sensor_num, + fifo_info->timestamp, + fifo_timestamp, + *current_timestamp, + now); + if (*current_timestamp - now > 0) { /* * This fix is needed to overcome the timestamp filter putting diff --git a/drivers/platform/chrome/cros_ec_trace.h b/drivers/platform/chrome/cros_ec_trace.h index f744b21bc655..f50b9f9b8610 100644 --- a/drivers/platform/chrome/cros_ec_trace.h +++ b/drivers/platform/chrome/cros_ec_trace.h @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -70,6 +71,99 @@ TRACE_EVENT(cros_ec_request_done, __entry->retval) ); +TRACE_EVENT(cros_ec_sensorhub_timestamp, + TP_PROTO(u32 ec_sample_timestamp, u32 ec_fifo_timestamp, s64 fifo_timestamp, + s64 current_timestamp, s64 current_time), + TP_ARGS(ec_sample_timestamp, ec_fifo_timestamp, fifo_timestamp, current_timestamp, + current_time), + TP_STRUCT__entry( + __field(u32, ec_sample_timestamp) + __field(u32, ec_fifo_timestamp) + __field(s64, fifo_timestamp) + __field(s64, current_timestamp) + __field(s64, current_time) + __field(s64, delta) + ), + TP_fast_assign( + __entry->ec_sample_timestamp = ec_sample_timestamp; + __entry->ec_fifo_timestamp = ec_fifo_timestamp; + __entry->fifo_timestamp = fifo_timestamp; + __entry->current_timestamp = current_timestamp; + __entry->current_time = current_time; + __entry->delta = current_timestamp - current_time; + ), + TP_printk("ec_ts: %12lld, ec_fifo_ts: %12lld, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld", + __entry->ec_sample_timestamp, + __entry->ec_fifo_timestamp, + __entry->fifo_timestamp, + __entry->current_timestamp, + __entry->current_time, + __entry->delta + ) +); + +TRACE_EVENT(cros_ec_sensorhub_data, + TP_PROTO(u32 ec_sensor_num, u32 ec_fifo_timestamp, s64 fifo_timestamp, + s64 current_timestamp, s64 current_time), + TP_ARGS(ec_sensor_num, ec_fifo_timestamp, fifo_timestamp, current_timestamp, current_time), + TP_STRUCT__entry( + __field(u32, ec_sensor_num) + __field(u32, ec_fifo_timestamp) + __field(s64, fifo_timestamp) + __field(s64, current_timestamp) + __field(s64, current_time) + __field(s64, delta) + ), + TP_fast_assign( + __entry->ec_sensor_num = ec_sensor_num; + __entry->ec_fifo_timestamp = ec_fifo_timestamp; + __entry->fifo_timestamp = fifo_timestamp; + __entry->current_timestamp = current_timestamp; + __entry->current_time = current_time; + __entry->delta = current_timestamp - current_time; + ), + TP_printk("ec_num: %4d, ec_fifo_ts: %12lld, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld", + __entry->ec_sensor_num, + __entry->ec_fifo_timestamp, + __entry->fifo_timestamp, + __entry->current_timestamp, + __entry->current_time, + __entry->delta + ) +); + +TRACE_EVENT(cros_ec_sensorhub_filter, + TP_PROTO(struct cros_ec_sensors_ts_filter_state *state, s64 dx, s64 dy), + TP_ARGS(state, dx, dy), + TP_STRUCT__entry( + __field(s64, dx) + __field(s64, dy) + __field(s64, median_m) + __field(s64, median_error) + __field(s64, history_len) + __field(s64, x) + __field(s64, y) + ), + TP_fast_assign( + __entry->dx = dx; + __entry->dy = dy; + __entry->median_m = state->median_m; + __entry->median_error = state->median_error; + __entry->history_len = state->history_len; + __entry->x = state->x_offset; + __entry->y = state->y_offset; + ), + TP_printk("dx: %12lld. dy: %12lld median_m: %12lld median_error: %12lld len: %d x: %12lld y: %12lld", + __entry->dx, + __entry->dy, + __entry->median_m, + __entry->median_error, + __entry->history_len, + __entry->x, + __entry->y + ) +); + #endif /* _CROS_EC_TRACE_H_ */ From 41aa215734868ba7cb195e2cc5f00cdba5bd1578 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 30 Aug 2021 11:00:50 -0700 Subject: [PATCH 30/65] platform/chrome: cros_ec_trace: Fix format warnings [ Upstream commit 4665584888ad2175831c972c004115741ec799e9 ] Fix printf format issues in new tracing events. Fixes: 814318242687 ("platform/chrome: cros_ec_trace: Add fields to command traces") Signed-off-by: Gwendal Grignou Link: https://lore.kernel.org/r/20210830180050.2077261-1-gwendal@chromium.org Signed-off-by: Benson Leung Signed-off-by: Sasha Levin --- drivers/platform/chrome/cros_ec_trace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/platform/chrome/cros_ec_trace.h b/drivers/platform/chrome/cros_ec_trace.h index f50b9f9b8610..7e7cfc98657a 100644 --- a/drivers/platform/chrome/cros_ec_trace.h +++ b/drivers/platform/chrome/cros_ec_trace.h @@ -92,7 +92,7 @@ TRACE_EVENT(cros_ec_sensorhub_timestamp, __entry->current_time = current_time; __entry->delta = current_timestamp - current_time; ), - TP_printk("ec_ts: %12lld, ec_fifo_ts: %12lld, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld", + TP_printk("ec_ts: %9u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld", __entry->ec_sample_timestamp, __entry->ec_fifo_timestamp, __entry->fifo_timestamp, @@ -122,7 +122,7 @@ TRACE_EVENT(cros_ec_sensorhub_data, __entry->current_time = current_time; __entry->delta = current_timestamp - current_time; ), - TP_printk("ec_num: %4d, ec_fifo_ts: %12lld, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld", + TP_printk("ec_num: %4u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld", __entry->ec_sensor_num, __entry->ec_fifo_timestamp, __entry->fifo_timestamp, @@ -153,7 +153,7 @@ TRACE_EVENT(cros_ec_sensorhub_filter, __entry->x = state->x_offset; __entry->y = state->y_offset; ), - TP_printk("dx: %12lld. dy: %12lld median_m: %12lld median_error: %12lld len: %d x: %12lld y: %12lld", + TP_printk("dx: %12lld. dy: %12lld median_m: %12lld median_error: %12lld len: %lld x: %12lld y: %12lld", __entry->dx, __entry->dy, __entry->median_m, From 8193ad306ea0005bbe1f2f8fc600ab270d27c8dd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 9 Jun 2021 14:09:52 -0400 Subject: [PATCH 31/65] ceph: allow ceph_put_mds_session to take NULL or ERR_PTR [ Upstream commit 7e65624d32b6e0429b1d3559e5585657f34f74a1 ] ...to simplify some error paths. Signed-off-by: Jeff Layton Reviewed-by: Luis Henriques Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/dir.c | 3 +-- fs/ceph/inode.c | 6 ++---- fs/ceph/mds_client.c | 6 ++++-- fs/ceph/metric.c | 3 +-- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index a4d48370b2b3..f63c1a090139 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1797,8 +1797,7 @@ static void ceph_d_release(struct dentry *dentry) dentry->d_fsdata = NULL; spin_unlock(&dentry->d_lock); - if (di->lease_session) - ceph_put_mds_session(di->lease_session); + ceph_put_mds_session(di->lease_session); kmem_cache_free(ceph_dentry_cachep, di); } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 57cd78e942c0..63e781e4f7e4 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1121,8 +1121,7 @@ static inline void update_dentry_lease(struct inode *dir, struct dentry *dentry, __update_dentry_lease(dir, dentry, lease, session, from_time, &old_lease_session); spin_unlock(&dentry->d_lock); - if (old_lease_session) - ceph_put_mds_session(old_lease_session); + ceph_put_mds_session(old_lease_session); } /* @@ -1167,8 +1166,7 @@ static void update_dentry_lease_careful(struct dentry *dentry, from_time, &old_lease_session); out_unlock: spin_unlock(&dentry->d_lock); - if (old_lease_session) - ceph_put_mds_session(old_lease_session); + ceph_put_mds_session(old_lease_session); } /* diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 816cea497537..8cbbb611e0ca 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -661,6 +661,9 @@ struct ceph_mds_session *ceph_get_mds_session(struct ceph_mds_session *s) void ceph_put_mds_session(struct ceph_mds_session *s) { + if (IS_ERR_OR_NULL(s)) + return; + dout("mdsc put_session %p %d -> %d\n", s, refcount_read(&s->s_ref), refcount_read(&s->s_ref)-1); if (refcount_dec_and_test(&s->s_ref)) { @@ -1435,8 +1438,7 @@ static void __open_export_target_sessions(struct ceph_mds_client *mdsc, for (i = 0; i < mi->num_export_targets; i++) { ts = __open_export_target_session(mdsc, mi->export_targets[i]); - if (!IS_ERR(ts)) - ceph_put_mds_session(ts); + ceph_put_mds_session(ts); } } diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index fee4c4778313..3b2ef8ee544e 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -233,8 +233,7 @@ void ceph_metric_destroy(struct ceph_client_metric *m) cancel_delayed_work_sync(&m->delayed_work); - if (m->session) - ceph_put_mds_session(m->session); + ceph_put_mds_session(m->session); } static inline void __update_latency(ktime_t *totalp, ktime_t *lsump, From e418ce8b8dfd47c3555b6e53757382d1fb609b54 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 27 Jul 2021 15:47:12 -0400 Subject: [PATCH 32/65] ceph: cancel delayed work instead of flushing on mdsc teardown [ Upstream commit b4002173b7989588b6feaefc42edaf011b596782 ] The first thing metric_delayed_work does is check mdsc->stopping, and then return immediately if it's set. That's good since we would have already torn down the metric structures at this point, otherwise, but there is no locking around mdsc->stopping. It's possible that the ceph_metric_destroy call could race with the delayed_work, in which case we could end up with the delayed_work accessing destroyed percpu variables. At this point in the mdsc teardown, the "stopping" flag has already been set, so there's no benefit to flushing the work. Move the work cancellation in ceph_metric_destroy ahead of the percpu variable destruction, and eliminate the flush_delayed_work call in ceph_mdsc_destroy. Fixes: 18f473b384a6 ("ceph: periodically send perf metrics to MDSes") Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/mds_client.c | 1 - fs/ceph/metric.c | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 8cbbb611e0ca..46606fb5b886 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4859,7 +4859,6 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc) ceph_metric_destroy(&mdsc->metric); - flush_delayed_work(&mdsc->metric.delayed_work); fsc->mdsc = NULL; kfree(mdsc); dout("mdsc_destroy %p done\n", mdsc); diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index 3b2ef8ee544e..9e0a0e26294e 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -224,6 +224,8 @@ void ceph_metric_destroy(struct ceph_client_metric *m) if (!m) return; + cancel_delayed_work_sync(&m->delayed_work); + percpu_counter_destroy(&m->total_inodes); percpu_counter_destroy(&m->opened_inodes); percpu_counter_destroy(&m->i_caps_mis); @@ -231,8 +233,6 @@ void ceph_metric_destroy(struct ceph_client_metric *m) percpu_counter_destroy(&m->d_lease_mis); percpu_counter_destroy(&m->d_lease_hit); - cancel_delayed_work_sync(&m->delayed_work); - ceph_put_mds_session(m->session); } From 912afe602eacf4c9cf75a74d3cdb5c9f9c7c4698 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Tue, 7 Sep 2021 20:00:47 -0700 Subject: [PATCH 33/65] Kconfig.debug: drop selecting non-existing HARDLOCKUP_DETECTOR_ARCH [ Upstream commit 6fe26259b4884b657cbc233fb9cdade9d704976e ] Commit 05a4a9527931 ("kernel/watchdog: split up config options") adds a new config HARDLOCKUP_DETECTOR, which selects the non-existing config HARDLOCKUP_DETECTOR_ARCH. Hence, ./scripts/checkkconfigsymbols.py warns: HARDLOCKUP_DETECTOR_ARCH Referencing files: lib/Kconfig.debug Simply drop selecting the non-existing HARDLOCKUP_DETECTOR_ARCH. Link: https://lkml.kernel.org/r/20210806115618.22088-1-lukas.bulwahn@gmail.com Fixes: 05a4a9527931 ("kernel/watchdog: split up config options") Signed-off-by: Lukas Bulwahn Cc: Nicholas Piggin Cc: Masahiro Yamada Cc: Babu Moger Cc: Don Zickus Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/Kconfig.debug | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index bf174798afcb..95f909540587 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -981,7 +981,6 @@ config HARDLOCKUP_DETECTOR depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH select LOCKUP_DETECTOR select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF - select HARDLOCKUP_DETECTOR_ARCH if HAVE_HARDLOCKUP_DETECTOR_ARCH help Say Y here to enable the kernel to act as a watchdog to detect hard lockups. From d1f9ecc00da1bb6af558251d3eb88ea33aa0c307 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 5 Sep 2021 00:54:31 +0900 Subject: [PATCH 34/65] tools/bootconfig: Fix tracing_on option checking in ftrace2bconf.sh [ Upstream commit 32ba9f0fb027cc43074e3ea26fcf831adeee8e03 ] Since tracing_on indicates only "1" (default) or "0", ftrace2bconf.sh only need to check the value is "0". Link: https://lkml.kernel.org/r/163077087144.222577.6888011847727968737.stgit@devnote2 Fixes: 55ed4560774d ("tools/bootconfig: Add tracing_on support to helper scripts") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- tools/bootconfig/scripts/ftrace2bconf.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/bootconfig/scripts/ftrace2bconf.sh b/tools/bootconfig/scripts/ftrace2bconf.sh index a0c3bcc6da4f..fb201d5afe2c 100755 --- a/tools/bootconfig/scripts/ftrace2bconf.sh +++ b/tools/bootconfig/scripts/ftrace2bconf.sh @@ -222,8 +222,8 @@ instance_options() { # [instance-name] emit_kv $PREFIX.cpumask = $val fi val=`cat $INSTANCE/tracing_on` - if [ `echo $val | sed -e s/f//g`x != x ]; then - emit_kv $PREFIX.tracing_on = $val + if [ "$val" = "0" ]; then + emit_kv $PREFIX.tracing_on = 0 fi val= From c9538018cb2d61c269c4c622131545763f24a9ee Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 22 Jul 2021 11:06:44 +0200 Subject: [PATCH 35/65] thermal/core: Fix thermal_cooling_device_register() prototype [ Upstream commit fb83610762dd5927212aa62a468dd3b756b57a88 ] There are two pairs of declarations for thermal_cooling_device_register() and thermal_of_cooling_device_register(), and only one set was changed in a recent patch, so the other one now causes a compile-time warning: drivers/net/wireless/mediatek/mt76/mt7915/init.c: In function 'mt7915_thermal_init': drivers/net/wireless/mediatek/mt76/mt7915/init.c:134:48: error: passing argument 1 of 'thermal_cooling_device_register' discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers] 134 | cdev = thermal_cooling_device_register(wiphy_name(wiphy), phy, | ^~~~~~~~~~~~~~~~~ In file included from drivers/net/wireless/mediatek/mt76/mt7915/init.c:7: include/linux/thermal.h:407:39: note: expected 'char *' but argument is of type 'const char *' 407 | thermal_cooling_device_register(char *type, void *devdata, | ~~~~~~^~~~ Change the dummy helper functions to have the same arguments as the normal version. Fixes: f991de53a8ab ("thermal: make device_register's type argument const") Signed-off-by: Arnd Bergmann Reviewed-by: Jean-Francois Dagenais Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210722090717.1116748-1-arnd@kernel.org Signed-off-by: Sasha Levin --- include/linux/thermal.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/thermal.h b/include/linux/thermal.h index d07ea27e72a9..176d9454e8f3 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -410,12 +410,13 @@ static inline void thermal_zone_device_unregister( struct thermal_zone_device *tz) { } static inline struct thermal_cooling_device * -thermal_cooling_device_register(char *type, void *devdata, +thermal_cooling_device_register(const char *type, void *devdata, const struct thermal_cooling_device_ops *ops) { return ERR_PTR(-ENODEV); } static inline struct thermal_cooling_device * thermal_of_cooling_device_register(struct device_node *np, - char *type, void *devdata, const struct thermal_cooling_device_ops *ops) + const char *type, void *devdata, + const struct thermal_cooling_device_ops *ops) { return ERR_PTR(-ENODEV); } static inline struct thermal_cooling_device * devm_thermal_of_cooling_device_register(struct device *dev, From 45bd9dd1bee8aedc4cbd409b1ba7f9b4f941eea6 Mon Sep 17 00:00:00 2001 From: Koba Ko Date: Mon, 30 Aug 2021 10:02:00 +0800 Subject: [PATCH 36/65] drm/amdgpu: Disable PCIE_DPM on Intel RKL Platform [ Upstream commit b3dc549986eb7b38eba4a144e979dc93f386751f ] Due to high latency in PCIE clock switching on RKL platforms, switching the PCIE clock dynamically at runtime can lead to HDMI/DP audio problems. On newer asics this is handled in the SMU firmware. For SMU7-based asics, disable PCIE clock switching to avoid the issue. AMD provide a parameter to disable PICE_DPM. modprobe amdgpu ppfeaturemask=0xfff7bffb It's better to contorl PCIE_DPM in amd gpu driver, switch PCI_DPM by determining intel RKL platform for SMU7-based asics. Fixes: 1a31474cdb48 ("drm/amd/pm: workaround for audio noise issue") Ref: https://lists.freedesktop.org/archives/amd-gfx/2021-August/067413.html Signed-off-by: Koba Ko Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index b76425164e29..7931528bc864 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -27,6 +27,9 @@ #include #include #include +#if IS_ENABLED(CONFIG_X86_64) +#include +#endif #include #include "ppatomctrl.h" #include "atombios.h" @@ -1606,6 +1609,17 @@ static int smu7_disable_dpm_tasks(struct pp_hwmgr *hwmgr) return result; } +static bool intel_core_rkl_chk(void) +{ +#if IS_ENABLED(CONFIG_X86_64) + struct cpuinfo_x86 *c = &cpu_data(0); + + return (c->x86 == 6 && c->x86_model == INTEL_FAM6_ROCKETLAKE); +#else + return false; +#endif +} + static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); @@ -1629,7 +1643,8 @@ static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr) data->mclk_dpm_key_disabled = hwmgr->feature_mask & PP_MCLK_DPM_MASK ? false : true; data->sclk_dpm_key_disabled = hwmgr->feature_mask & PP_SCLK_DPM_MASK ? false : true; - data->pcie_dpm_key_disabled = hwmgr->feature_mask & PP_PCIE_DPM_MASK ? false : true; + data->pcie_dpm_key_disabled = + intel_core_rkl_chk() || !(hwmgr->feature_mask & PP_PCIE_DPM_MASK); /* need to set voltage control types before EVV patching */ data->voltage_control = SMU7_VOLTAGE_CONTROL_NONE; data->vddci_control = SMU7_VOLTAGE_CONTROL_NONE; From b9a1526d51744075a6245d3f3a5544b10a5405c9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Aug 2021 13:48:34 +0200 Subject: [PATCH 37/65] drivers: base: cacheinfo: Get rid of DEFINE_SMP_CALL_CACHE_FUNCTION() [ Upstream commit 4b92d4add5f6dcf21275185c997d6ecb800054cd ] DEFINE_SMP_CALL_CACHE_FUNCTION() was usefel before the CPU hotplug rework to ensure that the cache related functions are called on the upcoming CPU because the notifier itself could run on any online CPU. The hotplug state machine guarantees that the callbacks are invoked on the upcoming CPU. So there is no need to have this SMP function call obfuscation. That indirection was missed when the hotplug notifiers were converted. This also solves the problem of ARM64 init_cache_level() invoking ACPI functions which take a semaphore in that context. That's invalid as SMP function calls run with interrupts disabled. Running it just from the callback in context of the CPU hotplug thread solves this. Fixes: 8571890e1513 ("arm64: Add support for ACPI based firmware tables") Reported-by: Guenter Roeck Signed-off-by: Thomas Gleixner Tested-by: Guenter Roeck Acked-by: Will Deacon Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/871r69ersb.ffs@tglx Signed-off-by: Sasha Levin --- arch/arm64/kernel/cacheinfo.c | 7 ++----- arch/mips/kernel/cacheinfo.c | 7 ++----- arch/riscv/kernel/cacheinfo.c | 7 ++----- arch/x86/kernel/cpu/cacheinfo.c | 7 ++----- include/linux/cacheinfo.h | 18 ------------------ 5 files changed, 8 insertions(+), 38 deletions(-) diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 7fa6828bb488..587543c6c51c 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -43,7 +43,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, this_leaf->type = type; } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { unsigned int ctype, level, leaves, fw_level; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -78,7 +78,7 @@ static int __init_cache_level(unsigned int cpu) return 0; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { unsigned int level, idx; enum cache_type type; @@ -97,6 +97,3 @@ static int __populate_cache_leaves(unsigned int cpu) } return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/mips/kernel/cacheinfo.c b/arch/mips/kernel/cacheinfo.c index 47312c529410..529dab855aac 100644 --- a/arch/mips/kernel/cacheinfo.c +++ b/arch/mips/kernel/cacheinfo.c @@ -17,7 +17,7 @@ do { \ leaf++; \ } while (0) -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpuinfo_mips *c = ¤t_cpu_data; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -69,7 +69,7 @@ static void fill_cpumask_cluster(int cpu, cpumask_t *cpu_map) cpumask_set_cpu(cpu1, cpu_map); } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { struct cpuinfo_mips *c = ¤t_cpu_data; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -98,6 +98,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index d86781357044..90deabfe63ea 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -113,7 +113,7 @@ static void fill_cacheinfo(struct cacheinfo **this_leaf, } } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct device_node *np = of_cpu_device_node_get(cpu); @@ -155,7 +155,7 @@ static int __init_cache_level(unsigned int cpu) return 0; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf = this_cpu_ci->info_list; @@ -187,6 +187,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index f9ac682e75e7..b458b0fd98bf 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -985,7 +985,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, this_leaf->priv = base->nb; } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -1014,7 +1014,7 @@ static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs) id4_regs->id = c->apicid >> index_msb; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { unsigned int idx, ret; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -1033,6 +1033,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 4f72b47973c3..2f909ed084c6 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -79,24 +79,6 @@ struct cpu_cacheinfo { bool cpu_map_populated; }; -/* - * Helpers to make sure "func" is executed on the cpu whose cache - * attributes are being detected - */ -#define DEFINE_SMP_CALL_CACHE_FUNCTION(func) \ -static inline void _##func(void *ret) \ -{ \ - int cpu = smp_processor_id(); \ - *(int *)ret = __##func(cpu); \ -} \ - \ -int func(unsigned int cpu) \ -{ \ - int ret; \ - smp_call_function_single(cpu, _##func, &ret, true); \ - return ret; \ -} - struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu); int init_cache_level(unsigned int cpu); int populate_cache_leaves(unsigned int cpu); From ca907291e11619843ea8b555ca30cf322a8d2514 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 2 Sep 2021 14:49:11 +0200 Subject: [PATCH 38/65] dma-buf: DMABUF_MOVE_NOTIFY should depend on DMA_SHARED_BUFFER [ Upstream commit c4f3a3460a5daebc772d9263500e4099b11e7300 ] Move notify between drivers is an option of DMA-BUF. Enabling DMABUF_MOVE_NOTIFY without DMA_SHARED_BUFFER does not have any impact, as drivers/dma-buf/ is not entered during the build when DMA_SHARED_BUFFER is disabled. Fixes: bb42df4662a44765 ("dma-buf: add dynamic DMA-buf handling v15") Signed-off-by: Geert Uytterhoeven Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210902124913.2698760-2-geert@linux-m68k.org Signed-off-by: Sasha Levin --- drivers/dma-buf/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig index 4f8224a6ac95..3ca7de37dd8f 100644 --- a/drivers/dma-buf/Kconfig +++ b/drivers/dma-buf/Kconfig @@ -42,6 +42,7 @@ config UDMABUF config DMABUF_MOVE_NOTIFY bool "Move notify between drivers (EXPERIMENTAL)" default n + depends on DMA_SHARED_BUFFER help Don't pin buffers if the dynamic DMA-buf interface is available on both the exporter as well as the importer. This fixes a security From 2a07348e9821d7014b27ba3cd7a27318cbc5f4c9 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 8 Sep 2021 08:30:41 -0700 Subject: [PATCH 39/65] parisc: Move pci_dev_is_behind_card_dino to where it is used [ Upstream commit 907872baa9f1538eed02ec737b8e89eba6c6e4b9 ] parisc build test images fail to compile with the following error. drivers/parisc/dino.c:160:12: error: 'pci_dev_is_behind_card_dino' defined but not used Move the function just ahead of its only caller to avoid the error. Fixes: 5fa1659105fa ("parisc: Disable HP HSC-PCI Cards to prevent kernel crash") Cc: Helge Deller Signed-off-by: Guenter Roeck Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/parisc/dino.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index 889d7ce282eb..952a92504df6 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -156,15 +156,6 @@ static inline struct dino_device *DINO_DEV(struct pci_hba_data *hba) return container_of(hba, struct dino_device, hba); } -/* Check if PCI device is behind a Card-mode Dino. */ -static int pci_dev_is_behind_card_dino(struct pci_dev *dev) -{ - struct dino_device *dino_dev; - - dino_dev = DINO_DEV(parisc_walk_tree(dev->bus->bridge)); - return is_card_dino(&dino_dev->hba.dev->id); -} - /* * Dino Configuration Space Accessor Functions */ @@ -447,6 +438,15 @@ static void quirk_cirrus_cardbus(struct pci_dev *dev) DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_6832, quirk_cirrus_cardbus ); #ifdef CONFIG_TULIP +/* Check if PCI device is behind a Card-mode Dino. */ +static int pci_dev_is_behind_card_dino(struct pci_dev *dev) +{ + struct dino_device *dino_dev; + + dino_dev = DINO_DEV(parisc_walk_tree(dev->bus->bridge)); + return is_card_dino(&dino_dev->hba.dev->id); +} + static void pci_fixup_tulip(struct pci_dev *dev) { if (!pci_dev_is_behind_card_dino(dev)) From b4bb0b171b6ef63474bbbcfcb0b90660c27f8fcc Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Fri, 20 Aug 2021 15:29:55 -0500 Subject: [PATCH 40/65] iommu/amd: Relocate GAMSup check to early_enable_iommus [ Upstream commit c3811a50addd23b9bb5a36278609ee1638debcf6 ] Currently, iommu_init_ga() checks and disables IOMMU VAPIC support (i.e. AMD AVIC support in IOMMU) when GAMSup feature bit is not set. However it forgets to clear IRQ_POSTING_CAP from the previously set amd_iommu_irq_ops.capability. This triggers an invalid page fault bug during guest VM warm reboot if AVIC is enabled since the irq_remapping_cap(IRQ_POSTING_CAP) is incorrectly set, and crash the system with the following kernel trace. BUG: unable to handle page fault for address: 0000000000400dd8 RIP: 0010:amd_iommu_deactivate_guest_mode+0x19/0xbc Call Trace: svm_set_pi_irte_mode+0x8a/0xc0 [kvm_amd] ? kvm_make_all_cpus_request_except+0x50/0x70 [kvm] kvm_request_apicv_update+0x10c/0x150 [kvm] svm_toggle_avic_for_irq_window+0x52/0x90 [kvm_amd] svm_enable_irq_window+0x26/0xa0 [kvm_amd] vcpu_enter_guest+0xbbe/0x1560 [kvm] ? avic_vcpu_load+0xd5/0x120 [kvm_amd] ? kvm_arch_vcpu_load+0x76/0x240 [kvm] ? svm_get_segment_base+0xa/0x10 [kvm_amd] kvm_arch_vcpu_ioctl_run+0x103/0x590 [kvm] kvm_vcpu_ioctl+0x22a/0x5d0 [kvm] __x64_sys_ioctl+0x84/0xc0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes by moving the initializing of AMD IOMMU interrupt remapping mode (amd_iommu_guest_ir) earlier before setting up the amd_iommu_irq_ops.capability with appropriate IRQ_POSTING_CAP flag. [joro: Squashed the two patches and limited check_features_on_all_iommus() to CONFIG_IRQ_REMAP to fix a compile warning.] Signed-off-by: Wei Huang Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20210820202957.187572-2-suravee.suthikulpanit@amd.com Link: https://lore.kernel.org/r/20210820202957.187572-3-suravee.suthikulpanit@amd.com Fixes: 8bda0cfbdc1a ("iommu/amd: Detect and initialize guest vAPIC log") Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd/init.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index fa57986c2309..28de889aa516 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -298,6 +298,22 @@ int amd_iommu_get_num_iommus(void) return amd_iommus_present; } +#ifdef CONFIG_IRQ_REMAP +static bool check_feature_on_all_iommus(u64 mask) +{ + bool ret = false; + struct amd_iommu *iommu; + + for_each_iommu(iommu) { + ret = iommu_feature(iommu, mask); + if (!ret) + return false; + } + + return true; +} +#endif + /* * For IVHD type 0x11/0x40, EFR is also available via IVHD. * Default to IVHD EFR since it is available sooner @@ -854,13 +870,6 @@ static int iommu_init_ga(struct amd_iommu *iommu) int ret = 0; #ifdef CONFIG_IRQ_REMAP - /* Note: We have already checked GASup from IVRS table. - * Now, we need to make sure that GAMSup is set. - */ - if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && - !iommu_feature(iommu, FEATURE_GAM_VAPIC)) - amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; - ret = iommu_init_ga_log(iommu); #endif /* CONFIG_IRQ_REMAP */ @@ -2396,6 +2405,14 @@ static void early_enable_iommus(void) } #ifdef CONFIG_IRQ_REMAP + /* + * Note: We have already checked GASup from IVRS table. + * Now, we need to make sure that GAMSup is set. + */ + if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && + !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) + amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; + if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP); #endif From c12cf7f9afe23eef40cb5cff5fd28a0f39745232 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Jun 2021 10:38:10 +0200 Subject: [PATCH 41/65] dmaengine: idxd: depends on !UML [ Upstream commit b2296eeac91555bd13f774efa7ab7d4b12fb71ef ] Now that UML has PCI support, this driver must depend also on !UML since it pokes at X86_64 architecture internals that don't exist on ARCH=um. Reported-by: kernel test robot Signed-off-by: Johannes Berg Acked-by: Dave Jiang Acked-By: Anton Ivanov Link: https://lore.kernel.org/r/20210625103810.fe877ae0aef4.If240438e3f50ae226f3f755fc46ea498c6858393@changeid Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index f28bb2334e74..3a745e8a0f42 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -285,7 +285,7 @@ config INTEL_IDMA64 config INTEL_IDXD tristate "Intel Data Accelerators support" - depends on PCI && X86_64 + depends on PCI && X86_64 && !UML depends on PCI_MSI depends on SBITMAP select DMA_ENGINE From 35492619e010b312d0d3fe94bbe182bce0bae5ee Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Tue, 4 May 2021 10:22:57 +0800 Subject: [PATCH 42/65] dmaengine: sprd: Add missing MODULE_DEVICE_TABLE [ Upstream commit 4faee8b65ec32346f8096e64c5fa1d5a73121742 ] This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this driver when it is built as an external module. Reported-by: Hulk Robot Signed-off-by: Zou Wei Reviewed-by: Baolin Wang Link: https://lore.kernel.org/r/1620094977-70146-1-git-send-email-zou_wei@huawei.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/sprd-dma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/sprd-dma.c b/drivers/dma/sprd-dma.c index 0ef5ca81ba4d..4357d2395e6b 100644 --- a/drivers/dma/sprd-dma.c +++ b/drivers/dma/sprd-dma.c @@ -1265,6 +1265,7 @@ static const struct of_device_id sprd_dma_match[] = { { .compatible = "sprd,sc9860-dma", }, {}, }; +MODULE_DEVICE_TABLE(of, sprd_dma_match); static int __maybe_unused sprd_dma_runtime_suspend(struct device *dev) { From 9c1ea8537730790ee183fa32379932f777c95d17 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 9 Aug 2021 11:24:09 +0200 Subject: [PATCH 43/65] dmaengine: ioat: depends on !UML [ Upstream commit bbac7a92a46f0876e588722ebe552ddfe6fd790f ] Now that UML has PCI support, this driver must depend also on !UML since it pokes at X86_64 architecture internals that don't exist on ARCH=um. Reported-by: Geert Uytterhoeven Signed-off-by: Johannes Berg Acked-by: Dave Jiang Link: https://lore.kernel.org/r/20210809112409.a3a0974874d2.I2ffe3d11ed37f735da2f39884a74c953b258b995@changeid Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 3a745e8a0f42..08013345d1f2 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -299,7 +299,7 @@ config INTEL_IDXD config INTEL_IOATDMA tristate "Intel I/OAT DMA support" - depends on PCI && X86_64 + depends on PCI && X86_64 && !UML select DMA_ENGINE select DMA_ENGINE_RAID select DCA From b26ced26251cc26cc6c8327535687b77af0c55db Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Thu, 19 Aug 2021 14:28:48 +0530 Subject: [PATCH 44/65] dmaengine: xilinx_dma: Set DMA mask for coherent APIs [ Upstream commit aac6c0f90799d66b8989be1e056408f33fd99fe6 ] The xilinx dma driver uses the consistent allocations, so for correct operation also set the DMA mask for coherent APIs. It fixes the below kernel crash with dmatest client when DMA IP is configured with 64-bit address width and linux is booted from high (>4GB) memory. Call trace: [ 489.531257] dma_alloc_from_pool+0x8c/0x1c0 [ 489.535431] dma_direct_alloc+0x284/0x330 [ 489.539432] dma_alloc_attrs+0x80/0xf0 [ 489.543174] dma_pool_alloc+0x160/0x2c0 [ 489.547003] xilinx_cdma_prep_memcpy+0xa4/0x180 [ 489.551524] dmatest_func+0x3cc/0x114c [ 489.555266] kthread+0x124/0x130 [ 489.558486] ret_from_fork+0x10/0x3c [ 489.562051] ---[ end trace 248625b2d596a90a ]--- Signed-off-by: Radhey Shyam Pandey Reviewed-by: Harini Katakam Link: https://lore.kernel.org/r/1629363528-30347-1-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/xilinx/xilinx_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 9ffdbeec436b..cab4719e4cf9 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -3070,7 +3070,7 @@ static int xilinx_dma_probe(struct platform_device *pdev) xdev->ext_addr = false; /* Set the dma mask bits */ - dma_set_mask(xdev->dev, DMA_BIT_MASK(addr_width)); + dma_set_mask_and_coherent(xdev->dev, DMA_BIT_MASK(addr_width)); /* Initialize the DMA engine */ xdev->common.dev = &pdev->dev; From 386fd6fd01bd1ee92640f97c0d0e06e204641c9f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 11 Aug 2021 06:40:42 -0400 Subject: [PATCH 45/65] ceph: request Fw caps before updating the mtime in ceph_write_iter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b11ed50346683a749632ea664959b28d524d7395 ] The current code will update the mtime and then try to get caps to handle the write. If we end up having to request caps from the MDS, then the mtime in the cap grant will clobber the updated mtime and it'll be lost. This is most noticable when two clients are alternately writing to the same file. Fw caps are continually being granted and revoked, and the mtime ends up stuck because the updated mtimes are always being overwritten with the old one. Fix this by changing the order of operations in ceph_write_iter to get the caps before updating the times. Also, make sure we check the pool full conditions before even getting any caps or uninlining. URL: https://tracker.ceph.com/issues/46574 Reported-by: Jozef Kováč Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-by: Luis Henriques Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/file.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 3d2e3dd4ee01..f1895f78ab45 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1723,22 +1723,6 @@ retry_snap: goto out; } - err = file_remove_privs(file); - if (err) - goto out; - - err = file_update_time(file); - if (err) - goto out; - - inode_inc_iversion_raw(inode); - - if (ci->i_inline_version != CEPH_INLINE_NONE) { - err = ceph_uninline_data(file, NULL); - if (err < 0) - goto out; - } - down_read(&osdc->lock); map_flags = osdc->osdmap->flags; pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id); @@ -1749,6 +1733,16 @@ retry_snap: goto out; } + err = file_remove_privs(file); + if (err) + goto out; + + if (ci->i_inline_version != CEPH_INLINE_NONE) { + err = ceph_uninline_data(file, NULL); + if (err < 0) + goto out; + } + dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n", inode, ceph_vinop(inode), pos, count, i_size_read(inode)); if (fi->fmode & CEPH_FILE_MODE_LAZY) @@ -1761,6 +1755,12 @@ retry_snap: if (err < 0) goto out; + err = file_update_time(file); + if (err) + goto out_caps; + + inode_inc_iversion_raw(inode); + dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n", inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); @@ -1844,6 +1844,8 @@ retry_snap: } goto out_unlocked; +out_caps: + ceph_put_cap_refs(ci, got); out: if (direct_lock) ceph_end_io_direct(inode); From 487ead34a208c1da9da7fc9f776b517680255ca1 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 25 Aug 2021 21:45:43 +0800 Subject: [PATCH 46/65] ceph: remove the capsnaps when removing caps [ Upstream commit a6d37ccdd240e80f26aaea0e62cda310e0e184d7 ] capsnaps will take inode references via ihold when queueing to flush. When force unmounting, the client will just close the sessions and may never get a flush reply, causing a leak and inode ref leak. Fix this by removing the capsnaps for an inode when removing the caps. URL: https://tracker.ceph.com/issues/52295 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/caps.c | 68 +++++++++++++++++++++++++++++++++----------- fs/ceph/mds_client.c | 31 +++++++++++++++++++- fs/ceph/super.h | 6 ++++ 3 files changed, 87 insertions(+), 18 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 678dac8365ed..f303e0d87c3f 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3169,7 +3169,16 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, break; } } - BUG_ON(!found); + + if (!found) { + /* + * The capsnap should already be removed when removing + * auth cap in the case of a forced unmount. + */ + WARN_ON_ONCE(ci->i_auth_cap); + goto unlock; + } + capsnap->dirty_pages -= nr; if (capsnap->dirty_pages == 0) { complete_capsnap = true; @@ -3191,6 +3200,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, complete_capsnap ? " (complete capsnap)" : ""); } +unlock: spin_unlock(&ci->i_ceph_lock); if (last) { @@ -3657,6 +3667,43 @@ out: iput(inode); } +void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap, + bool *wake_ci, bool *wake_mdsc) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + bool ret; + + lockdep_assert_held(&ci->i_ceph_lock); + + dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci); + + list_del_init(&capsnap->ci_item); + ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush); + if (wake_ci) + *wake_ci = ret; + + spin_lock(&mdsc->cap_dirty_lock); + if (list_empty(&ci->i_cap_flush_list)) + list_del_init(&ci->i_flushing_item); + + ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush); + if (wake_mdsc) + *wake_mdsc = ret; + spin_unlock(&mdsc->cap_dirty_lock); +} + +void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap, + bool *wake_ci, bool *wake_mdsc) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + + lockdep_assert_held(&ci->i_ceph_lock); + + WARN_ON_ONCE(capsnap->dirty_pages || capsnap->writing); + __ceph_remove_capsnap(inode, capsnap, wake_ci, wake_mdsc); +} + /* * Handle FLUSHSNAP_ACK. MDS has flushed snap data to disk and we can * throw away our cap_snap. @@ -3694,23 +3741,10 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, capsnap, capsnap->follows); } } - if (flushed) { - WARN_ON(capsnap->dirty_pages || capsnap->writing); - dout(" removing %p cap_snap %p follows %lld\n", - inode, capsnap, follows); - list_del(&capsnap->ci_item); - wake_ci |= __detach_cap_flush_from_ci(ci, &capsnap->cap_flush); - - spin_lock(&mdsc->cap_dirty_lock); - - if (list_empty(&ci->i_cap_flush_list)) - list_del_init(&ci->i_flushing_item); - - wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc, - &capsnap->cap_flush); - spin_unlock(&mdsc->cap_dirty_lock); - } + if (flushed) + ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc); spin_unlock(&ci->i_ceph_lock); + if (flushed) { ceph_put_snap_context(capsnap->context); ceph_put_cap_snap(capsnap); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 46606fb5b886..0f57b7d09457 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1587,14 +1587,39 @@ out: return ret; } +static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_cap_snap *capsnap; + int capsnap_release = 0; + + lockdep_assert_held(&ci->i_ceph_lock); + + dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode); + + while (!list_empty(&ci->i_cap_snaps)) { + capsnap = list_first_entry(&ci->i_cap_snaps, + struct ceph_cap_snap, ci_item); + __ceph_remove_capsnap(inode, capsnap, NULL, NULL); + ceph_put_snap_context(capsnap->context); + ceph_put_cap_snap(capsnap); + capsnap_release++; + } + wake_up_all(&ci->i_cap_wq); + wake_up_all(&mdsc->cap_flushing_wq); + return capsnap_release; +} + static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) { struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg; + struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); LIST_HEAD(to_remove); bool dirty_dropped = false; bool invalidate = false; + int capsnap_release = 0; dout("removing cap %p, ci is %p, inode is %p\n", cap, ci, &ci->vfs_inode); @@ -1602,7 +1627,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, __ceph_remove_cap(cap, false); if (!ci->i_auth_cap) { struct ceph_cap_flush *cf; - struct ceph_mds_client *mdsc = fsc->mdsc; if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { if (inode->i_data.nrpages > 0) @@ -1666,6 +1690,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove); ci->i_prealloc_cap_flush = NULL; } + + if (!list_empty(&ci->i_cap_snaps)) + capsnap_release = remove_capsnaps(mdsc, inode); } spin_unlock(&ci->i_ceph_lock); while (!list_empty(&to_remove)) { @@ -1682,6 +1709,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, ceph_queue_invalidate(inode); if (dirty_dropped) iput(inode); + while (capsnap_release--) + iput(inode); return 0; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index a8c460393b01..9362eeb5812d 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1134,6 +1134,12 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci, int had); extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, struct ceph_snap_context *snapc); +extern void __ceph_remove_capsnap(struct inode *inode, + struct ceph_cap_snap *capsnap, + bool *wake_ci, bool *wake_mdsc); +extern void ceph_remove_capsnap(struct inode *inode, + struct ceph_cap_snap *capsnap, + bool *wake_ci, bool *wake_mdsc); extern void ceph_flush_snaps(struct ceph_inode_info *ci, struct ceph_mds_session **psession); extern bool __ceph_should_report_size(struct ceph_inode_info *ci); From 921ef7cfef15a7e25511089c9941ee37c3dc9fec Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 2 Sep 2021 08:31:03 -0400 Subject: [PATCH 47/65] ceph: lockdep annotations for try_nonblocking_invalidate [ Upstream commit 3eaf5aa1cfa8c97c72f5824e2e9263d6cc977b03 ] Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/caps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index f303e0d87c3f..48ea95b81df8 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1868,6 +1868,8 @@ static u64 __mark_caps_flushing(struct inode *inode, * try to invalidate mapping pages without blocking. */ static int try_nonblocking_invalidate(struct inode *inode) + __releases(ci->i_ceph_lock) + __acquires(ci->i_ceph_lock) { struct ceph_inode_info *ci = ceph_inode(inode); u32 invalidating_gen = ci->i_rdcache_gen; From c43803c1aa76f2cc15ee641564412741a18028ff Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 27 Jul 2021 17:01:16 -0400 Subject: [PATCH 48/65] btrfs: update the bdev time directly when closing [ Upstream commit 8f96a5bfa1503e0a5f3c78d51e993a1794d4aff1 ] We update the ctime/mtime of a block device when we remove it so that blkid knows the device changed. However we do this by re-opening the block device and calling filp_update_time. This is more correct because it'll call the inode->i_op->update_time if it exists, but the block dev inodes do not do this. Instead call generic_update_time() on the bd_inode in order to avoid the blkdev_open path and get rid of the following lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 5.14.0-rc2+ #406 Not tainted ------------------------------------------------------ losetup/11596 is trying to acquire lock: ffff939640d2f538 ((wq_completion)loop0){+.+.}-{0:0}, at: flush_workqueue+0x67/0x5e0 but task is already holding lock: ffff939655510c68 (&lo->lo_mutex){+.+.}-{3:3}, at: __loop_clr_fd+0x41/0x660 [loop] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #4 (&lo->lo_mutex){+.+.}-{3:3}: __mutex_lock+0x7d/0x750 lo_open+0x28/0x60 [loop] blkdev_get_whole+0x25/0xf0 blkdev_get_by_dev.part.0+0x168/0x3c0 blkdev_open+0xd2/0xe0 do_dentry_open+0x161/0x390 path_openat+0x3cc/0xa20 do_filp_open+0x96/0x120 do_sys_openat2+0x7b/0x130 __x64_sys_openat+0x46/0x70 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #3 (&disk->open_mutex){+.+.}-{3:3}: __mutex_lock+0x7d/0x750 blkdev_get_by_dev.part.0+0x56/0x3c0 blkdev_open+0xd2/0xe0 do_dentry_open+0x161/0x390 path_openat+0x3cc/0xa20 do_filp_open+0x96/0x120 file_open_name+0xc7/0x170 filp_open+0x2c/0x50 btrfs_scratch_superblocks.part.0+0x10f/0x170 btrfs_rm_device.cold+0xe8/0xed btrfs_ioctl+0x2a31/0x2e70 __x64_sys_ioctl+0x80/0xb0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #2 (sb_writers#12){.+.+}-{0:0}: lo_write_bvec+0xc2/0x240 [loop] loop_process_work+0x238/0xd00 [loop] process_one_work+0x26b/0x560 worker_thread+0x55/0x3c0 kthread+0x140/0x160 ret_from_fork+0x1f/0x30 -> #1 ((work_completion)(&lo->rootcg_work)){+.+.}-{0:0}: process_one_work+0x245/0x560 worker_thread+0x55/0x3c0 kthread+0x140/0x160 ret_from_fork+0x1f/0x30 -> #0 ((wq_completion)loop0){+.+.}-{0:0}: __lock_acquire+0x10ea/0x1d90 lock_acquire+0xb5/0x2b0 flush_workqueue+0x91/0x5e0 drain_workqueue+0xa0/0x110 destroy_workqueue+0x36/0x250 __loop_clr_fd+0x9a/0x660 [loop] block_ioctl+0x3f/0x50 __x64_sys_ioctl+0x80/0xb0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae other info that might help us debug this: Chain exists of: (wq_completion)loop0 --> &disk->open_mutex --> &lo->lo_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&lo->lo_mutex); lock(&disk->open_mutex); lock(&lo->lo_mutex); lock((wq_completion)loop0); *** DEADLOCK *** 1 lock held by losetup/11596: #0: ffff939655510c68 (&lo->lo_mutex){+.+.}-{3:3}, at: __loop_clr_fd+0x41/0x660 [loop] stack backtrace: CPU: 1 PID: 11596 Comm: losetup Not tainted 5.14.0-rc2+ #406 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 Call Trace: dump_stack_lvl+0x57/0x72 check_noncircular+0xcf/0xf0 ? stack_trace_save+0x3b/0x50 __lock_acquire+0x10ea/0x1d90 lock_acquire+0xb5/0x2b0 ? flush_workqueue+0x67/0x5e0 ? lockdep_init_map_type+0x47/0x220 flush_workqueue+0x91/0x5e0 ? flush_workqueue+0x67/0x5e0 ? verify_cpu+0xf0/0x100 drain_workqueue+0xa0/0x110 destroy_workqueue+0x36/0x250 __loop_clr_fd+0x9a/0x660 [loop] ? blkdev_ioctl+0x8d/0x2a0 block_ioctl+0x3f/0x50 __x64_sys_ioctl+0x80/0xb0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/volumes.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b4fcc48f255b..994ec44fc597 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1855,15 +1855,17 @@ out: * Function to update ctime/mtime for a given device path. * Mainly used for ctime/mtime based probe like libblkid. */ -static void update_dev_time(const char *path_name) +static void update_dev_time(struct block_device *bdev) { - struct file *filp; + struct inode *inode = bdev->bd_inode; + struct timespec64 now; - filp = filp_open(path_name, O_RDWR, 0); - if (IS_ERR(filp)) + /* Shouldn't happen but just in case. */ + if (!inode) return; - file_update_time(filp); - filp_close(filp, NULL); + + now = current_time(inode); + generic_update_time(inode, &now, S_MTIME | S_CTIME); } static int btrfs_rm_dev_item(struct btrfs_device *device) @@ -2038,7 +2040,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, btrfs_kobject_uevent(bdev, KOBJ_CHANGE); /* Update ctime/mtime for device path for libblkid */ - update_dev_time(device_path); + update_dev_time(bdev); } int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, @@ -2681,7 +2683,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path btrfs_forget_devices(device_path); /* Update ctime/mtime for blkid or udev */ - update_dev_time(device_path); + update_dev_time(bdev); return ret; From aa1af89a6697ed8f1f0d3d62029036158a4486d9 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 31 Aug 2021 09:21:28 +0800 Subject: [PATCH 49/65] btrfs: fix lockdep warning while mounting sprout fs [ Upstream commit c124706900c20dee70f921bb3a90492431561a0a ] Following test case reproduces lockdep warning. Test case: $ mkfs.btrfs -f $ btrfstune -S 1 $ mount $ btrfs device add -f $ umount $ mount $ umount The warning claims a possible ABBA deadlock between the threads initiated by [#1] btrfs device add and [#0] the mount. [ 540.743122] WARNING: possible circular locking dependency detected [ 540.743129] 5.11.0-rc7+ #5 Not tainted [ 540.743135] ------------------------------------------------------ [ 540.743142] mount/2515 is trying to acquire lock: [ 540.743149] ffffa0c5544c2ce0 (&fs_devs->device_list_mutex){+.+.}-{4:4}, at: clone_fs_devices+0x6d/0x210 [btrfs] [ 540.743458] but task is already holding lock: [ 540.743461] ffffa0c54a7932b8 (btrfs-chunk-00){++++}-{4:4}, at: __btrfs_tree_read_lock+0x32/0x200 [btrfs] [ 540.743541] which lock already depends on the new lock. [ 540.743543] the existing dependency chain (in reverse order) is: [ 540.743546] -> #1 (btrfs-chunk-00){++++}-{4:4}: [ 540.743566] down_read_nested+0x48/0x2b0 [ 540.743585] __btrfs_tree_read_lock+0x32/0x200 [btrfs] [ 540.743650] btrfs_read_lock_root_node+0x70/0x200 [btrfs] [ 540.743733] btrfs_search_slot+0x6c6/0xe00 [btrfs] [ 540.743785] btrfs_update_device+0x83/0x260 [btrfs] [ 540.743849] btrfs_finish_chunk_alloc+0x13f/0x660 [btrfs] <--- device_list_mutex [ 540.743911] btrfs_create_pending_block_groups+0x18d/0x3f0 [btrfs] [ 540.743982] btrfs_commit_transaction+0x86/0x1260 [btrfs] [ 540.744037] btrfs_init_new_device+0x1600/0x1dd0 [btrfs] [ 540.744101] btrfs_ioctl+0x1c77/0x24c0 [btrfs] [ 540.744166] __x64_sys_ioctl+0xe4/0x140 [ 540.744170] do_syscall_64+0x4b/0x80 [ 540.744174] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 540.744180] -> #0 (&fs_devs->device_list_mutex){+.+.}-{4:4}: [ 540.744184] __lock_acquire+0x155f/0x2360 [ 540.744188] lock_acquire+0x10b/0x5c0 [ 540.744190] __mutex_lock+0xb1/0xf80 [ 540.744193] mutex_lock_nested+0x27/0x30 [ 540.744196] clone_fs_devices+0x6d/0x210 [btrfs] [ 540.744270] btrfs_read_chunk_tree+0x3c7/0xbb0 [btrfs] [ 540.744336] open_ctree+0xf6e/0x2074 [btrfs] [ 540.744406] btrfs_mount_root.cold.72+0x16/0x127 [btrfs] [ 540.744472] legacy_get_tree+0x38/0x90 [ 540.744475] vfs_get_tree+0x30/0x140 [ 540.744478] fc_mount+0x16/0x60 [ 540.744482] vfs_kern_mount+0x91/0x100 [ 540.744484] btrfs_mount+0x1e6/0x670 [btrfs] [ 540.744536] legacy_get_tree+0x38/0x90 [ 540.744537] vfs_get_tree+0x30/0x140 [ 540.744539] path_mount+0x8d8/0x1070 [ 540.744541] do_mount+0x8d/0xc0 [ 540.744543] __x64_sys_mount+0x125/0x160 [ 540.744545] do_syscall_64+0x4b/0x80 [ 540.744547] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 540.744551] other info that might help us debug this: [ 540.744552] Possible unsafe locking scenario: [ 540.744553] CPU0 CPU1 [ 540.744554] ---- ---- [ 540.744555] lock(btrfs-chunk-00); [ 540.744557] lock(&fs_devs->device_list_mutex); [ 540.744560] lock(btrfs-chunk-00); [ 540.744562] lock(&fs_devs->device_list_mutex); [ 540.744564] *** DEADLOCK *** [ 540.744565] 3 locks held by mount/2515: [ 540.744567] #0: ffffa0c56bf7a0e0 (&type->s_umount_key#42/1){+.+.}-{4:4}, at: alloc_super.isra.16+0xdf/0x450 [ 540.744574] #1: ffffffffc05a9628 (uuid_mutex){+.+.}-{4:4}, at: btrfs_read_chunk_tree+0x63/0xbb0 [btrfs] [ 540.744640] #2: ffffa0c54a7932b8 (btrfs-chunk-00){++++}-{4:4}, at: __btrfs_tree_read_lock+0x32/0x200 [btrfs] [ 540.744708] stack backtrace: [ 540.744712] CPU: 2 PID: 2515 Comm: mount Not tainted 5.11.0-rc7+ #5 But the device_list_mutex in clone_fs_devices() is redundant, as explained below. Two threads [1] and [2] (below) could lead to clone_fs_device(). [1] open_ctree <== mount sprout fs btrfs_read_chunk_tree() mutex_lock(&uuid_mutex) <== global lock read_one_dev() open_seed_devices() clone_fs_devices() <== seed fs_devices mutex_lock(&orig->device_list_mutex) <== seed fs_devices [2] btrfs_init_new_device() <== sprouting mutex_lock(&uuid_mutex); <== global lock btrfs_prepare_sprout() lockdep_assert_held(&uuid_mutex) clone_fs_devices(seed_fs_device) <== seed fs_devices Both of these threads hold uuid_mutex which is sufficient to protect getting the seed device(s) freed while we are trying to clone it for sprouting [2] or mounting a sprout [1] (as above). A mounted seed device can not free/write/replace because it is read-only. An unmounted seed device can be freed by btrfs_free_stale_devices(), but it needs uuid_mutex. So this patch removes the unnecessary device_list_mutex in clone_fs_devices(). And adds a lockdep_assert_held(&uuid_mutex) in clone_fs_devices(). Reported-by: Su Yue Tested-by: Su Yue Signed-off-by: Anand Jain Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/volumes.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 994ec44fc597..509811aabb3f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -568,6 +568,8 @@ static int btrfs_free_stale_devices(const char *path, struct btrfs_device *device, *tmp_device; int ret = 0; + lockdep_assert_held(&uuid_mutex); + if (path) ret = -ENOENT; @@ -999,11 +1001,12 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) struct btrfs_device *orig_dev; int ret = 0; + lockdep_assert_held(&uuid_mutex); + fs_devices = alloc_fs_devices(orig->fsid, NULL); if (IS_ERR(fs_devices)) return fs_devices; - mutex_lock(&orig->device_list_mutex); fs_devices->total_devices = orig->total_devices; list_for_each_entry(orig_dev, &orig->devices, dev_list) { @@ -1035,10 +1038,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) device->fs_devices = fs_devices; fs_devices->num_devices++; } - mutex_unlock(&orig->device_list_mutex); return fs_devices; error: - mutex_unlock(&orig->device_list_mutex); free_fs_devices(fs_devices); return ERR_PTR(ret); } From d95b50ff07b831495a87eb47dd3a7f23eaec2a40 Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Tue, 7 Sep 2021 20:00:09 -0700 Subject: [PATCH 50/65] nilfs2: fix memory leak in nilfs_sysfs_create_device_group [ Upstream commit 5f5dec07aca7067216ed4c1342e464e7307a9197 ] Patch series "nilfs2: fix incorrect usage of kobject". This patchset from Nanyong Sun fixes memory leak issues and a NULL pointer dereference issue caused by incorrect usage of kboject in nilfs2 sysfs implementation. This patch (of 6): Reported by syzkaller: BUG: memory leak unreferenced object 0xffff888100ca8988 (size 8): comm "syz-executor.1", pid 1930, jiffies 4294745569 (age 18.052s) hex dump (first 8 bytes): 6c 6f 6f 70 31 00 ff ff loop1... backtrace: kstrdup+0x36/0x70 mm/util.c:60 kstrdup_const+0x35/0x60 mm/util.c:83 kvasprintf_const+0xf1/0x180 lib/kasprintf.c:48 kobject_set_name_vargs+0x56/0x150 lib/kobject.c:289 kobject_add_varg lib/kobject.c:384 [inline] kobject_init_and_add+0xc9/0x150 lib/kobject.c:473 nilfs_sysfs_create_device_group+0x150/0x7d0 fs/nilfs2/sysfs.c:986 init_nilfs+0xa21/0xea0 fs/nilfs2/the_nilfs.c:637 nilfs_fill_super fs/nilfs2/super.c:1046 [inline] nilfs_mount+0x7b4/0xe80 fs/nilfs2/super.c:1316 legacy_get_tree+0x105/0x210 fs/fs_context.c:592 vfs_get_tree+0x8e/0x2d0 fs/super.c:1498 do_new_mount fs/namespace.c:2905 [inline] path_mount+0xf9b/0x1990 fs/namespace.c:3235 do_mount+0xea/0x100 fs/namespace.c:3248 __do_sys_mount fs/namespace.c:3456 [inline] __se_sys_mount fs/namespace.c:3433 [inline] __x64_sys_mount+0x14b/0x1f0 fs/namespace.c:3433 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae If kobject_init_and_add return with error, then the cleanup of kobject is needed because memory may be allocated in kobject_init_and_add without freeing. And the place of cleanup_dev_kobject should use kobject_put to free the memory associated with the kobject. As the section "Kobject removal" of "Documentation/core-api/kobject.rst" says, kobject_del() just makes the kobject "invisible", but it is not cleaned up. And no more cleanup will do after cleanup_dev_kobject, so kobject_put is needed here. Link: https://lkml.kernel.org/r/1625651306-10829-1-git-send-email-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/1625651306-10829-2-git-send-email-konishi.ryusuke@gmail.com Reported-by: Hulk Robot Link: https://lkml.kernel.org/r/20210629022556.3985106-2-sunnanyong@huawei.com Signed-off-by: Nanyong Sun Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/nilfs2/sysfs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 9c6c0e2e5880..b6a48492fed2 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -999,7 +999,7 @@ int nilfs_sysfs_create_device_group(struct super_block *sb) err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL, "%s", sb->s_id); if (err) - goto free_dev_subgroups; + goto cleanup_dev_kobject; err = nilfs_sysfs_create_mounted_snapshots_group(nilfs); if (err) @@ -1036,9 +1036,7 @@ delete_mounted_snapshots_group: nilfs_sysfs_delete_mounted_snapshots_group(nilfs); cleanup_dev_kobject: - kobject_del(&nilfs->ns_dev_kobj); - -free_dev_subgroups: + kobject_put(&nilfs->ns_dev_kobj); kfree(nilfs->ns_dev_subgroups); failed_create_device_group: From 0480f7a480bccdd5e747e64365b703105ccae350 Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Tue, 7 Sep 2021 20:00:12 -0700 Subject: [PATCH 51/65] nilfs2: fix NULL pointer in nilfs_##name##_attr_release [ Upstream commit dbc6e7d44a514f231a64d9d5676e001b660b6448 ] In nilfs_##name##_attr_release, kobj->parent should not be referenced because it is a NULL pointer. The release() method of kobject is always called in kobject_put(kobj), in the implementation of kobject_put(), the kobj->parent will be assigned as NULL before call the release() method. So just use kobj to get the subgroups, which is more efficient and can fix a NULL pointer reference problem. Link: https://lkml.kernel.org/r/20210629022556.3985106-3-sunnanyong@huawei.com Link: https://lkml.kernel.org/r/1625651306-10829-3-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Nanyong Sun Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/nilfs2/sysfs.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index b6a48492fed2..43f660beb9b4 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -64,11 +64,9 @@ static const struct sysfs_ops nilfs_##name##_attr_ops = { \ #define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \ static void nilfs_##name##_attr_release(struct kobject *kobj) \ { \ - struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \ - struct the_nilfs *nilfs = container_of(kobj->parent, \ - struct the_nilfs, \ - ns_##parent_name##_kobj); \ - subgroups = nilfs->ns_##parent_name##_subgroups; \ + struct nilfs_sysfs_##parent_name##_subgroups *subgroups = container_of(kobj, \ + struct nilfs_sysfs_##parent_name##_subgroups, \ + sg_##name##_kobj); \ complete(&subgroups->sg_##name##_kobj_unregister); \ } \ static struct kobj_type nilfs_##name##_ktype = { \ From 5acb21e30d85f90b658bae7fb3f53b5e42952292 Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Tue, 7 Sep 2021 20:00:15 -0700 Subject: [PATCH 52/65] nilfs2: fix memory leak in nilfs_sysfs_create_##name##_group [ Upstream commit 24f8cb1ed057c840728167dab33b32e44147c86f ] If kobject_init_and_add return with error, kobject_put() is needed here to avoid memory leak, because kobject_init_and_add may return error without freeing the memory associated with the kobject it allocated. Link: https://lkml.kernel.org/r/20210629022556.3985106-4-sunnanyong@huawei.com Link: https://lkml.kernel.org/r/1625651306-10829-4-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Nanyong Sun Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/nilfs2/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 43f660beb9b4..5dc468ff5903 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -92,8 +92,8 @@ static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \ err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \ #name); \ if (err) \ - return err; \ - return 0; \ + kobject_put(kobj); \ + return err; \ } \ static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \ { \ From 5770b54b112e3ea252fc0547dd1fbc5af5c7e7c3 Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Tue, 7 Sep 2021 20:00:18 -0700 Subject: [PATCH 53/65] nilfs2: fix memory leak in nilfs_sysfs_delete_##name##_group [ Upstream commit a3e181259ddd61fd378390977a1e4e2316853afa ] The kobject_put() should be used to cleanup the memory associated with the kobject instead of kobject_del. See the section "Kobject removal" of "Documentation/core-api/kobject.rst". Link: https://lkml.kernel.org/r/20210629022556.3985106-5-sunnanyong@huawei.com Link: https://lkml.kernel.org/r/1625651306-10829-5-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Nanyong Sun Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/nilfs2/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 5dc468ff5903..34893a57a7b9 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -97,7 +97,7 @@ static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \ } \ static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \ { \ - kobject_del(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \ + kobject_put(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \ } /************************************************************************ From 0f36028d01339976c0d3dd67f11dd99f374f2e49 Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Tue, 7 Sep 2021 20:00:21 -0700 Subject: [PATCH 54/65] nilfs2: fix memory leak in nilfs_sysfs_create_snapshot_group [ Upstream commit b2fe39c248f3fa4bbb2a20759b4fdd83504190f7 ] If kobject_init_and_add returns with error, kobject_put() is needed here to avoid memory leak, because kobject_init_and_add may return error without freeing the memory associated with the kobject it allocated. Link: https://lkml.kernel.org/r/20210629022556.3985106-6-sunnanyong@huawei.com Link: https://lkml.kernel.org/r/1625651306-10829-6-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Nanyong Sun Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/nilfs2/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 34893a57a7b9..44b9ad70a564 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -208,9 +208,9 @@ int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root) } if (err) - return err; + kobject_put(&root->snapshot_kobj); - return 0; + return err; } void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root) From d7736e2faa13b0c0b677b901044d1e21879bd314 Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Tue, 7 Sep 2021 20:00:23 -0700 Subject: [PATCH 55/65] nilfs2: fix memory leak in nilfs_sysfs_delete_snapshot_group [ Upstream commit 17243e1c3072b8417a5ebfc53065d0a87af7ca77 ] kobject_put() should be used to cleanup the memory associated with the kobject instead of kobject_del(). See the section "Kobject removal" of "Documentation/core-api/kobject.rst". Link: https://lkml.kernel.org/r/20210629022556.3985106-7-sunnanyong@huawei.com Link: https://lkml.kernel.org/r/1625651306-10829-7-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Nanyong Sun Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/nilfs2/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 44b9ad70a564..57afd06db62d 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -215,7 +215,7 @@ int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root) void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root) { - kobject_del(&root->snapshot_kobj); + kobject_put(&root->snapshot_kobj); } /************************************************************************ From db8838e48a0a7f30bc6d4d6f0f9cd4e2f06c289f Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Tue, 20 Jul 2021 09:16:05 +0300 Subject: [PATCH 56/65] habanalabs: add validity check for event ID received from F/W [ Upstream commit a6c849012b0f51c674f52384bd9a4f3dc0a33c31 ] Currently there is no validity check for event ID received from F/W, Thus exposing driver to memory overrun. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay Signed-off-by: Sasha Levin --- drivers/misc/habanalabs/gaudi/gaudi.c | 6 ++++++ drivers/misc/habanalabs/goya/goya.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 37edd663603f..ebac53a73bd1 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -5723,6 +5723,12 @@ static void gaudi_handle_eqe(struct hl_device *hdev, u8 cause; bool reset_required; + if (event_type >= GAUDI_EVENT_SIZE) { + dev_err(hdev->dev, "Event type %u exceeds maximum of %u", + event_type, GAUDI_EVENT_SIZE - 1); + return; + } + gaudi->events_stat[event_type]++; gaudi->events_stat_aggregate[event_type]++; diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 5b5d6275c249..c8023b4428c5 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -4623,6 +4623,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) >> EQ_CTL_EVENT_TYPE_SHIFT); struct goya_device *goya = hdev->asic_specific; + if (event_type >= GOYA_ASYNC_EVENT_ID_SIZE) { + dev_err(hdev->dev, "Event type %u exceeds maximum of %u", + event_type, GOYA_ASYNC_EVENT_ID_SIZE - 1); + return; + } + goya->events_stat[event_type]++; goya->events_stat_aggregate[event_type]++; From 0a2ea5c0e5d1076fc189f695f18f6341f58f469d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 7 Jul 2021 18:27:51 +0200 Subject: [PATCH 57/65] pwm: img: Don't modify HW state in .remove() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c68eb29c8e9067c08175dd0414f6984f236f719d ] A consumer is expected to disable a PWM before calling pwm_put(). And if they didn't there is hopefully a good reason (or the consumer needs fixing). Also if disabling an enabled PWM was the right thing to do, this should better be done in the framework instead of in each low level driver. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-img.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c index 22c002e685b3..37f9b688661d 100644 --- a/drivers/pwm/pwm-img.c +++ b/drivers/pwm/pwm-img.c @@ -329,23 +329,7 @@ err_pm_disable: static int img_pwm_remove(struct platform_device *pdev) { struct img_pwm_chip *pwm_chip = platform_get_drvdata(pdev); - u32 val; - unsigned int i; - int ret; - ret = pm_runtime_get_sync(&pdev->dev); - if (ret < 0) { - pm_runtime_put(&pdev->dev); - return ret; - } - - for (i = 0; i < pwm_chip->chip.npwm; i++) { - val = img_pwm_readl(pwm_chip, PWM_CTRL_CFG); - val &= ~BIT(i); - img_pwm_writel(pwm_chip, PWM_CTRL_CFG, val); - } - - pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); if (!pm_runtime_status_suspended(&pdev->dev)) img_pwm_runtime_suspend(&pdev->dev); From a6a2b36a8cc45bf9063cdc4bde7434e8eac66ecd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 7 Jul 2021 18:27:52 +0200 Subject: [PATCH 58/65] pwm: rockchip: Don't modify HW state in .remove() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9d768cd7fd42bb0be16f36aec48548fca5260759 ] A consumer is expected to disable a PWM before calling pwm_put(). And if they didn't there is hopefully a good reason (or the consumer needs fixing). Also if disabling an enabled PWM was the right thing to do, this should better be done in the framework instead of in each low level driver. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-rockchip.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c index 3b8da7b0091b..1f3079562b38 100644 --- a/drivers/pwm/pwm-rockchip.c +++ b/drivers/pwm/pwm-rockchip.c @@ -382,20 +382,6 @@ static int rockchip_pwm_remove(struct platform_device *pdev) { struct rockchip_pwm_chip *pc = platform_get_drvdata(pdev); - /* - * Disable the PWM clk before unpreparing it if the PWM device is still - * running. This should only happen when the last PWM user left it - * enabled, or when nobody requested a PWM that was previously enabled - * by the bootloader. - * - * FIXME: Maybe the core should disable all PWM devices in - * pwmchip_remove(). In this case we'd only have to call - * clk_unprepare() after pwmchip_remove(). - * - */ - if (pwm_is_enabled(pc->chip.pwms)) - clk_disable(pc->clk); - clk_unprepare(pc->pclk); clk_unprepare(pc->clk); From a2551d0a29e937515a9711b604d88cd1a31c386f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 7 Jul 2021 18:27:53 +0200 Subject: [PATCH 59/65] pwm: stm32-lp: Don't modify HW state in .remove() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d44084c93427bb0a9261432db1a8ca76a42d805e ] A consumer is expected to disable a PWM before calling pwm_put(). And if they didn't there is hopefully a good reason (or the consumer needs fixing). Also if disabling an enabled PWM was the right thing to do, this should better be done in the framework instead of in each low level driver. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-stm32-lp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c index 134c14621ee0..945a8b2b8564 100644 --- a/drivers/pwm/pwm-stm32-lp.c +++ b/drivers/pwm/pwm-stm32-lp.c @@ -225,8 +225,6 @@ static int stm32_pwm_lp_remove(struct platform_device *pdev) { struct stm32_pwm_lp *priv = platform_get_drvdata(pdev); - pwm_disable(&priv->chip.pwms[0]); - return pwmchip_remove(&priv->chip); } From 23dfb959c6cbb8346a5143f44a970373fcb4ce8f Mon Sep 17 00:00:00 2001 From: Li Jinlin Date: Tue, 7 Sep 2021 20:12:42 +0800 Subject: [PATCH 60/65] blk-throttle: fix UAF by deleteing timer in blk_throtl_exit() [ Upstream commit 884f0e84f1e3195b801319c8ec3d5774e9bf2710 ] The pending timer has been set up in blk_throtl_init(). However, the timer is not deleted in blk_throtl_exit(). This means that the timer handler may still be running after freeing the timer, which would result in a use-after-free. Fix by calling del_timer_sync() to delete the timer in blk_throtl_exit(). Signed-off-by: Li Jinlin Link: https://lore.kernel.org/r/20210907121242.2885564-1-lijinlin3@huawei.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-throttle.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 63e9d00a0832..c53a254171a2 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2452,6 +2452,7 @@ int blk_throtl_init(struct request_queue *q) void blk_throtl_exit(struct request_queue *q) { BUG_ON(!q->td); + del_timer_sync(&q->td->service_queue.pending_timer); throtl_shutdown_wq(q); blkcg_deactivate_policy(q, &blkcg_policy_throtl); free_percpu(q->td->latency_buckets[READ]); From 9a14014df72d00d87a199e1f77896f7ea62cc126 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 7 Sep 2021 16:03:38 -0700 Subject: [PATCH 61/65] blk-mq: allow 4x BLK_MAX_REQUEST_COUNT at blk_plug for multiple_queues [ Upstream commit 7f2a6a69f7ced6db8220298e0497cf60482a9d4b ] Limiting number of request to BLK_MAX_REQUEST_COUNT at blk_plug hurts performance for large md arrays. [1] shows resync speed of md array drops for md array with more than 16 HDDs. Fix this by allowing more request at plug queue. The multiple_queue flag is used to only apply higher limit to multiple queue cases. [1] https://lore.kernel.org/linux-raid/CAFDAVznS71BXW8Jxv6k9dXc2iR3ysX3iZRBww_rzA8WifBFxGg@mail.gmail.com/ Tested-by: Marcin Wanat Signed-off-by: Song Liu Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-mq.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 9e3fedbaa644..6dcb86c1c985 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2109,6 +2109,18 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) } } +/* + * Allow 4x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple + * queues. This is important for md arrays to benefit from merging + * requests. + */ +static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug) +{ + if (plug->multiple_queues) + return BLK_MAX_REQUEST_COUNT * 4; + return BLK_MAX_REQUEST_COUNT; +} + /** * blk_mq_submit_bio - Create and send a request to block device. * @bio: Bio pointer. @@ -2202,7 +2214,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) else last = list_entry_rq(plug->mq_list.prev); - if (request_count >= BLK_MAX_REQUEST_COUNT || (last && + if (request_count >= blk_plug_max_rq_count(plug) || (last && blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) { blk_flush_plug_list(plug, false); trace_block_plug(q); From 647c19bc61023b305389f5b027b2f7edab0feca2 Mon Sep 17 00:00:00 2001 From: Yu-Tung Chang Date: Mon, 30 Aug 2021 13:25:32 +0800 Subject: [PATCH 62/65] rtc: rx8010: select REGMAP_I2C [ Upstream commit 0c45d3e24ef3d3d87c5e0077b8f38d1372af7176 ] The rtc-rx8010 uses the I2C regmap but doesn't select it in Kconfig so depending on the configuration the build may fail. Fix it. Signed-off-by: Yu-Tung Chang Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210830052532.40356-1-mtwget@gmail.com Signed-off-by: Sasha Levin --- drivers/rtc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 33e4ecd6c665..54cf5ec8f401 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -624,6 +624,7 @@ config RTC_DRV_FM3130 config RTC_DRV_RX8010 tristate "Epson RX8010SJ" + select REGMAP_I2C help If you say yes here you get support for the Epson RX8010SJ RTC chip. From 83a3cb200effb8f61d51516bd64fd94dea4c7805 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 6 Sep 2021 13:30:34 +0200 Subject: [PATCH 63/65] sched/idle: Make the idle timer expire in hard interrupt context [ Upstream commit 9848417926353daa59d2b05eb26e185063dbac6e ] The intel powerclamp driver will setup a per-CPU worker with RT priority. The worker will then invoke play_idle() in which it remains in the idle poll loop until it is stopped by the timer it started earlier. That timer needs to expire in hard interrupt context on PREEMPT_RT. Otherwise the timer will expire in ksoftirqd as a SOFT timer but that task won't be scheduled on the CPU because its priority is lower than the priority of the worker which is in the idle loop. Always expire the idle timer in hard interrupt context. Reported-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210906113034.jgfxrjdvxnjqgtmc@linutronix.de Signed-off-by: Sasha Levin --- kernel/sched/idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 36b545f17206..2593a733c084 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -372,10 +372,10 @@ void play_idle_precise(u64 duration_ns, u64 latency_ns) cpuidle_use_deepest_state(latency_ns); it.done = 0; - hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); it.timer.function = idle_inject_timer_fn; hrtimer_start(&it.timer, ns_to_ktime(duration_ns), - HRTIMER_MODE_REL_PINNED); + HRTIMER_MODE_REL_PINNED_HARD); while (!READ_ONCE(it.done)) do_idle(); From 7c09505e9e6b1461cddf7c85fdb4f4ff1cc3fd23 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 8 Sep 2021 12:08:17 -0700 Subject: [PATCH 64/65] drm/nouveau/nvkm: Replace -ENOSYS with -ENODEV commit e8f71f89236ef82d449991bfbc237e3cb6ea584f upstream. nvkm test builds fail with the following error. drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c: In function 'nvkm_control_mthd_pstate_info': drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c:60:35: error: overflow in conversion from 'int' to '__s8' {aka 'signed char'} changes value from '-251' to '5' The code builds on most architectures, but fails on parisc where ENOSYS is defined as 251. Replace the error code with -ENODEV (-19). The actual error code does not really matter and is not passed to userspace - it just has to be negative. Fixes: 7238eca4cf18 ("drm/nouveau: expose pstate selection per-power source in sysfs") Signed-off-by: Guenter Roeck Cc: Ben Skeggs Cc: David Airlie Cc: Daniel Vetter Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c index b0ece71aefde..ce774579c89d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c @@ -57,7 +57,7 @@ nvkm_control_mthd_pstate_info(struct nvkm_control *ctrl, void *data, u32 size) args->v0.count = 0; args->v0.ustate_ac = NVIF_CONTROL_PSTATE_INFO_V0_USTATE_DISABLE; args->v0.ustate_dc = NVIF_CONTROL_PSTATE_INFO_V0_USTATE_DISABLE; - args->v0.pwrsrc = -ENOSYS; + args->v0.pwrsrc = -ENODEV; args->v0.pstate = NVIF_CONTROL_PSTATE_INFO_V0_PSTATE_UNKNOWN; } From 5f4196eaa90c139ac470111b8e2deabf5f283baa Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 26 Sep 2021 14:09:02 +0200 Subject: [PATCH 65/65] Linux 5.10.69 Link: https://lore.kernel.org/r/20210924124334.228235870@linuxfoundation.org Tested-by: Jon Hunter Tested-by: Fox Chen Tested-by: Pavel Machek (CIP) Tested-by: Shuah Khan Tested-by: Salvatore Bonaccorso Link: https://lore.kernel.org/r/20210925120750.056868347@linuxfoundation.org Tested-by: Fox Chen Tested-by: Pavel Machek (CIP) Tested-by: Guenter Roeck Tested-by: Hulk Robot Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e50581c9db50..e14943205b83 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 68 +SUBLEVEL = 69 EXTRAVERSION = NAME = Dare mighty things